From 024ac44c701d43f5e2d34bd6a35b2813a36e6010 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Sun, 29 May 2005 02:26:31 -0500
Subject: Input: This patch implements compat_ioctl for joydev.

       I've tested it with a Logitech WingMan Rumblepad on an x86-64
       machine, and on an ia32 machine to make sure I didn't break
       anything.

Signed-off-by: Jeremy Fitzhardinge <jeremy@goop.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Vojtech Pavlik <vojtech@suse.cz>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 include/linux/joystick.h | 33 +++++++++++++++++++++++++--------
 1 file changed, 25 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/joystick.h b/include/linux/joystick.h
index b7e0ab622cd..06b9af77eb7 100644
--- a/include/linux/joystick.h
+++ b/include/linux/joystick.h
@@ -111,18 +111,35 @@ struct js_corr {
 #define JS_SET_ALL		8
 
 struct JS_DATA_TYPE {
-	int buttons;
-	int x;
-	int y;
+	__s32 buttons;
+	__s32 x;
+	__s32 y;
 };
 
-struct JS_DATA_SAVE_TYPE {
-	int JS_TIMEOUT;
-	int BUSY;
-	long JS_EXPIRETIME;
-	long JS_TIMELIMIT;
+struct JS_DATA_SAVE_TYPE_32 {
+	__s32 JS_TIMEOUT;
+	__s32 BUSY;
+	__s32 JS_EXPIRETIME;
+	__s32 JS_TIMELIMIT;
 	struct JS_DATA_TYPE JS_SAVE;
 	struct JS_DATA_TYPE JS_CORR;
 };
 
+struct JS_DATA_SAVE_TYPE_64 {
+	__s32 JS_TIMEOUT;
+	__s32 BUSY;
+	__s64 JS_EXPIRETIME;
+	__s64 JS_TIMELIMIT;
+	struct JS_DATA_TYPE JS_SAVE;
+	struct JS_DATA_TYPE JS_CORR;
+};
+
+#if BITS_PER_LONG == 64
+#define JS_DATA_SAVE_TYPE JS_DATA_SAVE_TYPE_64
+#elif BITS_PER_LONG == 32
+#define JS_DATA_SAVE_TYPE JS_DATA_SAVE_TYPE_32
+#else
+#error Unexpected BITS_PER_LONG
+#endif
+
 #endif /* _LINUX_JOYSTICK_H */
-- 
cgit v1.2.3-70-g09d2


From 0fbf87caf70acec0c435233fbc39c7bd0aca3ca6 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dtor_core@ameritech.net>
Date: Sun, 29 May 2005 02:29:25 -0500
Subject: Input: add semaphore and user count to input_dev structure;       
 serialize open and close calls and ensure that device's        open and close
 methods are only called when first user        opens it or last user closes
 it.

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/input.c | 33 ++++++++++++++++++++++++++++-----
 include/linux/input.h |  4 ++++
 2 files changed, 32 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/input.c b/drivers/input/input.c
index 3385dd03abf..1885f369e3e 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -219,10 +219,24 @@ void input_release_device(struct input_handle *handle)
 
 int input_open_device(struct input_handle *handle)
 {
+	struct input_dev *dev = handle->dev;
+	int err;
+
+	err = down_interruptible(&dev->sem);
+	if (err)
+		return err;
+
 	handle->open++;
-	if (handle->dev->open)
-		return handle->dev->open(handle->dev);
-	return 0;
+
+	if (!dev->users++ && dev->open)
+		err = dev->open(dev);
+
+	if (err)
+		handle->open--;
+
+	up(&dev->sem);
+
+	return err;
 }
 
 int input_flush_device(struct input_handle* handle, struct file* file)
@@ -235,10 +249,17 @@ int input_flush_device(struct input_handle* handle, struct file* file)
 
 void input_close_device(struct input_handle *handle)
 {
+	struct input_dev *dev = handle->dev;
+
 	input_release_device(handle);
-	if (handle->dev->close)
-		handle->dev->close(handle->dev);
+
+	down(&dev->sem);
+
+	if (!--dev->users && dev->close)
+		dev->close(dev);
 	handle->open--;
+
+	up(&dev->sem);
 }
 
 static void input_link_handle(struct input_handle *handle)
@@ -415,6 +436,8 @@ void input_register_device(struct input_dev *dev)
 
 	set_bit(EV_SYN, dev->evbit);
 
+	init_MUTEX(&dev->sem);
+
 	/*
 	 * If delay and period are pre-set by the driver, then autorepeating
 	 * is handled by the driver itself and we don't do it in input.c.
diff --git a/include/linux/input.h b/include/linux/input.h
index 72731d7d189..43e8ecec602 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -859,6 +859,10 @@ struct input_dev {
 	int (*erase_effect)(struct input_dev *dev, int effect_id);
 
 	struct input_handle *grab;
+
+	struct semaphore sem;	/* serializes open and close operations */
+	unsigned int users;
+
 	struct device *dev;
 
 	struct list_head	h_list;
-- 
cgit v1.2.3-70-g09d2


From c611763d048990de5cdf848d97af6392f8fa7430 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dtor_core@ameritech.net>
Date: Wed, 1 Jun 2005 02:39:51 -0500
Subject: Input: add ps2_drain() to libps2 to allow reading and discarding     
   given number of bytes from device. Change ps2_command to        allow using
 0 as command ID and actually pass it to the        device instead of working
 as a drain.

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/mouse/alps.c   |  3 +--
 drivers/input/serio/libps2.c | 46 ++++++++++++++++++++++++++++++++++++--------
 include/linux/libps2.h       |  1 +
 3 files changed, 40 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c
index 2679a165d39..ffdc8231319 100644
--- a/drivers/input/mouse/alps.c
+++ b/drivers/input/mouse/alps.c
@@ -270,7 +270,6 @@ static struct alps_model_info *alps_get_model(struct psmouse *psmouse, int *vers
 static int alps_passthrough_mode(struct psmouse *psmouse, int enable)
 {
 	struct ps2dev *ps2dev = &psmouse->ps2dev;
-	unsigned char param[3];
 	int cmd = enable ? PSMOUSE_CMD_SETSCALE21 : PSMOUSE_CMD_SETSCALE11;
 
 	if (ps2_command(ps2dev, NULL, cmd) ||
@@ -280,7 +279,7 @@ static int alps_passthrough_mode(struct psmouse *psmouse, int enable)
 		return -1;
 
 	/* we may get 3 more bytes, just ignore them */
-	ps2_command(ps2dev, param, 0x0300);
+	ps2_drain(ps2dev, 3, 100);
 
 	return 0;
 }
diff --git a/drivers/input/serio/libps2.c b/drivers/input/serio/libps2.c
index c978657068c..92b92ee0379 100644
--- a/drivers/input/serio/libps2.c
+++ b/drivers/input/serio/libps2.c
@@ -29,6 +29,7 @@ MODULE_LICENSE("GPL");
 
 EXPORT_SYMBOL(ps2_init);
 EXPORT_SYMBOL(ps2_sendbyte);
+EXPORT_SYMBOL(ps2_drain);
 EXPORT_SYMBOL(ps2_command);
 EXPORT_SYMBOL(ps2_schedule_command);
 EXPORT_SYMBOL(ps2_handle_ack);
@@ -45,11 +46,11 @@ struct ps2work {
 
 
 /*
- * ps2_sendbyte() sends a byte to the mouse, and waits for acknowledge.
- * It doesn't handle retransmission, though it could - because when there would
- * be need for retransmissions, the mouse has to be replaced anyway.
+ * ps2_sendbyte() sends a byte to the device and waits for acknowledge.
+ * It doesn't handle retransmission, though it could - because if there
+ * is a need for retransmissions device has to be replaced anyway.
  *
- * ps2_sendbyte() can only be called from a process context
+ * ps2_sendbyte() can only be called from a process context.
  */
 
 int ps2_sendbyte(struct ps2dev *ps2dev, unsigned char byte, int timeout)
@@ -71,6 +72,31 @@ int ps2_sendbyte(struct ps2dev *ps2dev, unsigned char byte, int timeout)
 	return -ps2dev->nak;
 }
 
+/*
+ * ps2_drain() waits for device to transmit requested number of bytes
+ * and discards them.
+ */
+
+void ps2_drain(struct ps2dev *ps2dev, int maxbytes, int timeout)
+{
+	if (maxbytes > sizeof(ps2dev->cmdbuf)) {
+		WARN_ON(1);
+		maxbytes = sizeof(ps2dev->cmdbuf);
+	}
+
+	down(&ps2dev->cmd_sem);
+
+	serio_pause_rx(ps2dev->serio);
+	ps2dev->flags = PS2_FLAG_CMD;
+	ps2dev->cmdcnt = maxbytes;
+	serio_continue_rx(ps2dev->serio);
+
+	wait_event_timeout(ps2dev->wait,
+			   !(ps2dev->flags & PS2_FLAG_CMD),
+			   msecs_to_jiffies(timeout));
+	up(&ps2dev->cmd_sem);
+}
+
 /*
  * ps2_command() sends a command and its parameters to the mouse,
  * then waits for the response and puts it in the param array.
@@ -86,6 +112,11 @@ int ps2_command(struct ps2dev *ps2dev, unsigned char *param, int command)
 	int rc = -1;
 	int i;
 
+	if (receive > sizeof(ps2dev->cmdbuf)) {
+		WARN_ON(1);
+		return -1;
+	}
+
 	down(&ps2dev->cmd_sem);
 
 	serio_pause_rx(ps2dev->serio);
@@ -101,10 +132,9 @@ int ps2_command(struct ps2dev *ps2dev, unsigned char *param, int command)
 	 * ACKing the reset command, and so it can take a long
 	 * time before the ACK arrrives.
 	 */
-	if (command & 0xff)
-		if (ps2_sendbyte(ps2dev, command & 0xff,
-			command == PS2_CMD_RESET_BAT ? 1000 : 200))
-			goto out;
+	if (ps2_sendbyte(ps2dev, command & 0xff,
+			 command == PS2_CMD_RESET_BAT ? 1000 : 200))
+		goto out;
 
 	for (i = 0; i < send; i++)
 		if (ps2_sendbyte(ps2dev, param[i], 200))
diff --git a/include/linux/libps2.h b/include/linux/libps2.h
index 923bdbc6d9e..a710bddda4e 100644
--- a/include/linux/libps2.h
+++ b/include/linux/libps2.h
@@ -41,6 +41,7 @@ struct ps2dev {
 
 void ps2_init(struct ps2dev *ps2dev, struct serio *serio);
 int ps2_sendbyte(struct ps2dev *ps2dev, unsigned char byte, int timeout);
+void ps2_drain(struct ps2dev *ps2dev, int maxbytes, int timeout);
 int ps2_command(struct ps2dev *ps2dev, unsigned char *param, int command);
 int ps2_schedule_command(struct ps2dev *ps2dev, unsigned char *param, int command);
 int ps2_handle_ack(struct ps2dev *ps2dev, unsigned char data);
-- 
cgit v1.2.3-70-g09d2


From dbf4ccd6043e58ed32fbf253fb3f0a9991e4c13a Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dtor_core@ameritech.net>
Date: Wed, 1 Jun 2005 02:40:01 -0500
Subject: Input: psmouse - export protocol as a sysfs per-device attribute     
   to allow easy switching at run-time.

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/mouse/psmouse-base.c | 291 +++++++++++++++++++++++++++++++------
 drivers/input/mouse/psmouse.h      |   1 +
 drivers/input/serio/serio.c        |  18 ++-
 include/linux/serio.h              |   6 +
 4 files changed, 266 insertions(+), 50 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/mouse/psmouse-base.c b/drivers/input/mouse/psmouse-base.c
index 259e6b70544..19785a6c5ab 100644
--- a/drivers/input/mouse/psmouse-base.c
+++ b/drivers/input/mouse/psmouse-base.c
@@ -32,15 +32,14 @@ MODULE_AUTHOR("Vojtech Pavlik <vojtech@suse.cz>");
 MODULE_DESCRIPTION(DRIVER_DESC);
 MODULE_LICENSE("GPL");
 
-static unsigned int psmouse_max_proto = -1U;
+static unsigned int psmouse_max_proto = PSMOUSE_AUTO;
 static int psmouse_set_maxproto(const char *val, struct kernel_param *kp);
 static int psmouse_get_maxproto(char *buffer, struct kernel_param *kp);
-static char *psmouse_proto_abbrev[] = { NULL, "bare", NULL, NULL, NULL, "imps", "exps", NULL, NULL, "lifebook" };
 #define param_check_proto_abbrev(name, p)	__param_check(name, p, unsigned int)
 #define param_set_proto_abbrev			psmouse_set_maxproto
 #define param_get_proto_abbrev			psmouse_get_maxproto
 module_param_named(proto, psmouse_max_proto, proto_abbrev, 0644);
-MODULE_PARM_DESC(proto, "Highest protocol extension to probe (bare, imps, exps, lifebook, any). Useful for KVM switches.");
+MODULE_PARM_DESC(proto, "Highest protocol extension to probe (bare, imps, exps, any). Useful for KVM switches.");
 
 static unsigned int psmouse_resolution = 200;
 module_param_named(resolution, psmouse_resolution, uint, 0644);
@@ -58,6 +57,7 @@ static unsigned int psmouse_resetafter;
 module_param_named(resetafter, psmouse_resetafter, uint, 0644);
 MODULE_PARM_DESC(resetafter, "Reset device after so many bad packets (0 = never).");
 
+PSMOUSE_DEFINE_ATTR(protocol);
 PSMOUSE_DEFINE_ATTR(rate);
 PSMOUSE_DEFINE_ATTR(resolution);
 PSMOUSE_DEFINE_ATTR(resetafter);
@@ -77,7 +77,14 @@ __obsolete_setup("psmouse_rate=");
  */
 static DECLARE_MUTEX(psmouse_sem);
 
-static char *psmouse_protocols[] = { "None", "PS/2", "PS2++", "ThinkPS/2", "GenPS/2", "ImPS/2", "ImExPS/2", "SynPS/2", "AlpsPS/2", "LBPS/2" };
+struct psmouse_protocol {
+	enum psmouse_type type;
+	char *name;
+	char *alias;
+	int maxproto;
+	int (*detect)(struct psmouse *, int);
+	int (*init)(struct psmouse *);
+};
 
 /*
  * psmouse_process_byte() analyzes the PS/2 data stream and reports
@@ -417,12 +424,15 @@ static int thinking_detect(struct psmouse *psmouse, int set_properties)
  */
 static int ps2bare_detect(struct psmouse *psmouse, int set_properties)
 {
-	if (!psmouse->vendor) psmouse->vendor = "Generic";
-	if (!psmouse->name) psmouse->name = "Mouse";
+	if (set_properties) {
+		if (!psmouse->vendor) psmouse->vendor = "Generic";
+		if (!psmouse->name) psmouse->name = "Mouse";
+	}
 
 	return 0;
 }
 
+
 /*
  * psmouse_extensions() probes for any extensions to the basic PS/2 protocol
  * the mouse may have.
@@ -437,9 +447,7 @@ static int psmouse_extensions(struct psmouse *psmouse,
  * We always check for lifebook because it does not disturb mouse
  * (it only checks DMI information).
  */
-	if (lifebook_detect(psmouse, set_properties) == 0 ||
-	    max_proto == PSMOUSE_LIFEBOOK) {
-
+	if (lifebook_detect(psmouse, set_properties) == 0) {
 		if (max_proto > PSMOUSE_IMEX) {
 			if (!set_properties || lifebook_init(psmouse) == 0)
 				return PSMOUSE_LIFEBOOK;
@@ -529,6 +537,103 @@ static int psmouse_extensions(struct psmouse *psmouse,
 	return PSMOUSE_PS2;
 }
 
+static struct psmouse_protocol psmouse_protocols[] = {
+	{
+		.type		= PSMOUSE_PS2,
+		.name		= "PS/2",
+		.alias		= "bare",
+		.maxproto	= 1,
+		.detect		= ps2bare_detect,
+	},
+	{
+		.type		= PSMOUSE_PS2PP,
+		.name		= "PS2++",
+		.alias		= "logitech",
+		.detect		= ps2pp_init,
+	},
+	{
+		.type		= PSMOUSE_THINKPS,
+		.name		= "ThinkPS/2",
+		.alias		= "thinkps",
+		.detect		= thinking_detect,
+	},
+	{
+		.type		= PSMOUSE_GENPS,
+		.name		= "GenPS/2",
+		.alias		= "genius",
+		.detect		= genius_detect,
+	},
+	{
+		.type		= PSMOUSE_IMPS,
+		.name		= "ImPS/2",
+		.alias		= "imps",
+		.maxproto	= 1,
+		.detect		= intellimouse_detect,
+	},
+	{
+		.type		= PSMOUSE_IMEX,
+		.name		= "ImExPS/2",
+		.alias		= "exps",
+		.maxproto	= 1,
+		.detect		= im_explorer_detect,
+	},
+	{
+		.type		= PSMOUSE_SYNAPTICS,
+		.name		= "SynPS/2",
+		.alias		= "synaptics",
+		.detect		= synaptics_detect,
+		.init		= synaptics_init,
+	},
+	{
+		.type		= PSMOUSE_ALPS,
+		.name		= "AlpsPS/2",
+		.alias		= "alps",
+		.detect		= alps_detect,
+		.init		= alps_init,
+	},
+	{
+		.type		= PSMOUSE_LIFEBOOK,
+		.name		= "LBPS/2",
+		.alias		= "lifebook",
+		.init		= lifebook_init,
+	},
+	{
+		.type		= PSMOUSE_AUTO,
+		.name		= "auto",
+		.alias		= "any",
+		.maxproto	= 1,
+	},
+};
+
+static struct psmouse_protocol *psmouse_protocol_by_type(enum psmouse_type type)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(psmouse_protocols); i++)
+		if (psmouse_protocols[i].type == type)
+			return &psmouse_protocols[i];
+
+	WARN_ON(1);
+	return &psmouse_protocols[0];
+}
+
+static struct psmouse_protocol *psmouse_protocol_by_name(const char *name, size_t len)
+{
+	struct psmouse_protocol *p;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(psmouse_protocols); i++) {
+		p = &psmouse_protocols[i];
+
+		if ((strlen(p->name) == len && !strncmp(p->name, name, len)) ||
+		    (strlen(p->alias) == len && !strncmp(p->alias, name, len)))
+			return &psmouse_protocols[i];
+	}
+
+	return NULL;
+}
+
+
 /*
  * psmouse_probe() probes for a PS/2 mouse.
  */
@@ -680,6 +785,7 @@ static void psmouse_disconnect(struct serio *serio)
 
 	psmouse = serio_get_drvdata(serio);
 
+	device_remove_file(&serio->dev, &psmouse_attr_protocol);
 	device_remove_file(&serio->dev, &psmouse_attr_rate);
 	device_remove_file(&serio->dev, &psmouse_attr_resolution);
 	device_remove_file(&serio->dev, &psmouse_attr_resetafter);
@@ -712,6 +818,49 @@ static void psmouse_disconnect(struct serio *serio)
 	up(&psmouse_sem);
 }
 
+static int psmouse_switch_protocol(struct psmouse *psmouse, struct psmouse_protocol *proto)
+{
+	memset(&psmouse->dev, 0, sizeof(struct input_dev));
+
+	init_input_dev(&psmouse->dev);
+
+	psmouse->dev.private = psmouse;
+	psmouse->dev.dev = &psmouse->ps2dev.serio->dev;
+
+	psmouse->dev.evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
+	psmouse->dev.keybit[LONG(BTN_MOUSE)] = BIT(BTN_LEFT) | BIT(BTN_MIDDLE) | BIT(BTN_RIGHT);
+	psmouse->dev.relbit[0] = BIT(REL_X) | BIT(REL_Y);
+
+	psmouse->set_rate = psmouse_set_rate;
+	psmouse->set_resolution = psmouse_set_resolution;
+	psmouse->protocol_handler = psmouse_process_byte;
+	psmouse->pktsize = 3;
+
+	if (proto && (proto->detect || proto->init)) {
+		if (proto->detect && proto->detect(psmouse, 1) < 0)
+			return -1;
+
+		if (proto->init && proto->init(psmouse) < 0)
+			return -1;
+
+		psmouse->type = proto->type;
+	}
+	else
+		psmouse->type = psmouse_extensions(psmouse, psmouse_max_proto, 1);
+
+	sprintf(psmouse->devname, "%s %s %s",
+		psmouse_protocol_by_type(psmouse->type)->name, psmouse->vendor, psmouse->name);
+
+	psmouse->dev.name = psmouse->devname;
+	psmouse->dev.phys = psmouse->phys;
+	psmouse->dev.id.bustype = BUS_I8042;
+	psmouse->dev.id.vendor = 0x0002;
+	psmouse->dev.id.product = psmouse->type;
+	psmouse->dev.id.version = psmouse->model;
+
+	return 0;
+}
+
 /*
  * psmouse_connect() is a callback from the serio module when
  * an unhandled serio port is found.
@@ -739,11 +888,7 @@ static int psmouse_connect(struct serio *serio, struct serio_driver *drv)
 
 	ps2_init(&psmouse->ps2dev, serio);
 	sprintf(psmouse->phys, "%s/input0", serio->phys);
-	psmouse->dev.evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
-	psmouse->dev.keybit[LONG(BTN_MOUSE)] = BIT(BTN_LEFT) | BIT(BTN_MIDDLE) | BIT(BTN_RIGHT);
-	psmouse->dev.relbit[0] = BIT(REL_X) | BIT(REL_Y);
-	psmouse->dev.private = psmouse;
-	psmouse->dev.dev = &serio->dev;
+
 	psmouse_set_state(psmouse, PSMOUSE_INITIALIZING);
 
 	serio_set_drvdata(serio, psmouse);
@@ -767,25 +912,10 @@ static int psmouse_connect(struct serio *serio, struct serio_driver *drv)
 	psmouse->resolution = psmouse_resolution;
 	psmouse->resetafter = psmouse_resetafter;
 	psmouse->smartscroll = psmouse_smartscroll;
-	psmouse->set_rate = psmouse_set_rate;
-	psmouse->set_resolution = psmouse_set_resolution;
-	psmouse->protocol_handler = psmouse_process_byte;
-	psmouse->pktsize = 3;
-
-	psmouse->type = psmouse_extensions(psmouse, psmouse_max_proto, 1);
-
-	sprintf(psmouse->devname, "%s %s %s",
-		psmouse_protocols[psmouse->type], psmouse->vendor, psmouse->name);
 
-	psmouse->dev.name = psmouse->devname;
-	psmouse->dev.phys = psmouse->phys;
-	psmouse->dev.id.bustype = BUS_I8042;
-	psmouse->dev.id.vendor = 0x0002;
-	psmouse->dev.id.product = psmouse->type;
-	psmouse->dev.id.version = psmouse->model;
+	psmouse_switch_protocol(psmouse, NULL);
 
 	input_register_device(&psmouse->dev);
-
 	printk(KERN_INFO "input: %s on %s\n", psmouse->devname, serio->phys);
 
 	psmouse_set_state(psmouse, PSMOUSE_CMD_MODE);
@@ -795,6 +925,7 @@ static int psmouse_connect(struct serio *serio, struct serio_driver *drv)
 	if (parent && parent->pt_activate)
 		parent->pt_activate(parent);
 
+	device_create_file(&serio->dev, &psmouse_attr_protocol);
 	device_create_file(&serio->dev, &psmouse_attr_rate);
 	device_create_file(&serio->dev, &psmouse_attr_resolution);
 	device_create_file(&serio->dev, &psmouse_attr_resetafter);
@@ -946,11 +1077,14 @@ ssize_t psmouse_attr_set_helper(struct device *dev, const char *buf, size_t coun
 		parent = serio_get_drvdata(serio->parent);
 		psmouse_deactivate(parent);
 	}
+
 	psmouse_deactivate(psmouse);
 
 	retval = handler(psmouse, buf, count);
 
-	psmouse_activate(psmouse);
+	if (retval != -ENODEV)
+		psmouse_activate(psmouse);
+
 	if (parent)
 		psmouse_activate(parent);
 
@@ -961,6 +1095,75 @@ ssize_t psmouse_attr_set_helper(struct device *dev, const char *buf, size_t coun
 	return retval;
 }
 
+static ssize_t psmouse_attr_show_protocol(struct psmouse *psmouse, char *buf)
+{
+	return sprintf(buf, "%s\n", psmouse_protocol_by_type(psmouse->type)->name);
+}
+
+static ssize_t psmouse_attr_set_protocol(struct psmouse *psmouse, const char *buf, size_t count)
+{
+	struct serio *serio = psmouse->ps2dev.serio;
+	struct psmouse *parent = NULL;
+	struct psmouse_protocol *proto;
+	int retry = 0;
+
+	if (!(proto = psmouse_protocol_by_name(buf, count)))
+		return -EINVAL;
+
+	if (psmouse->type == proto->type)
+		return count;
+
+	while (serio->child) {
+		if (++retry > 3) {
+			printk(KERN_WARNING "psmouse: failed to destroy child port, protocol change aborted.\n");
+			return -EIO;
+		}
+
+		up(&psmouse_sem);
+		serio_unpin_driver(serio);
+		serio_unregister_child_port(serio);
+		serio_pin_driver_uninterruptible(serio);
+		down(&psmouse_sem);
+
+		if (serio->drv != &psmouse_drv)
+			return -ENODEV;
+
+		if (psmouse->type == proto->type)
+			return count; /* switched by other thread */
+	}
+
+	if (serio->parent && serio->id.type == SERIO_PS_PSTHRU) {
+		parent = serio_get_drvdata(serio->parent);
+		if (parent->pt_deactivate)
+			parent->pt_deactivate(parent);
+	}
+
+	if (psmouse->disconnect)
+		psmouse->disconnect(psmouse);
+
+	psmouse_set_state(psmouse, PSMOUSE_IGNORE);
+	input_unregister_device(&psmouse->dev);
+
+	psmouse_set_state(psmouse, PSMOUSE_INITIALIZING);
+
+	if (psmouse_switch_protocol(psmouse, proto) < 0) {
+		psmouse_reset(psmouse);
+		/* default to PSMOUSE_PS2 */
+		psmouse_switch_protocol(psmouse, &psmouse_protocols[0]);
+	}
+
+	psmouse_initialize(psmouse);
+	psmouse_set_state(psmouse, PSMOUSE_CMD_MODE);
+
+	input_register_device(&psmouse->dev);
+	printk(KERN_INFO "input: %s on %s\n", psmouse->devname, serio->phys);
+
+	if (parent && parent->pt_activate)
+		parent->pt_activate(parent);
+
+	return count;
+}
+
 static ssize_t psmouse_attr_show_rate(struct psmouse *psmouse, char *buf)
 {
 	return sprintf(buf, "%d\n", psmouse->rate);
@@ -1017,34 +1220,26 @@ static ssize_t psmouse_attr_set_resetafter(struct psmouse *psmouse, const char *
 
 static int psmouse_set_maxproto(const char *val, struct kernel_param *kp)
 {
-	int i;
+	struct psmouse_protocol *proto;
 
 	if (!val)
 		return -EINVAL;
 
-	if (!strncmp(val, "any", 3)) {
-		*((unsigned int *)kp->arg) = -1U;
-		return 0;
-	}
+	proto = psmouse_protocol_by_name(val, strlen(val));
 
-	for (i = 0; i < ARRAY_SIZE(psmouse_proto_abbrev); i++) {
-		if (!psmouse_proto_abbrev[i])
-			continue;
+	if (!proto || !proto->maxproto)
+		return -EINVAL;
 
-		if (!strncmp(val, psmouse_proto_abbrev[i], strlen(psmouse_proto_abbrev[i]))) {
-			*((unsigned int *)kp->arg) = i;
-			return 0;
-		}
-	}
+	*((unsigned int *)kp->arg) = proto->type;
 
-	return -EINVAL;					\
+	return 0;					\
 }
 
 static int psmouse_get_maxproto(char *buffer, struct kernel_param *kp)
 {
-	return sprintf(buffer, "%s\n",
-			psmouse_max_proto < ARRAY_SIZE(psmouse_proto_abbrev) ?
-				psmouse_proto_abbrev[psmouse_max_proto] : "any");
+	int type = *((unsigned int *)kp->arg);
+
+	return sprintf(buffer, "%s\n", psmouse_protocol_by_type(type)->name);
 }
 
 static int __init psmouse_init(void)
diff --git a/drivers/input/mouse/psmouse.h b/drivers/input/mouse/psmouse.h
index 4848be627a6..dc8e9ae07f3 100644
--- a/drivers/input/mouse/psmouse.h
+++ b/drivers/input/mouse/psmouse.h
@@ -78,6 +78,7 @@ enum psmouse_type {
 	PSMOUSE_SYNAPTICS,
 	PSMOUSE_ALPS,
 	PSMOUSE_LIFEBOOK,
+	PSMOUSE_AUTO		/* This one should always be last */
 };
 
 int psmouse_sliced_command(struct psmouse *psmouse, unsigned char command);
diff --git a/drivers/input/serio/serio.c b/drivers/input/serio/serio.c
index b82815a0b65..615bf62ad46 100644
--- a/drivers/input/serio/serio.c
+++ b/drivers/input/serio/serio.c
@@ -42,6 +42,7 @@ MODULE_LICENSE("GPL");
 EXPORT_SYMBOL(serio_interrupt);
 EXPORT_SYMBOL(__serio_register_port);
 EXPORT_SYMBOL(serio_unregister_port);
+EXPORT_SYMBOL(serio_unregister_child_port);
 EXPORT_SYMBOL(__serio_unregister_port_delayed);
 EXPORT_SYMBOL(__serio_register_driver);
 EXPORT_SYMBOL(serio_unregister_driver);
@@ -179,12 +180,12 @@ static void serio_queue_event(void *object, struct module *owner,
 	spin_lock_irqsave(&serio_event_lock, flags);
 
 	/*
- 	 * Scan event list for the other events for the same serio port,
+	 * Scan event list for the other events for the same serio port,
 	 * starting with the most recent one. If event is the same we
 	 * do not need add new one. If event is of different type we
 	 * need to add this event and should not look further because
 	 * we need to preseve sequence of distinct events.
- 	 */
+	 */
 	list_for_each_entry_reverse(event, &serio_event_list, node) {
 		if (event->object == object) {
 			if (event->type == event_type)
@@ -653,6 +654,19 @@ void serio_unregister_port(struct serio *serio)
 	up(&serio_sem);
 }
 
+/*
+ * Safely unregisters child port if one is present.
+ */
+void serio_unregister_child_port(struct serio *serio)
+{
+	down(&serio_sem);
+	if (serio->child) {
+		serio_disconnect_port(serio->child);
+		serio_destroy_port(serio->child);
+	}
+	up(&serio_sem);
+}
+
 /*
  * Submits register request to kseriod for subsequent execution.
  * Can be used when it is not obvious whether the serio_sem is
diff --git a/include/linux/serio.h b/include/linux/serio.h
index a2d3b9ae06f..aa4d6493a03 100644
--- a/include/linux/serio.h
+++ b/include/linux/serio.h
@@ -83,6 +83,7 @@ static inline void serio_register_port(struct serio *serio)
 }
 
 void serio_unregister_port(struct serio *serio);
+void serio_unregister_child_port(struct serio *serio);
 void __serio_unregister_port_delayed(struct serio *serio, struct module *owner);
 static inline void serio_unregister_port_delayed(struct serio *serio)
 {
@@ -153,6 +154,11 @@ static inline int serio_pin_driver(struct serio *serio)
 	return down_interruptible(&serio->drv_sem);
 }
 
+static inline void serio_pin_driver_uninterruptible(struct serio *serio)
+{
+	down(&serio->drv_sem);
+}
+
 static inline void serio_unpin_driver(struct serio *serio)
 {
 	up(&serio->drv_sem);
-- 
cgit v1.2.3-70-g09d2


From 169a3e66637c667b43dab7c319ffd5c99804cad8 Mon Sep 17 00:00:00 2001
From: Jay Vosburgh <fubar@us.ibm.com>
Date: Sun, 26 Jun 2005 17:54:11 -0400
Subject: bonding: xor/802.3ad improved slave hash

Add support for alternate slave selection algorithms to bonding
balance-xor and 802.3ad modes.  Default mode (what we have now: xor of
MAC addresses) is "layer2", new choice is "layer3+4", using IP and port
information for hashing to select peer.

Originally submitted by Jason Gabler for balance-xor mode;
modified by Jay Vosburgh to additionally support 802.3ad mode.  Jason's
original comment is as follows:

The attached patch to the Linux Etherchannel Bonding driver modifies the
driver's "balance-xor" mode as follows:

      - alternate hashing policy support for mode 2
        * Added kernel parameter "xmit_policy" to allow the specification
          of different hashing policies for mode 2.  The original mode 2
          policy is the default, now found in xmit_hash_policy_layer2().
        * Added xmit_hash_policy_layer34()

This patch was inspired by hashing policies implemented by Cisco,
Foundry and IBM, which are explained in
Foundry documentation found at:
http://www.foundrynet.com/services/documentation/sribcg/Trunking.html#112750

Signed-off-by: Jason Gabler <jygabler@lbl.gov>
Signed-off-by: Jay Vosburgh <fubar@us.ibm.com>
---
 drivers/net/bonding/bond_3ad.c  |   3 +-
 drivers/net/bonding/bond_main.c | 107 ++++++++++++++++++++++++++++++++++++----
 drivers/net/bonding/bonding.h   |  10 +++-
 include/linux/if_bonding.h      |   7 +++
 4 files changed, 114 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index 6233c4ffb80..a2e8dda5afa 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -2346,7 +2346,6 @@ int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev)
 {
 	struct slave *slave, *start_at;
 	struct bonding *bond = dev->priv;
-	struct ethhdr *data = (struct ethhdr *)skb->data;
 	int slave_agg_no;
 	int slaves_in_agg;
 	int agg_id;
@@ -2377,7 +2376,7 @@ int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev)
 		goto out;
 	}
 
-	slave_agg_no = (data->h_dest[5]^bond->dev->dev_addr[5]) % slaves_in_agg;
+	slave_agg_no = bond->xmit_hash_policy(skb, dev, slaves_in_agg);
 
 	bond_for_each_slave(bond, slave, i) {
 		struct aggregator *agg = SLAVE_AD_INFO(slave).port.aggregator;
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 545f6fe025a..2c930da90a8 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -479,6 +479,14 @@
  * 	- Support for generating gratuitous ARPs in active-backup mode.
  * 	  Includes support for VLAN tagging all bonding-generated ARPs
  * 	  as needed.  Set version to 2.6.2.
+ * 2005/06/08 - Jason Gabler <jygabler at lbl dot gov>
+ *	- alternate hashing policy support for mode 2
+ *	  * Added kernel parameter "xmit_hash_policy" to allow the selection
+ *	    of different hashing policies for mode 2.  The original mode 2
+ *	    policy is the default, now found in xmit_hash_policy_layer2().
+ *	  * Added xmit_hash_policy_layer34()
+ *	- Modified by Jay Vosburgh <fubar@us.ibm.com> to also support mode 4.
+ *	  Set version to 2.6.3.
  */
 
 //#define BONDING_DEBUG 1
@@ -493,7 +501,10 @@
 #include <linux/ptrace.h>
 #include <linux/ioport.h>
 #include <linux/in.h>
+#include <net/ip.h>
 #include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/init.h>
@@ -541,6 +552,7 @@ static int use_carrier	= 1;
 static char *mode	= NULL;
 static char *primary	= NULL;
 static char *lacp_rate	= NULL;
+static char *xmit_hash_policy = NULL;
 static int arp_interval = BOND_LINK_ARP_INTERV;
 static char *arp_ip_target[BOND_MAX_ARP_TARGETS] = { NULL, };
 
@@ -560,6 +572,8 @@ module_param(primary, charp, 0);
 MODULE_PARM_DESC(primary, "Primary network device to use");
 module_param(lacp_rate, charp, 0);
 MODULE_PARM_DESC(lacp_rate, "LACPDU tx rate to request from 802.3ad partner (slow/fast)");
+module_param(xmit_hash_policy, charp, 0);
+MODULE_PARM_DESC(xmit_hash_policy, "XOR hashing method : 0 for layer 2 (default), 1 for layer 3+4");
 module_param(arp_interval, int, 0);
 MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds");
 module_param_array(arp_ip_target, charp, NULL, 0);
@@ -579,6 +593,7 @@ static struct proc_dir_entry *bond_proc_dir = NULL;
 static u32 arp_target[BOND_MAX_ARP_TARGETS] = { 0, } ;
 static int arp_ip_count	= 0;
 static int bond_mode	= BOND_MODE_ROUNDROBIN;
+static int xmit_hashtype= BOND_XMIT_POLICY_LAYER2;
 static int lacp_fast	= 0;
 static int app_abi_ver	= 0;
 static int orig_app_abi_ver = -1; /* This is used to save the first ABI version
@@ -588,7 +603,6 @@ static int orig_app_abi_ver = -1; /* This is used to save the first ABI version
 				   * command comes from an application using
 				   * another ABI version.
 				   */
-
 struct bond_parm_tbl {
 	char *modename;
 	int mode;
@@ -611,9 +625,15 @@ static struct bond_parm_tbl bond_mode_tbl[] = {
 {	NULL,			-1},
 };
 
+static struct bond_parm_tbl xmit_hashtype_tbl[] = {
+{	"layer2",		BOND_XMIT_POLICY_LAYER2},
+{	"layer3+4",		BOND_XMIT_POLICY_LAYER34},
+{	NULL,			-1},
+};
+
 /*-------------------------- Forward declarations ---------------------------*/
 
-static inline void bond_set_mode_ops(struct net_device *bond_dev, int mode);
+static inline void bond_set_mode_ops(struct bonding *bond, int mode);
 static void bond_send_gratuitous_arp(struct bonding *bond);
 
 /*---------------------------- General routines -----------------------------*/
@@ -3724,6 +3744,46 @@ static void bond_unregister_lacpdu(struct bonding *bond)
 	dev_remove_pack(&(BOND_AD_INFO(bond).ad_pkt_type));
 }
 
+/*---------------------------- Hashing Policies -----------------------------*/
+
+/*
+ * Hash for the the output device based upon layer 3 and layer 4 data. If
+ * the packet is a frag or not TCP or UDP, just use layer 3 data.  If it is
+ * altogether not IP, mimic bond_xmit_hash_policy_l2()
+ */
+static int bond_xmit_hash_policy_l34(struct sk_buff *skb,
+				    struct net_device *bond_dev, int count)
+{
+	struct ethhdr *data = (struct ethhdr *)skb->data;
+	struct iphdr *iph = skb->nh.iph;
+	u16 *layer4hdr = (u16 *)((u32 *)iph + iph->ihl);
+	int layer4_xor = 0;
+
+	if (skb->protocol == __constant_htons(ETH_P_IP)) {
+		if (!(iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) &&
+		    (iph->protocol == IPPROTO_TCP ||
+		     iph->protocol == IPPROTO_UDP)) {
+			layer4_xor = htons((*layer4hdr ^ *(layer4hdr + 1)));
+		}
+		return (layer4_xor ^
+			((ntohl(iph->saddr ^ iph->daddr)) & 0xffff)) % count;
+
+	}
+
+	return (data->h_dest[5] ^ bond_dev->dev_addr[5]) % count;
+}
+
+/*
+ * Hash for the output device based upon layer 2 data
+ */
+static int bond_xmit_hash_policy_l2(struct sk_buff *skb,
+				   struct net_device *bond_dev, int count)
+{
+	struct ethhdr *data = (struct ethhdr *)skb->data;
+
+	return (data->h_dest[5] ^ bond_dev->dev_addr[5]) % count;
+}
+
 /*-------------------------- Device entry points ----------------------------*/
 
 static int bond_open(struct net_device *bond_dev)
@@ -4310,14 +4370,13 @@ out:
 }
 
 /*
- * in XOR mode, we determine the output device by performing xor on
- * the source and destination hw adresses.  If this device is not
- * enabled, find the next slave following this xor slave.
+ * In bond_xmit_xor() , we determine the output device by using a pre-
+ * determined xmit_hash_policy(), If the selected device is not enabled,
+ * find the next active slave.
  */
 static int bond_xmit_xor(struct sk_buff *skb, struct net_device *bond_dev)
 {
 	struct bonding *bond = bond_dev->priv;
-	struct ethhdr *data = (struct ethhdr *)skb->data;
 	struct slave *slave, *start_at;
 	int slave_no;
 	int i;
@@ -4329,7 +4388,7 @@ static int bond_xmit_xor(struct sk_buff *skb, struct net_device *bond_dev)
 		goto out;
 	}
 
-	slave_no = (data->h_dest[5]^bond_dev->dev_addr[5]) % bond->slave_cnt;
+	slave_no = bond->xmit_hash_policy(skb, bond_dev, bond->slave_cnt);
 
 	bond_for_each_slave(bond, slave, i) {
 		slave_no--;
@@ -4425,8 +4484,10 @@ out:
 /*
  * set bond mode specific net device operations
  */
-static inline void bond_set_mode_ops(struct net_device *bond_dev, int mode)
+static inline void bond_set_mode_ops(struct bonding *bond, int mode)
 {
+	struct net_device *bond_dev = bond->dev;
+
 	switch (mode) {
 	case BOND_MODE_ROUNDROBIN:
 		bond_dev->hard_start_xmit = bond_xmit_roundrobin;
@@ -4436,12 +4497,20 @@ static inline void bond_set_mode_ops(struct net_device *bond_dev, int mode)
 		break;
 	case BOND_MODE_XOR:
 		bond_dev->hard_start_xmit = bond_xmit_xor;
+		if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34)
+			bond->xmit_hash_policy = bond_xmit_hash_policy_l34;
+		else
+			bond->xmit_hash_policy = bond_xmit_hash_policy_l2;
 		break;
 	case BOND_MODE_BROADCAST:
 		bond_dev->hard_start_xmit = bond_xmit_broadcast;
 		break;
 	case BOND_MODE_8023AD:
 		bond_dev->hard_start_xmit = bond_3ad_xmit_xor;
+		if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34)
+			bond->xmit_hash_policy = bond_xmit_hash_policy_l34;
+		else
+			bond->xmit_hash_policy = bond_xmit_hash_policy_l2;
 		break;
 	case BOND_MODE_TLB:
 	case BOND_MODE_ALB:
@@ -4490,7 +4559,7 @@ static int __init bond_init(struct net_device *bond_dev, struct bond_params *par
 	bond_dev->change_mtu = bond_change_mtu;
 	bond_dev->set_mac_address = bond_set_mac_address;
 
-	bond_set_mode_ops(bond_dev, bond->params.mode);
+	bond_set_mode_ops(bond, bond->params.mode);
 
 	bond_dev->destructor = free_netdev;
 
@@ -4601,6 +4670,25 @@ static int bond_check_params(struct bond_params *params)
 		}
 	}
 
+	if (xmit_hash_policy) {
+		if ((bond_mode != BOND_MODE_XOR) &&
+		    (bond_mode != BOND_MODE_8023AD)) {
+			printk(KERN_INFO DRV_NAME
+			       ": xor_mode param is irrelevant in mode %s\n",
+			       bond_mode_name(bond_mode));
+		} else {
+			xmit_hashtype = bond_parse_parm(xmit_hash_policy,
+							xmit_hashtype_tbl);
+			if (xmit_hashtype == -1) {
+				printk(KERN_ERR DRV_NAME
+			       	": Error: Invalid xmit_hash_policy \"%s\"\n",
+			       	xmit_hash_policy == NULL ? "NULL" :
+				       xmit_hash_policy);
+				return -EINVAL;
+			}
+		}
+	}
+
 	if (lacp_rate) {
 		if (bond_mode != BOND_MODE_8023AD) {
 			printk(KERN_INFO DRV_NAME
@@ -4812,6 +4900,7 @@ static int bond_check_params(struct bond_params *params)
 
 	/* fill params struct with the proper values */
 	params->mode = bond_mode;
+	params->xmit_policy = xmit_hashtype;
 	params->miimon = miimon;
 	params->arp_interval = arp_interval;
 	params->updelay = updelay;
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 6558af22eda..d27f377b3ee 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -25,6 +25,10 @@
  *
  * 2003/12/01 - Shmulik Hen <shmulik.hen at intel dot com>
  *	- Code cleanup and style changes
+ *
+ * 2005/05/05 - Jason Gabler <jygabler at lbl dot gov>
+ *      - added "xmit_policy" kernel parameter for alternate hashing policy
+ *	  support for mode 2
  */
 
 #ifndef _LINUX_BONDING_H
@@ -36,8 +40,8 @@
 #include "bond_3ad.h"
 #include "bond_alb.h"
 
-#define DRV_VERSION	"2.6.2"
-#define DRV_RELDATE	"June 5, 2005"
+#define DRV_VERSION	"2.6.3"
+#define DRV_RELDATE	"June 8, 2005"
 #define DRV_NAME	"bonding"
 #define DRV_DESCRIPTION	"Ethernet Channel Bonding Driver"
 
@@ -137,6 +141,7 @@
 
 struct bond_params {
 	int mode;
+	int xmit_policy;
 	int miimon;
 	int arp_interval;
 	int use_carrier;
@@ -198,6 +203,7 @@ struct bonding {
 #endif /* CONFIG_PROC_FS */
 	struct   list_head bond_list;
 	struct   dev_mc_list *mc_list;
+	int      (*xmit_hash_policy)(struct sk_buff *, struct net_device *, int);
 	u32      master_ip;
 	u16      flags;
 	struct   ad_bond_info ad_info;
diff --git a/include/linux/if_bonding.h b/include/linux/if_bonding.h
index 57024ce2c74..84598fa2e9d 100644
--- a/include/linux/if_bonding.h
+++ b/include/linux/if_bonding.h
@@ -35,6 +35,9 @@
  *
  * 2003/12/01 - Shmulik Hen <shmulik.hen at intel dot com>
  *	- Code cleanup and style changes
+ *
+ * 2005/05/05 - Jason Gabler <jygabler at lbl dot gov>
+ *      - added definitions for various XOR hashing policies
  */
 
 #ifndef _LINUX_IF_BONDING_H
@@ -80,6 +83,10 @@
 
 #define BOND_DEFAULT_MAX_BONDS  1   /* Default maximum number of devices to support */
 
+/* hashing types */
+#define BOND_XMIT_POLICY_LAYER2		0 /* layer 2 (MAC only), default */
+#define BOND_XMIT_POLICY_LAYER34	1 /* layer 3+4 (IP ^ MAC) */
+
 typedef struct ifbond {
 	__s32 bond_mode;
 	__s32 num_slaves;
-- 
cgit v1.2.3-70-g09d2


From 32e9e25ef20789c24ffa1f41489a13932cf82c77 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sun, 26 Jun 2005 15:28:10 -0700
Subject: [ATALK]: Include asm/byteorder.h in linux/atalk.h

We're using __be16 in userland visible types, so we
have to include asm/byteorder.h so that works.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/atalk.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/atalk.h b/include/linux/atalk.h
index 09a1451c115..911c09cb9bf 100644
--- a/include/linux/atalk.h
+++ b/include/linux/atalk.h
@@ -1,6 +1,8 @@
 #ifndef __LINUX_ATALK_H__
 #define __LINUX_ATALK_H__
 
+#include <asm/byteorder.h>
+
 /*
  * AppleTalk networking structures
  *
-- 
cgit v1.2.3-70-g09d2


From f49d16ef2d6f008119d4ee2c895781fb229bad68 Mon Sep 17 00:00:00 2001
From: Manfred Spraul <manfred@colorfullife.com>
Date: Sun, 26 Jun 2005 11:36:52 +0200
Subject: [PATCH] forcedeth: Add support for new device id

This is a multi-part message in MIME format.
---
 drivers/net/forcedeth.c | 17 ++++++++++++++++-
 include/linux/pci_ids.h |  2 ++
 2 files changed, 18 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
index b471d1a8ffd..64f0f697c95 100644
--- a/drivers/net/forcedeth.c
+++ b/drivers/net/forcedeth.c
@@ -84,6 +84,7 @@
  *	0.32: 16 Apr 2005: RX_ERROR4 handling added.
  *	0.33: 16 May 2005: Support for MCP51 added.
  *	0.34: 18 Jun 2005: Add DEV_NEED_LINKTIMER to all nForce nics.
+ *	0.35: 26 Jun 2005: Support for MCP55 added.
  *
  * Known bugs:
  * We suspect that on some hardware no TX done interrupts are generated.
@@ -95,7 +96,7 @@
  * DEV_NEED_TIMERIRQ will not harm you on sane hardware, only generating a few
  * superfluous timer interrupts from the nic.
  */
-#define FORCEDETH_VERSION		"0.34"
+#define FORCEDETH_VERSION		"0.35"
 #define DRV_NAME			"forcedeth"
 
 #include <linux/module.h>
@@ -2284,6 +2285,20 @@ static struct pci_device_id pci_tbl[] = {
 		.subdevice = PCI_ANY_ID,
 		.driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER,
 	},
+	{	/* MCP55 Ethernet Controller */
+		.vendor = PCI_VENDOR_ID_NVIDIA,
+		.device = PCI_DEVICE_ID_NVIDIA_NVENET_14,
+		.subvendor = PCI_ANY_ID,
+		.subdevice = PCI_ANY_ID,
+		.driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER,
+	},
+	{	/* MCP55 Ethernet Controller */
+		.vendor = PCI_VENDOR_ID_NVIDIA,
+		.device = PCI_DEVICE_ID_NVIDIA_NVENET_15,
+		.subvendor = PCI_ANY_ID,
+		.subdevice = PCI_ANY_ID,
+		.driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER,
+	},
 	{0,},
 };
 
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index bf608808a60..3af7450278b 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1284,6 +1284,8 @@
 #define PCI_DEVICE_ID_NVIDIA_GEFORCE_FX_GO5700_2    0x0348
 #define PCI_DEVICE_ID_NVIDIA_QUADRO_FX_GO1000       0x034C
 #define PCI_DEVICE_ID_NVIDIA_QUADRO_FX_1100         0x034E
+#define PCI_DEVICE_ID_NVIDIA_NVENET_14              0x0372
+#define PCI_DEVICE_ID_NVIDIA_NVENET_15              0x0373
 
 #define PCI_VENDOR_ID_IMS		0x10e0
 #define PCI_DEVICE_ID_IMS_8849		0x8849
-- 
cgit v1.2.3-70-g09d2


From ec9f47cd6a14ca069bb7552a984c0a338fc7262b Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Mon, 27 Jun 2005 11:12:54 +0100
Subject: [PATCH] Serial: Split 8250 port table

Add separate files for the different 8250 ISA-based serial boards.

Looking across all the various architectures, it seems reasonable that
we can key the availability of the configuration options for these
beasts to the bus-related symbols (iow, CONFIG_ISA).  We also standardise
the base baud/uart clock rate for these boards - I'm sure that isn't
architecture specific, but is solely dependent on the crystal fitted
on the board (which should be the same no matter what type of machine
its fitted into.)

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 drivers/serial/8250.c          | 25 +++++---------
 drivers/serial/8250_accent.c   | 47 ++++++++++++++++++++++++++
 drivers/serial/8250_boca.c     | 61 ++++++++++++++++++++++++++++++++++
 drivers/serial/8250_fourport.c | 53 +++++++++++++++++++++++++++++
 drivers/serial/8250_hub6.c     | 58 ++++++++++++++++++++++++++++++++
 drivers/serial/8250_mca.c      | 64 +++++++++++++++++++++++++++++++++++
 drivers/serial/Kconfig         | 75 +++++++++++++++++++++++++++++++++---------
 drivers/serial/Makefile        |  5 +++
 include/linux/serial_8250.h    |  1 +
 9 files changed, 357 insertions(+), 32 deletions(-)
 create mode 100644 drivers/serial/8250_accent.c
 create mode 100644 drivers/serial/8250_boca.c
 create mode 100644 drivers/serial/8250_fourport.c
 create mode 100644 drivers/serial/8250_hub6.c
 create mode 100644 drivers/serial/8250_mca.c

(limited to 'include/linux')

diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c
index d8b9d2b8c20..34e75bc8f4c 100644
--- a/drivers/serial/8250.c
+++ b/drivers/serial/8250.c
@@ -77,23 +77,9 @@ static unsigned int share_irqs = SERIAL8250_SHARE_IRQS;
  */
 #define is_real_interrupt(irq)	((irq) != 0)
 
-/*
- * This converts from our new CONFIG_ symbols to the symbols
- * that asm/serial.h expects.  You _NEED_ to comment out the
- * linux/config.h include contained inside asm/serial.h for
- * this to work.
- */
-#undef CONFIG_SERIAL_MANY_PORTS
-#undef CONFIG_SERIAL_DETECT_IRQ
-#undef CONFIG_SERIAL_MULTIPORT
-#undef CONFIG_HUB6
-
 #ifdef CONFIG_SERIAL_8250_DETECT_IRQ
 #define CONFIG_SERIAL_DETECT_IRQ 1
 #endif
-#ifdef CONFIG_SERIAL_8250_MULTIPORT
-#define CONFIG_SERIAL_MULTIPORT 1
-#endif
 #ifdef CONFIG_SERIAL_8250_MANY_PORTS
 #define CONFIG_SERIAL_MANY_PORTS 1
 #endif
@@ -2323,10 +2309,11 @@ static int __devinit serial8250_probe(struct device *dev)
 {
 	struct plat_serial8250_port *p = dev->platform_data;
 	struct uart_port port;
+	int ret, i;
 
 	memset(&port, 0, sizeof(struct uart_port));
 
-	for (; p && p->flags != 0; p++) {
+	for (i = 0; p && p->flags != 0; p++, i++) {
 		port.iobase	= p->iobase;
 		port.membase	= p->membase;
 		port.irq	= p->irq;
@@ -2335,10 +2322,16 @@ static int __devinit serial8250_probe(struct device *dev)
 		port.iotype	= p->iotype;
 		port.flags	= p->flags;
 		port.mapbase	= p->mapbase;
+		port.hub6	= p->hub6;
 		port.dev	= dev;
 		if (share_irqs)
 			port.flags |= UPF_SHARE_IRQ;
-		serial8250_register_port(&port);
+		ret = serial8250_register_port(&port);
+		if (ret < 0) {
+			dev_err(dev, "unable to register port at index %d "
+				"(IO%lx MEM%lx IRQ%d): %d\n", i,
+				p->iobase, p->mapbase, p->irq, ret);
+		}
 	}
 	return 0;
 }
diff --git a/drivers/serial/8250_accent.c b/drivers/serial/8250_accent.c
new file mode 100644
index 00000000000..1f2c276063e
--- /dev/null
+++ b/drivers/serial/8250_accent.c
@@ -0,0 +1,47 @@
+/*
+ *  linux/drivers/serial/8250_accent.c
+ *
+ *  Copyright (C) 2005 Russell King.
+ *  Data taken from include/asm-i386/serial.h
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/serial_8250.h>
+
+#define PORT(_base,_irq)				\
+	{						\
+		.iobase		= _base,		\
+		.irq		= _irq,			\
+		.uartclk	= 1843200,		\
+		.iotype		= UPIO_PORT,		\
+		.flags		= UPF_BOOT_AUTOCONF,	\
+	}
+
+static struct plat_serial8250_port accent_data[] = {
+	PORT(0x330, 4),
+	PORT(0x338, 4),
+	{ },
+};
+
+static struct platform_device accent_device = {
+	.name			= "serial8250",
+	.id			= 2,
+	.dev			= {
+		.platform_data	= accent_data,
+	},
+};
+
+static int __init accent_init(void)
+{
+	return platform_device_register(&accent_device);
+}
+
+module_init(accent_init);
+
+MODULE_AUTHOR("Russell King");
+MODULE_DESCRIPTION("8250 serial probe module for Accent Async cards");
+MODULE_LICENSE("GPL");
diff --git a/drivers/serial/8250_boca.c b/drivers/serial/8250_boca.c
new file mode 100644
index 00000000000..465c9ea1e7a
--- /dev/null
+++ b/drivers/serial/8250_boca.c
@@ -0,0 +1,61 @@
+/*
+ *  linux/drivers/serial/8250_boca.c
+ *
+ *  Copyright (C) 2005 Russell King.
+ *  Data taken from include/asm-i386/serial.h
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/serial_8250.h>
+
+#define PORT(_base,_irq)				\
+	{						\
+		.iobase		= _base,		\
+		.irq		= _irq,			\
+		.uartclk	= 1843200,		\
+		.iotype		= UPIO_PORT,		\
+		.flags		= UPF_BOOT_AUTOCONF,	\
+	}
+
+static struct plat_serial8250_port boca_data[] = {
+	PORT(0x100, 12),
+	PORT(0x108, 12),
+	PORT(0x110, 12),
+	PORT(0x118, 12),
+	PORT(0x120, 12),
+	PORT(0x128, 12),
+	PORT(0x130, 12),
+	PORT(0x138, 12),
+	PORT(0x140, 12),
+	PORT(0x148, 12),
+	PORT(0x150, 12),
+	PORT(0x158, 12),
+	PORT(0x160, 12),
+	PORT(0x168, 12),
+	PORT(0x170, 12),
+	PORT(0x178, 12),
+	{ },
+};
+
+static struct platform_device boca_device = {
+	.name			= "serial8250",
+	.id			= 3,
+	.dev			= {
+		.platform_data	= boca_data,
+	},
+};
+
+static int __init boca_init(void)
+{
+	return platform_device_register(&boca_device);
+}
+
+module_init(boca_init);
+
+MODULE_AUTHOR("Russell King");
+MODULE_DESCRIPTION("8250 serial probe module for Boca cards");
+MODULE_LICENSE("GPL");
diff --git a/drivers/serial/8250_fourport.c b/drivers/serial/8250_fourport.c
new file mode 100644
index 00000000000..e9b4d908ef4
--- /dev/null
+++ b/drivers/serial/8250_fourport.c
@@ -0,0 +1,53 @@
+/*
+ *  linux/drivers/serial/8250_fourport.c
+ *
+ *  Copyright (C) 2005 Russell King.
+ *  Data taken from include/asm-i386/serial.h
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/serial_8250.h>
+
+#define PORT(_base,_irq)						\
+	{								\
+		.iobase		= _base,				\
+		.irq		= _irq,					\
+		.uartclk	= 1843200,				\
+		.iotype		= UPIO_PORT,				\
+		.flags		= UPF_BOOT_AUTOCONF | UPF_FOURPORT,	\
+	}
+
+static struct plat_serial8250_port fourport_data[] = {
+	PORT(0x1a0, 9),
+	PORT(0x1a8, 9),
+	PORT(0x1b0, 9),
+	PORT(0x1b8, 9),
+	PORT(0x2a0, 5),
+	PORT(0x2a8, 5),
+	PORT(0x2b0, 5),
+	PORT(0x2b8, 5),
+	{ },
+};
+
+static struct platform_device fourport_device = {
+	.name			= "serial8250",
+	.id			= 1,
+	.dev			= {
+		.platform_data	= fourport_data,
+	},
+};
+
+static int __init fourport_init(void)
+{
+	return platform_device_register(&fourport_device);
+}
+
+module_init(fourport_init);
+
+MODULE_AUTHOR("Russell King");
+MODULE_DESCRIPTION("8250 serial probe module for AST Fourport cards");
+MODULE_LICENSE("GPL");
diff --git a/drivers/serial/8250_hub6.c b/drivers/serial/8250_hub6.c
new file mode 100644
index 00000000000..77f396f84b4
--- /dev/null
+++ b/drivers/serial/8250_hub6.c
@@ -0,0 +1,58 @@
+/*
+ *  linux/drivers/serial/8250_hub6.c
+ *
+ *  Copyright (C) 2005 Russell King.
+ *  Data taken from include/asm-i386/serial.h
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/serial_8250.h>
+
+#define HUB6(card,port)							\
+	{								\
+		.iobase		= 0x302,				\
+		.irq		= 3,					\
+		.uartclk	= 1843200,				\
+		.iotype		= UPIO_HUB6,				\
+		.flags		= UPF_BOOT_AUTOCONF,			\
+		.hub6		= (card) << 6 | (port) << 3 | 1,	\
+	}
+
+static struct plat_serial8250_port hub6_data[] = {
+	HUB6(0,0),
+	HUB6(0,1),
+	HUB6(0,2),
+	HUB6(0,3),
+	HUB6(0,4),
+	HUB6(0,5),
+	HUB6(1,0),
+	HUB6(1,1),
+	HUB6(1,2),
+	HUB6(1,3),
+	HUB6(1,4),
+	HUB6(1,5),
+	{ },
+};
+
+static struct platform_device hub6_device = {
+	.name			= "serial8250",
+	.id			= 4,
+	.dev			= {
+		.platform_data	= hub6_data,
+	},
+};
+
+static int __init hub6_init(void)
+{
+	return platform_device_register(&hub6_device);
+}
+
+module_init(hub6_init);
+
+MODULE_AUTHOR("Russell King");
+MODULE_DESCRIPTION("8250 serial probe module for Hub6 cards");
+MODULE_LICENSE("GPL");
diff --git a/drivers/serial/8250_mca.c b/drivers/serial/8250_mca.c
new file mode 100644
index 00000000000..f0c40d68b8c
--- /dev/null
+++ b/drivers/serial/8250_mca.c
@@ -0,0 +1,64 @@
+/*
+ *  linux/drivers/serial/8250_mca.c
+ *
+ *  Copyright (C) 2005 Russell King.
+ *  Data taken from include/asm-i386/serial.h
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/mca.h>
+#include <linux/serial_8250.h>
+
+/*
+ * FIXME: Should we be doing AUTO_IRQ here?
+ */
+#ifdef CONFIG_SERIAL_8250_DETECT_IRQ
+#define MCA_FLAGS	UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | UPF_AUTO_IRQ
+#else
+#define MCA_FLAGS	UPF_BOOT_AUTOCONF | UPF_SKIP_TEST
+#endif
+
+#define PORT(_base,_irq)			\
+	{					\
+		.iobase		= _base,	\
+		.irq		= _irq,		\
+		.uartclk	= 1843200,	\
+		.iotype		= UPIO_PORT,	\
+		.flags		= MCA_FLAGS,	\
+	}
+
+static struct plat_serial8250_port mca_data[] = {
+	PORT(0x3220, 3),
+	PORT(0x3228, 3),
+	PORT(0x4220, 3),
+	PORT(0x4228, 3),
+	PORT(0x5220, 3),
+	PORT(0x5228, 3),
+	{ },
+};
+
+static struct platform_device mca_device = {
+	.name			= "serial8250",
+	.id			= 5,
+	.dev			= {
+		.platform_data	= mca_data,
+	},
+};
+
+static int __init mca_init(void)
+{
+	if (!MCA_bus)
+		return -ENODEV;
+	return platform_device_register(&mca_device);
+}
+
+module_init(mca_init);
+
+MODULE_AUTHOR("Russell King");
+MODULE_DESCRIPTION("8250 serial probe module for MCA ports");
+MODULE_LICENSE("GPL");
diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig
index 25fcef2c42d..e879bce160d 100644
--- a/drivers/serial/Kconfig
+++ b/drivers/serial/Kconfig
@@ -89,11 +89,11 @@ config SERIAL_8250_NR_UARTS
 	int "Maximum number of non-legacy 8250/16550 serial ports"
 	depends on SERIAL_8250
 	default "4"
-	---help---
-	  Set this to the number of non-legacy serial ports you want
-	  the driver to support.  This includes any ports discovered
-	  via ACPI or PCI enumeration and any ports that may be added
-	  at run-time via hot-plug.
+	help
+	  Set this to the number of serial ports you want the driver
+	  to support.  This includes any ports discovered via ACPI or
+	  PCI enumeration and any ports that may be added at run-time
+	  via hot-plug, or any ISA multi-port serial cards.
 
 config SERIAL_8250_EXTENDED
 	bool "Extended 8250/16550 serial driver options"
@@ -141,31 +141,74 @@ config SERIAL_8250_DETECT_IRQ
 
 	  If unsure, say N.
 
-config SERIAL_8250_MULTIPORT
-	bool "Support special multiport boards"
-	depends on SERIAL_8250_EXTENDED
-	help
-	  Some multiport serial ports have special ports which are used to
-	  signal when there are any serial ports on the board which need
-	  servicing. Say Y here to enable the serial driver to take advantage
-	  of those special I/O ports.
-
 config SERIAL_8250_RSA
 	bool "Support RSA serial ports"
 	depends on SERIAL_8250_EXTENDED
 	help
 	  ::: To be written :::
 
-comment "Non-8250 serial port support"
+#
+# Multi-port serial cards
+#
+
+config SERIAL_8250_FOURPORT
+	tristate "Support Fourport cards"
+	depends on SERIAL_8250 != n && ISA && SERIAL_8250_MANY_PORTS
+	help
+	  Say Y here if you have an AST FourPort serial board.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called 8250_fourport.
+
+config SERIAL_8250_ACCENT
+	tristate "Support Accent cards"
+	depends on SERIAL_8250 != n && ISA && SERIAL_8250_MANY_PORTS
+	help
+	  Say Y here if you have an Accent Async serial board.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called 8250_accent.
+
+
+config SERIAL_8250_BOCA
+	tristate "Support Boca cards"
+	depends on SERIAL_8250 != n && ISA && SERIAL_8250_MANY_PORTS
+	help
+	  Say Y here if you have a Boca serial board.  Please read the Boca
+	  mini-HOWTO, avaialble from <http://www.tldp.org/docs.html#howto>
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called 8250_boca.
+
+
+config SERIAL_8250_HUB6
+	tristate "Support Hub6 cards"
+	depends on SERIAL_8250 != n && ISA && SERIAL_8250_MANY_PORTS
+	help
+	  Say Y here if you have a HUB6 serial board.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called 8250_hub6.
+
+config SERIAL_8250_MCA
+	tristate "Support 8250-type ports on MCA buses"
+	depends on SERIAL_8250 != n && MCA
+	help
+	  Say Y here if you have a MCA serial ports.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called 8250_mca.
 
 config SERIAL_8250_ACORN
 	tristate "Acorn expansion card serial port support"
-	depends on ARM && ARCH_ACORN && SERIAL_8250
+	depends on ARCH_ACORN && SERIAL_8250
 	help
 	  If you have an Atomwide Serial card or Serial Port card for an Acorn
 	  system, say Y to this option.  The driver can handle 1, 2, or 3 port
 	  cards.  If unsure, say N.
 
+comment "Non-8250 serial port support"
+
 config SERIAL_AMBA_PL010
 	tristate "ARM AMBA PL010 serial port support"
 	depends on ARM_AMBA
diff --git a/drivers/serial/Makefile b/drivers/serial/Makefile
index 8f1cdde7dbe..65bd4381685 100644
--- a/drivers/serial/Makefile
+++ b/drivers/serial/Makefile
@@ -17,6 +17,11 @@ obj-$(CONFIG_SERIAL_8250) += 8250.o $(serial-8250-y)
 obj-$(CONFIG_SERIAL_8250_CS) += serial_cs.o
 obj-$(CONFIG_SERIAL_8250_ACORN) += 8250_acorn.o
 obj-$(CONFIG_SERIAL_8250_CONSOLE) += 8250_early.o
+obj-$(CONFIG_SERIAL_8250_FOURPORT) += 8250_fourport.o
+obj-$(CONFIG_SERIAL_8250_ACCENT) += 8250_accent.o
+obj-$(CONFIG_SERIAL_8250_BOCA) += 8250_boca.o
+obj-$(CONFIG_SERIAL_8250_HUB6) += 8250_hub6.o
+obj-$(CONFIG_SERIAL_8250_MCA) += 8250_mca.o
 obj-$(CONFIG_SERIAL_AMBA_PL010) += amba-pl010.o
 obj-$(CONFIG_SERIAL_AMBA_PL011) += amba-pl011.o
 obj-$(CONFIG_SERIAL_CLPS711X) += clps711x.o
diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h
index 823181af6dd..3e3c1fa35b0 100644
--- a/include/linux/serial_8250.h
+++ b/include/linux/serial_8250.h
@@ -22,6 +22,7 @@ struct plat_serial8250_port {
 	unsigned int	uartclk;	/* UART clock rate */
 	unsigned char	regshift;	/* register shift */
 	unsigned char	iotype;		/* UPIO_* */
+	unsigned char	hub6;
 	unsigned int	flags;		/* UPF_* flags */
 };
 
-- 
cgit v1.2.3-70-g09d2


From 22e2c507c301c3dbbcf91b4948b88f78842ee6c9 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Mon, 27 Jun 2005 10:55:12 +0200
Subject: [PATCH] Update cfq io scheduler to time sliced design

This updates the CFQ io scheduler to the new time sliced design (cfq
v3).  It provides full process fairness, while giving excellent
aggregate system throughput even for many competing processes.  It
supports io priorities, either inherited from the cpu nice value or set
directly with the ioprio_get/set syscalls.  The latter closely mimic
set/getpriority.

This import is based on my latest from -mm.

Signed-off-by: Jens Axboe <axboe@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/i386/kernel/syscall_table.S |    2 +
 arch/ia64/kernel/entry.S         |    4 +-
 arch/ppc/kernel/misc.S           |    2 +
 drivers/block/as-iosched.c       |    5 +-
 drivers/block/cfq-iosched.c      | 1910 +++++++++++++++++++++++++-------------
 drivers/block/deadline-iosched.c |    3 +-
 drivers/block/elevator.c         |    9 +-
 drivers/block/ll_rw_blk.c        |   59 +-
 fs/Makefile                      |    1 +
 fs/ioprio.c                      |  172 ++++
 fs/reiserfs/journal.c            |   12 +
 include/asm-i386/unistd.h        |    4 +-
 include/asm-ia64/unistd.h        |    2 +
 include/asm-ppc/unistd.h         |    4 +-
 include/asm-x86_64/unistd.h      |    6 +-
 include/linux/bio.h              |   14 +
 include/linux/blkdev.h           |   25 +-
 include/linux/elevator.h         |    8 +-
 include/linux/fs.h               |   19 +
 include/linux/init_task.h        |    2 +
 include/linux/ioprio.h           |   87 ++
 include/linux/sched.h            |    6 +-
 include/linux/writeback.h        |    6 +-
 kernel/exit.c                    |    2 +
 kernel/fork.c                    |    5 +
 kernel/sched.c                   |    8 -
 26 files changed, 1685 insertions(+), 692 deletions(-)
 create mode 100644 fs/ioprio.c
 create mode 100644 include/linux/ioprio.h

(limited to 'include/linux')

diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S
index 442a6e937b1..3db9a04aec6 100644
--- a/arch/i386/kernel/syscall_table.S
+++ b/arch/i386/kernel/syscall_table.S
@@ -289,3 +289,5 @@ ENTRY(sys_call_table)
 	.long sys_add_key
 	.long sys_request_key
 	.long sys_keyctl
+	.long sys_ioprio_set
+	.long sys_ioprio_get		/* 290 */
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index b1d5d3d5276..785a51b0ad8 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -1577,8 +1577,8 @@ sys_call_table:
 	data8 sys_add_key
 	data8 sys_request_key
 	data8 sys_keyctl
-	data8 sys_ni_syscall
-	data8 sys_ni_syscall			// 1275
+	data8 sys_ioprio_set
+	data8 sys_ioprio_get			// 1275
 	data8 sys_set_zone_reclaim
 	data8 sys_ni_syscall
 	data8 sys_ni_syscall
diff --git a/arch/ppc/kernel/misc.S b/arch/ppc/kernel/misc.S
index b6a63a49a23..191a8def3bd 100644
--- a/arch/ppc/kernel/misc.S
+++ b/arch/ppc/kernel/misc.S
@@ -1449,3 +1449,5 @@ _GLOBAL(sys_call_table)
 	.long sys_request_key		/* 270 */
 	.long sys_keyctl
 	.long sys_waitid
+	.long sys_ioprio_set
+	.long sys_ioprio_get
diff --git a/drivers/block/as-iosched.c b/drivers/block/as-iosched.c
index 3410b4d294b..91aeb678135 100644
--- a/drivers/block/as-iosched.c
+++ b/drivers/block/as-iosched.c
@@ -1806,7 +1806,8 @@ static void as_put_request(request_queue_t *q, struct request *rq)
 	rq->elevator_private = NULL;
 }
 
-static int as_set_request(request_queue_t *q, struct request *rq, int gfp_mask)
+static int as_set_request(request_queue_t *q, struct request *rq,
+			  struct bio *bio, int gfp_mask)
 {
 	struct as_data *ad = q->elevator->elevator_data;
 	struct as_rq *arq = mempool_alloc(ad->arq_pool, gfp_mask);
@@ -1827,7 +1828,7 @@ static int as_set_request(request_queue_t *q, struct request *rq, int gfp_mask)
 	return 1;
 }
 
-static int as_may_queue(request_queue_t *q, int rw)
+static int as_may_queue(request_queue_t *q, int rw, struct bio *bio)
 {
 	int ret = ELV_MQUEUE_MAY;
 	struct as_data *ad = q->elevator->elevator_data;
diff --git a/drivers/block/cfq-iosched.c b/drivers/block/cfq-iosched.c
index 3ac47dde64d..35f6e569d5e 100644
--- a/drivers/block/cfq-iosched.c
+++ b/drivers/block/cfq-iosched.c
@@ -21,22 +21,33 @@
 #include <linux/hash.h>
 #include <linux/rbtree.h>
 #include <linux/mempool.h>
-
-static unsigned long max_elapsed_crq;
-static unsigned long max_elapsed_dispatch;
+#include <linux/ioprio.h>
+#include <linux/writeback.h>
 
 /*
  * tunables
  */
 static int cfq_quantum = 4;		/* max queue in one round of service */
 static int cfq_queued = 8;		/* minimum rq allocate limit per-queue*/
-static int cfq_service = HZ;		/* period over which service is avg */
-static int cfq_fifo_expire_r = HZ / 2;	/* fifo timeout for sync requests */
-static int cfq_fifo_expire_w = 5 * HZ;	/* fifo timeout for async requests */
-static int cfq_fifo_rate = HZ / 8;	/* fifo expiry rate */
+static int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 };
 static int cfq_back_max = 16 * 1024;	/* maximum backwards seek, in KiB */
 static int cfq_back_penalty = 2;	/* penalty of a backwards seek */
 
+static int cfq_slice_sync = HZ / 10;
+static int cfq_slice_async = HZ / 50;
+static int cfq_slice_async_rq = 2;
+static int cfq_slice_idle = HZ / 50;
+
+#define CFQ_IDLE_GRACE		(HZ / 10)
+#define CFQ_SLICE_SCALE		(5)
+
+#define CFQ_KEY_ASYNC		(0)
+
+/*
+ * disable queueing at the driver/hardware level
+ */
+static int cfq_max_depth = 1;
+
 /*
  * for the hash of cfqq inside the cfqd
  */
@@ -55,6 +66,7 @@ static int cfq_back_penalty = 2;	/* penalty of a backwards seek */
 #define list_entry_hash(ptr)	hlist_entry((ptr), struct cfq_rq, hash)
 
 #define list_entry_cfqq(ptr)	list_entry((ptr), struct cfq_queue, cfq_list)
+#define list_entry_fifo(ptr)	list_entry((ptr), struct request, queuelist)
 
 #define RQ_DATA(rq)		(rq)->elevator_private
 
@@ -75,78 +87,101 @@ static int cfq_back_penalty = 2;	/* penalty of a backwards seek */
 #define rb_entry_crq(node)	rb_entry((node), struct cfq_rq, rb_node)
 #define rq_rb_key(rq)		(rq)->sector
 
-/*
- * threshold for switching off non-tag accounting
- */
-#define CFQ_MAX_TAG		(4)
-
-/*
- * sort key types and names
- */
-enum {
-	CFQ_KEY_PGID,
-	CFQ_KEY_TGID,
-	CFQ_KEY_UID,
-	CFQ_KEY_GID,
-	CFQ_KEY_LAST,
-};
-
-static char *cfq_key_types[] = { "pgid", "tgid", "uid", "gid", NULL };
-
 static kmem_cache_t *crq_pool;
 static kmem_cache_t *cfq_pool;
 static kmem_cache_t *cfq_ioc_pool;
 
+#define CFQ_PRIO_LISTS		IOPRIO_BE_NR
+#define cfq_class_idle(cfqq)	((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
+#define cfq_class_be(cfqq)	((cfqq)->ioprio_class == IOPRIO_CLASS_BE)
+#define cfq_class_rt(cfqq)	((cfqq)->ioprio_class == IOPRIO_CLASS_RT)
+
+#define cfq_cfqq_sync(cfqq)	((cfqq)->key != CFQ_KEY_ASYNC)
+
+/*
+ * Per block device queue structure
+ */
 struct cfq_data {
-	struct list_head rr_list;
+	atomic_t ref;
+	request_queue_t *queue;
+
+	/*
+	 * rr list of queues with requests and the count of them
+	 */
+	struct list_head rr_list[CFQ_PRIO_LISTS];
+	struct list_head busy_rr;
+	struct list_head cur_rr;
+	struct list_head idle_rr;
+	unsigned int busy_queues;
+
+	/*
+	 * non-ordered list of empty cfqq's
+	 */
 	struct list_head empty_list;
 
+	/*
+	 * cfqq lookup hash
+	 */
 	struct hlist_head *cfq_hash;
-	struct hlist_head *crq_hash;
 
-	/* queues on rr_list (ie they have pending requests */
-	unsigned int busy_queues;
+	/*
+	 * global crq hash for all queues
+	 */
+	struct hlist_head *crq_hash;
 
 	unsigned int max_queued;
 
-	atomic_t ref;
+	mempool_t *crq_pool;
 
-	int key_type;
+	int rq_in_driver;
 
-	mempool_t *crq_pool;
+	/*
+	 * schedule slice state info
+	 */
+	/*
+	 * idle window management
+	 */
+	struct timer_list idle_slice_timer;
+	struct work_struct unplug_work;
 
-	request_queue_t *queue;
+	struct cfq_queue *active_queue;
+	struct cfq_io_context *active_cic;
+	int cur_prio, cur_end_prio;
+	unsigned int dispatch_slice;
+
+	struct timer_list idle_class_timer;
 
 	sector_t last_sector;
+	unsigned long last_end_request;
 
-	int rq_in_driver;
+	unsigned int rq_starved;
 
 	/*
 	 * tunables, see top of file
 	 */
 	unsigned int cfq_quantum;
 	unsigned int cfq_queued;
-	unsigned int cfq_fifo_expire_r;
-	unsigned int cfq_fifo_expire_w;
-	unsigned int cfq_fifo_batch_expire;
+	unsigned int cfq_fifo_expire[2];
 	unsigned int cfq_back_penalty;
 	unsigned int cfq_back_max;
-	unsigned int find_best_crq;
-
-	unsigned int cfq_tagged;
+	unsigned int cfq_slice[2];
+	unsigned int cfq_slice_async_rq;
+	unsigned int cfq_slice_idle;
+	unsigned int cfq_max_depth;
 };
 
+/*
+ * Per process-grouping structure
+ */
 struct cfq_queue {
 	/* reference count */
 	atomic_t ref;
 	/* parent cfq_data */
 	struct cfq_data *cfqd;
-	/* hash of mergeable requests */
+	/* cfqq lookup hash */
 	struct hlist_node cfq_hash;
 	/* hash key */
-	unsigned long key;
-	/* whether queue is on rr (or empty) list */
-	int on_rr;
+	unsigned int key;
 	/* on either rr or empty list of cfqd */
 	struct list_head cfq_list;
 	/* sorted list of pending requests */
@@ -158,21 +193,35 @@ struct cfq_queue {
 	/* currently allocated requests */
 	int allocated[2];
 	/* fifo list of requests in sort_list */
-	struct list_head fifo[2];
-	/* last time fifo expired */
-	unsigned long last_fifo_expire;
-
-	int key_type;
-
-	unsigned long service_start;
-	unsigned long service_used;
+	struct list_head fifo;
 
-	unsigned int max_rate;
+	unsigned long slice_start;
+	unsigned long slice_end;
+	unsigned long slice_left;
+	unsigned long service_last;
 
 	/* number of requests that have been handed to the driver */
 	int in_flight;
-	/* number of currently allocated requests */
-	int alloc_limit[2];
+
+	/* io prio of this group */
+	unsigned short ioprio, org_ioprio;
+	unsigned short ioprio_class, org_ioprio_class;
+
+	/* whether queue is on rr (or empty) list */
+	unsigned on_rr : 1;
+	/* idle slice, waiting for new request submission */
+	unsigned wait_request : 1;
+	/* set when wait_request gets set, reset on first rq alloc */
+	unsigned must_alloc : 1;
+	/* only gets one must_alloc per slice */
+	unsigned must_alloc_slice : 1;
+	/* idle slice, request added, now waiting to dispatch it */
+	unsigned must_dispatch : 1;
+	/* fifo expire per-slice */
+	unsigned fifo_expire : 1;
+
+	unsigned idle_window : 1;
+	unsigned prio_changed : 1;
 };
 
 struct cfq_rq {
@@ -184,42 +233,17 @@ struct cfq_rq {
 	struct cfq_queue *cfq_queue;
 	struct cfq_io_context *io_context;
 
-	unsigned long service_start;
-	unsigned long queue_start;
-
-	unsigned int in_flight : 1;
-	unsigned int accounted : 1;
-	unsigned int is_sync   : 1;
-	unsigned int is_write  : 1;
+	unsigned in_flight : 1;
+	unsigned accounted : 1;
+	unsigned is_sync   : 1;
+	unsigned requeued  : 1;
 };
 
-static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned long);
+static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned int);
 static void cfq_dispatch_sort(request_queue_t *, struct cfq_rq *);
-static void cfq_update_next_crq(struct cfq_rq *);
 static void cfq_put_cfqd(struct cfq_data *cfqd);
 
-/*
- * what the fairness is based on (ie how processes are grouped and
- * differentiated)
- */
-static inline unsigned long
-cfq_hash_key(struct cfq_data *cfqd, struct task_struct *tsk)
-{
-	/*
-	 * optimize this so that ->key_type is the offset into the struct
-	 */
-	switch (cfqd->key_type) {
-		case CFQ_KEY_PGID:
-			return process_group(tsk);
-		default:
-		case CFQ_KEY_TGID:
-			return tsk->tgid;
-		case CFQ_KEY_UID:
-			return tsk->uid;
-		case CFQ_KEY_GID:
-			return tsk->gid;
-	}
-}
+#define process_sync(tsk)	((tsk)->flags & PF_SYNCWRITE)
 
 /*
  * lots of deadline iosched dupes, can be abstracted later...
@@ -235,16 +259,12 @@ static void cfq_remove_merge_hints(request_queue_t *q, struct cfq_rq *crq)
 
 	if (q->last_merge == crq->request)
 		q->last_merge = NULL;
-
-	cfq_update_next_crq(crq);
 }
 
 static inline void cfq_add_crq_hash(struct cfq_data *cfqd, struct cfq_rq *crq)
 {
 	const int hash_idx = CFQ_MHASH_FN(rq_hash_key(crq->request));
 
-	BUG_ON(!hlist_unhashed(&crq->hash));
-
 	hlist_add_head(&crq->hash, &cfqd->crq_hash[hash_idx]);
 }
 
@@ -257,8 +277,6 @@ static struct request *cfq_find_rq_hash(struct cfq_data *cfqd, sector_t offset)
 		struct cfq_rq *crq = list_entry_hash(entry);
 		struct request *__rq = crq->request;
 
-		BUG_ON(hlist_unhashed(&crq->hash));
-
 		if (!rq_mergeable(__rq)) {
 			cfq_del_crq_hash(crq);
 			continue;
@@ -287,36 +305,16 @@ cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2)
 		return crq2;
 	if (crq2 == NULL)
 		return crq1;
+	if (crq1->requeued)
+		return crq1;
+	if (crq2->requeued)
+		return crq2;
 
 	s1 = crq1->request->sector;
 	s2 = crq2->request->sector;
 
 	last = cfqd->last_sector;
 
-#if 0
-	if (!list_empty(&cfqd->queue->queue_head)) {
-		struct list_head *entry = &cfqd->queue->queue_head;
-		unsigned long distance = ~0UL;
-		struct request *rq;
-
-		while ((entry = entry->prev) != &cfqd->queue->queue_head) {
-			rq = list_entry_rq(entry);
-
-			if (blk_barrier_rq(rq))
-				break;
-
-			if (distance < abs(s1 - rq->sector + rq->nr_sectors)) {
-				distance = abs(s1 - rq->sector +rq->nr_sectors);
-				last = rq->sector + rq->nr_sectors;
-			}
-			if (distance < abs(s2 - rq->sector + rq->nr_sectors)) {
-				distance = abs(s2 - rq->sector +rq->nr_sectors);
-				last = rq->sector + rq->nr_sectors;
-			}
-		}
-	}
-#endif
-
 	/*
 	 * by definition, 1KiB is 2 sectors
 	 */
@@ -377,11 +375,13 @@ cfq_find_next_crq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	struct cfq_rq *crq_next = NULL, *crq_prev = NULL;
 	struct rb_node *rbnext, *rbprev;
 
-	if (!ON_RB(&last->rb_node))
-		return NULL;
-
-	if ((rbnext = rb_next(&last->rb_node)) == NULL)
+	if (ON_RB(&last->rb_node))
+		rbnext = rb_next(&last->rb_node);
+	else {
 		rbnext = rb_first(&cfqq->sort_list);
+		if (rbnext == &last->rb_node)
+			rbnext = NULL;
+	}
 
 	rbprev = rb_prev(&last->rb_node);
 
@@ -401,67 +401,53 @@ static void cfq_update_next_crq(struct cfq_rq *crq)
 		cfqq->next_crq = cfq_find_next_crq(cfqq->cfqd, cfqq, crq);
 }
 
-static int cfq_check_sort_rr_list(struct cfq_queue *cfqq)
+static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted)
 {
-	struct list_head *head = &cfqq->cfqd->rr_list;
-	struct list_head *next, *prev;
-
-	/*
-	 * list might still be ordered
-	 */
-	next = cfqq->cfq_list.next;
-	if (next != head) {
-		struct cfq_queue *cnext = list_entry_cfqq(next);
+	struct cfq_data *cfqd = cfqq->cfqd;
+	struct list_head *list, *entry;
 
-		if (cfqq->service_used > cnext->service_used)
-			return 1;
-	}
+	BUG_ON(!cfqq->on_rr);
 
-	prev = cfqq->cfq_list.prev;
-	if (prev != head) {
-		struct cfq_queue *cprev = list_entry_cfqq(prev);
+	list_del(&cfqq->cfq_list);
 
-		if (cfqq->service_used < cprev->service_used)
-			return 1;
+	if (cfq_class_rt(cfqq))
+		list = &cfqd->cur_rr;
+	else if (cfq_class_idle(cfqq))
+		list = &cfqd->idle_rr;
+	else {
+		/*
+		 * if cfqq has requests in flight, don't allow it to be
+		 * found in cfq_set_active_queue before it has finished them.
+		 * this is done to increase fairness between a process that
+		 * has lots of io pending vs one that only generates one
+		 * sporadically or synchronously
+		 */
+		if (cfqq->in_flight)
+			list = &cfqd->busy_rr;
+		else
+			list = &cfqd->rr_list[cfqq->ioprio];
 	}
 
-	return 0;
-}
-
-static void cfq_sort_rr_list(struct cfq_queue *cfqq, int new_queue)
-{
-	struct list_head *entry = &cfqq->cfqd->rr_list;
-
-	if (!cfqq->on_rr)
-		return;
-	if (!new_queue && !cfq_check_sort_rr_list(cfqq))
+	/*
+	 * if queue was preempted, just add to front to be fair. busy_rr
+	 * isn't sorted.
+	 */
+	if (preempted || list == &cfqd->busy_rr) {
+		list_add(&cfqq->cfq_list, list);
 		return;
-
-	list_del(&cfqq->cfq_list);
+	}
 
 	/*
-	 * sort by our mean service_used, sub-sort by in-flight requests
+	 * sort by when queue was last serviced
 	 */
-	while ((entry = entry->prev) != &cfqq->cfqd->rr_list) {
+	entry = list;
+	while ((entry = entry->prev) != list) {
 		struct cfq_queue *__cfqq = list_entry_cfqq(entry);
 
-		if (cfqq->service_used > __cfqq->service_used)
+		if (!__cfqq->service_last)
+			break;
+		if (time_before(__cfqq->service_last, cfqq->service_last))
 			break;
-		else if (cfqq->service_used == __cfqq->service_used) {
-			struct list_head *prv;
-
-			while ((prv = entry->prev) != &cfqq->cfqd->rr_list) {
-				__cfqq = list_entry_cfqq(prv);
-
-				WARN_ON(__cfqq->service_used > cfqq->service_used);
-				if (cfqq->service_used != __cfqq->service_used)
-					break;
-				if (cfqq->in_flight > __cfqq->in_flight)
-					break;
-
-				entry = prv;
-			}
-		}
 	}
 
 	list_add(&cfqq->cfq_list, entry);
@@ -469,28 +455,24 @@ static void cfq_sort_rr_list(struct cfq_queue *cfqq, int new_queue)
 
 /*
  * add to busy list of queues for service, trying to be fair in ordering
- * the pending list according to requests serviced
+ * the pending list according to last request service
  */
 static inline void
-cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
+cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq, int requeue)
 {
-	/*
-	 * it's currently on the empty list
-	 */
+	BUG_ON(cfqq->on_rr);
 	cfqq->on_rr = 1;
 	cfqd->busy_queues++;
 
-	if (time_after(jiffies, cfqq->service_start + cfq_service))
-		cfqq->service_used >>= 3;
-
-	cfq_sort_rr_list(cfqq, 1);
+	cfq_resort_rr_list(cfqq, requeue);
 }
 
 static inline void
 cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
-	list_move(&cfqq->cfq_list, &cfqd->empty_list);
+	BUG_ON(!cfqq->on_rr);
 	cfqq->on_rr = 0;
+	list_move(&cfqq->cfq_list, &cfqd->empty_list);
 
 	BUG_ON(!cfqd->busy_queues);
 	cfqd->busy_queues--;
@@ -505,16 +487,17 @@ static inline void cfq_del_crq_rb(struct cfq_rq *crq)
 
 	if (ON_RB(&crq->rb_node)) {
 		struct cfq_data *cfqd = cfqq->cfqd;
+		const int sync = crq->is_sync;
 
-		BUG_ON(!cfqq->queued[crq->is_sync]);
+		BUG_ON(!cfqq->queued[sync]);
+		cfqq->queued[sync]--;
 
 		cfq_update_next_crq(crq);
 
-		cfqq->queued[crq->is_sync]--;
 		rb_erase(&crq->rb_node, &cfqq->sort_list);
 		RB_CLEAR_COLOR(&crq->rb_node);
 
-		if (RB_EMPTY(&cfqq->sort_list) && cfqq->on_rr)
+		if (cfqq->on_rr && RB_EMPTY(&cfqq->sort_list))
 			cfq_del_cfqq_rr(cfqd, cfqq);
 	}
 }
@@ -562,7 +545,7 @@ static void cfq_add_crq_rb(struct cfq_rq *crq)
 	rb_insert_color(&crq->rb_node, &cfqq->sort_list);
 
 	if (!cfqq->on_rr)
-		cfq_add_cfqq_rr(cfqd, cfqq);
+		cfq_add_cfqq_rr(cfqd, cfqq, crq->requeued);
 
 	/*
 	 * check if this request is a better next-serve candidate
@@ -581,11 +564,10 @@ cfq_reposition_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq)
 	cfq_add_crq_rb(crq);
 }
 
-static struct request *
-cfq_find_rq_rb(struct cfq_data *cfqd, sector_t sector)
+static struct request *cfq_find_rq_rb(struct cfq_data *cfqd, sector_t sector)
+
 {
-	const unsigned long key = cfq_hash_key(cfqd, current);
-	struct cfq_queue *cfqq = cfq_find_cfq_hash(cfqd, key);
+	struct cfq_queue *cfqq = cfq_find_cfq_hash(cfqd, current->pid);
 	struct rb_node *n;
 
 	if (!cfqq)
@@ -609,20 +591,23 @@ out:
 
 static void cfq_deactivate_request(request_queue_t *q, struct request *rq)
 {
+	struct cfq_data *cfqd = q->elevator->elevator_data;
 	struct cfq_rq *crq = RQ_DATA(rq);
 
 	if (crq) {
 		struct cfq_queue *cfqq = crq->cfq_queue;
 
-		if (cfqq->cfqd->cfq_tagged) {
-			cfqq->service_used--;
-			cfq_sort_rr_list(cfqq, 0);
-		}
-
 		if (crq->accounted) {
 			crq->accounted = 0;
-			cfqq->cfqd->rq_in_driver--;
+			WARN_ON(!cfqd->rq_in_driver);
+			cfqd->rq_in_driver--;
 		}
+		if (crq->in_flight) {
+			crq->in_flight = 0;
+			WARN_ON(!cfqq->in_flight);
+			cfqq->in_flight--;
+		}
+		crq->requeued = 1;
 	}
 }
 
@@ -640,11 +625,10 @@ static void cfq_remove_request(request_queue_t *q, struct request *rq)
 	struct cfq_rq *crq = RQ_DATA(rq);
 
 	if (crq) {
-		cfq_remove_merge_hints(q, crq);
 		list_del_init(&rq->queuelist);
+		cfq_del_crq_rb(crq);
+		cfq_remove_merge_hints(q, crq);
 
-		if (crq->cfq_queue)
-			cfq_del_crq_rb(crq);
 	}
 }
 
@@ -662,21 +646,15 @@ cfq_merge(request_queue_t *q, struct request **req, struct bio *bio)
 	}
 
 	__rq = cfq_find_rq_hash(cfqd, bio->bi_sector);
-	if (__rq) {
-		BUG_ON(__rq->sector + __rq->nr_sectors != bio->bi_sector);
-
-		if (elv_rq_merge_ok(__rq, bio)) {
-			ret = ELEVATOR_BACK_MERGE;
-			goto out;
-		}
+	if (__rq && elv_rq_merge_ok(__rq, bio)) {
+		ret = ELEVATOR_BACK_MERGE;
+		goto out;
 	}
 
 	__rq = cfq_find_rq_rb(cfqd, bio->bi_sector + bio_sectors(bio));
-	if (__rq) {
-		if (elv_rq_merge_ok(__rq, bio)) {
-			ret = ELEVATOR_FRONT_MERGE;
-			goto out;
-		}
+	if (__rq && elv_rq_merge_ok(__rq, bio)) {
+		ret = ELEVATOR_FRONT_MERGE;
+		goto out;
 	}
 
 	return ELEVATOR_NO_MERGE;
@@ -709,20 +687,194 @@ static void
 cfq_merged_requests(request_queue_t *q, struct request *rq,
 		    struct request *next)
 {
-	struct cfq_rq *crq = RQ_DATA(rq);
-	struct cfq_rq *cnext = RQ_DATA(next);
-
 	cfq_merged_request(q, rq);
 
-	if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist)) {
-		if (time_before(cnext->queue_start, crq->queue_start)) {
-			list_move(&rq->queuelist, &next->queuelist);
-			crq->queue_start = cnext->queue_start;
+	/*
+	 * reposition in fifo if next is older than rq
+	 */
+	if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) &&
+	    time_before(next->start_time, rq->start_time))
+		list_move(&rq->queuelist, &next->queuelist);
+
+	cfq_remove_request(q, next);
+}
+
+static inline void
+__cfq_set_active_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
+{
+	if (cfqq) {
+		/*
+		 * stop potential idle class queues waiting service
+		 */
+		del_timer(&cfqd->idle_class_timer);
+
+		cfqq->slice_start = jiffies;
+		cfqq->slice_end = 0;
+		cfqq->slice_left = 0;
+		cfqq->must_alloc_slice = 0;
+		cfqq->fifo_expire = 0;
+	}
+
+	cfqd->active_queue = cfqq;
+}
+
+/*
+ * 0
+ * 0,1
+ * 0,1,2
+ * 0,1,2,3
+ * 0,1,2,3,4
+ * 0,1,2,3,4,5
+ * 0,1,2,3,4,5,6
+ * 0,1,2,3,4,5,6,7
+ */
+static int cfq_get_next_prio_level(struct cfq_data *cfqd)
+{
+	int prio, wrap;
+
+	prio = -1;
+	wrap = 0;
+	do {
+		int p;
+
+		for (p = cfqd->cur_prio; p <= cfqd->cur_end_prio; p++) {
+			if (!list_empty(&cfqd->rr_list[p])) {
+				prio = p;
+				break;
+			}
+		}
+
+		if (prio != -1)
+			break;
+		cfqd->cur_prio = 0;
+		if (++cfqd->cur_end_prio == CFQ_PRIO_LISTS) {
+			cfqd->cur_end_prio = 0;
+			if (wrap)
+				break;
+			wrap = 1;
 		}
+	} while (1);
+
+	if (unlikely(prio == -1))
+		return -1;
+
+	BUG_ON(prio >= CFQ_PRIO_LISTS);
+
+	list_splice_init(&cfqd->rr_list[prio], &cfqd->cur_rr);
+
+	cfqd->cur_prio = prio + 1;
+	if (cfqd->cur_prio > cfqd->cur_end_prio) {
+		cfqd->cur_end_prio = cfqd->cur_prio;
+		cfqd->cur_prio = 0;
+	}
+	if (cfqd->cur_end_prio == CFQ_PRIO_LISTS) {
+		cfqd->cur_prio = 0;
+		cfqd->cur_end_prio = 0;
 	}
 
-	cfq_update_next_crq(cnext);
-	cfq_remove_request(q, next);
+	return prio;
+}
+
+static void cfq_set_active_queue(struct cfq_data *cfqd)
+{
+	struct cfq_queue *cfqq = NULL;
+
+	/*
+	 * if current list is non-empty, grab first entry. if it is empty,
+	 * get next prio level and grab first entry then if any are spliced
+	 */
+	if (!list_empty(&cfqd->cur_rr) || cfq_get_next_prio_level(cfqd) != -1)
+		cfqq = list_entry_cfqq(cfqd->cur_rr.next);
+
+	/*
+	 * if we have idle queues and no rt or be queues had pending
+	 * requests, either allow immediate service if the grace period
+	 * has passed or arm the idle grace timer
+	 */
+	if (!cfqq && !list_empty(&cfqd->idle_rr)) {
+		unsigned long end = cfqd->last_end_request + CFQ_IDLE_GRACE;
+
+		if (time_after_eq(jiffies, end))
+			cfqq = list_entry_cfqq(cfqd->idle_rr.next);
+		else
+			mod_timer(&cfqd->idle_class_timer, end);
+	}
+
+	__cfq_set_active_queue(cfqd, cfqq);
+}
+
+/*
+ * current cfqq expired its slice (or was too idle), select new one
+ */
+static inline void cfq_slice_expired(struct cfq_data *cfqd, int preempted)
+{
+	struct cfq_queue *cfqq = cfqd->active_queue;
+
+	if (cfqq) {
+		unsigned long now = jiffies;
+
+		if (cfqq->wait_request)
+			del_timer(&cfqd->idle_slice_timer);
+
+		if (!preempted && !cfqq->in_flight)
+			cfqq->service_last = now;
+
+		cfqq->must_dispatch = 0;
+		cfqq->wait_request = 0;
+
+		/*
+		 * store what was left of this slice, if the queue idled out
+		 * or was preempted
+		 */
+		if (time_after(now, cfqq->slice_end))
+			cfqq->slice_left = now - cfqq->slice_end;
+		else
+			cfqq->slice_left = 0;
+
+		if (cfqq->on_rr)
+			cfq_resort_rr_list(cfqq, preempted);
+
+		cfqd->active_queue = NULL;
+
+		if (cfqd->active_cic) {
+			put_io_context(cfqd->active_cic->ioc);
+			cfqd->active_cic = NULL;
+		}
+	}
+
+	cfqd->dispatch_slice = 0;
+}
+
+static int cfq_arm_slice_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq)
+
+{
+	WARN_ON(!RB_EMPTY(&cfqq->sort_list));
+	WARN_ON(cfqq != cfqd->active_queue);
+
+	/*
+	 * idle is disabled, either manually or by past process history
+	 */
+	if (!cfqd->cfq_slice_idle)
+		return 0;
+	if (!cfqq->idle_window)
+		return 0;
+	/*
+	 * task has exited, don't wait
+	 */
+	if (cfqd->active_cic && !cfqd->active_cic->ioc->task)
+		return 0;
+
+	cfqq->wait_request = 1;
+	cfqq->must_alloc = 1;
+
+	if (!timer_pending(&cfqd->idle_slice_timer)) {
+		unsigned long slice_left = cfqq->slice_end - 1;
+
+		cfqd->idle_slice_timer.expires = min(jiffies + cfqd->cfq_slice_idle, slice_left);
+		add_timer(&cfqd->idle_slice_timer);
+	}
+
+	return 1;
 }
 
 /*
@@ -738,31 +890,39 @@ static void cfq_dispatch_sort(request_queue_t *q, struct cfq_rq *crq)
 	struct request *__rq;
 	sector_t last;
 
-	cfq_del_crq_rb(crq);
-	cfq_remove_merge_hints(q, crq);
 	list_del(&crq->request->queuelist);
 
 	last = cfqd->last_sector;
-	while ((entry = entry->prev) != head) {
-		__rq = list_entry_rq(entry);
+	list_for_each_entry_reverse(__rq, head, queuelist) {
+		struct cfq_rq *__crq = RQ_DATA(__rq);
 
-		if (blk_barrier_rq(crq->request))
+		if (blk_barrier_rq(__rq))
+			break;
+		if (!blk_fs_request(__rq))
 			break;
-		if (!blk_fs_request(crq->request))
+		if (__crq->requeued)
 			break;
 
-		if (crq->request->sector > __rq->sector)
+		if (__rq->sector <= crq->request->sector)
 			break;
 		if (__rq->sector > last && crq->request->sector < last) {
-			last = crq->request->sector;
+			last = crq->request->sector + crq->request->nr_sectors;
 			break;
 		}
+		entry = &__rq->queuelist;
 	}
 
 	cfqd->last_sector = last;
+
+	cfqq->next_crq = cfq_find_next_crq(cfqd, cfqq, crq);
+
+	cfq_del_crq_rb(crq);
+	cfq_remove_merge_hints(q, crq);
+
 	crq->in_flight = 1;
+	crq->requeued = 0;
 	cfqq->in_flight++;
-	list_add(&crq->request->queuelist, entry);
+	list_add_tail(&crq->request->queuelist, entry);
 }
 
 /*
@@ -771,105 +931,176 @@ static void cfq_dispatch_sort(request_queue_t *q, struct cfq_rq *crq)
 static inline struct cfq_rq *cfq_check_fifo(struct cfq_queue *cfqq)
 {
 	struct cfq_data *cfqd = cfqq->cfqd;
-	const int reads = !list_empty(&cfqq->fifo[0]);
-	const int writes = !list_empty(&cfqq->fifo[1]);
-	unsigned long now = jiffies;
+	struct request *rq;
 	struct cfq_rq *crq;
 
-	if (time_before(now, cfqq->last_fifo_expire + cfqd->cfq_fifo_batch_expire))
+	if (cfqq->fifo_expire)
 		return NULL;
 
-	crq = RQ_DATA(list_entry(cfqq->fifo[0].next, struct request, queuelist));
-	if (reads && time_after(now, crq->queue_start + cfqd->cfq_fifo_expire_r)) {
-		cfqq->last_fifo_expire = now;
-		return crq;
-	}
+	if (!list_empty(&cfqq->fifo)) {
+		int fifo = cfq_cfqq_sync(cfqq);
 
-	crq = RQ_DATA(list_entry(cfqq->fifo[1].next, struct request, queuelist));
-	if (writes && time_after(now, crq->queue_start + cfqd->cfq_fifo_expire_w)) {
-		cfqq->last_fifo_expire = now;
-		return crq;
+		crq = RQ_DATA(list_entry_fifo(cfqq->fifo.next));
+		rq = crq->request;
+		if (time_after(jiffies, rq->start_time + cfqd->cfq_fifo_expire[fifo])) {
+			cfqq->fifo_expire = 1;
+			return crq;
+		}
 	}
 
 	return NULL;
 }
 
 /*
- * dispatch a single request from given queue
+ * Scale schedule slice based on io priority
  */
+static inline int
+cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
+{
+	const int base_slice = cfqd->cfq_slice[cfq_cfqq_sync(cfqq)];
+
+	WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR);
+
+	return base_slice + (base_slice/CFQ_SLICE_SCALE * (4 - cfqq->ioprio));
+}
+
 static inline void
-cfq_dispatch_request(request_queue_t *q, struct cfq_data *cfqd,
-		     struct cfq_queue *cfqq)
+cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
-	struct cfq_rq *crq;
+	cfqq->slice_end = cfq_prio_to_slice(cfqd, cfqq) + jiffies;
+}
 
-	/*
-	 * follow expired path, else get first next available
-	 */
-	if ((crq = cfq_check_fifo(cfqq)) == NULL) {
-		if (cfqd->find_best_crq)
-			crq = cfqq->next_crq;
-		else
-			crq = rb_entry_crq(rb_first(&cfqq->sort_list));
-	}
+static inline int
+cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
+{
+	const int base_rq = cfqd->cfq_slice_async_rq;
 
-	cfqd->last_sector = crq->request->sector + crq->request->nr_sectors;
+	WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR);
 
-	/*
-	 * finally, insert request into driver list
-	 */
-	cfq_dispatch_sort(q, crq);
+	return 2 * (base_rq + base_rq * (CFQ_PRIO_LISTS - 1 - cfqq->ioprio));
 }
 
-static int cfq_dispatch_requests(request_queue_t *q, int max_dispatch)
+/*
+ * get next queue for service
+ */
+static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd, int force)
 {
-	struct cfq_data *cfqd = q->elevator->elevator_data;
+	unsigned long now = jiffies;
 	struct cfq_queue *cfqq;
-	struct list_head *entry, *tmp;
-	int queued, busy_queues, first_round;
 
-	if (list_empty(&cfqd->rr_list))
-		return 0;
+	cfqq = cfqd->active_queue;
+	if (!cfqq)
+		goto new_queue;
 
-	queued = 0;
-	first_round = 1;
-restart:
-	busy_queues = 0;
-	list_for_each_safe(entry, tmp, &cfqd->rr_list) {
-		cfqq = list_entry_cfqq(entry);
+	/*
+	 * slice has expired
+	 */
+	if (!cfqq->must_dispatch && time_after(jiffies, cfqq->slice_end))
+		goto new_queue;
 
-		BUG_ON(RB_EMPTY(&cfqq->sort_list));
+	/*
+	 * if queue has requests, dispatch one. if not, check if
+	 * enough slice is left to wait for one
+	 */
+	if (!RB_EMPTY(&cfqq->sort_list))
+		goto keep_queue;
+	else if (!force && cfq_cfqq_sync(cfqq) &&
+		 time_before(now, cfqq->slice_end)) {
+		if (cfq_arm_slice_timer(cfqd, cfqq))
+			return NULL;
+	}
+
+new_queue:
+	cfq_slice_expired(cfqd, 0);
+	cfq_set_active_queue(cfqd);
+keep_queue:
+	return cfqd->active_queue;
+}
+
+static int
+__cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq,
+			int max_dispatch)
+{
+	int dispatched = 0;
+
+	BUG_ON(RB_EMPTY(&cfqq->sort_list));
+
+	do {
+		struct cfq_rq *crq;
 
 		/*
-		 * first round of queueing, only select from queues that
-		 * don't already have io in-flight
+		 * follow expired path, else get first next available
 		 */
-		if (first_round && cfqq->in_flight)
-			continue;
+		if ((crq = cfq_check_fifo(cfqq)) == NULL)
+			crq = cfqq->next_crq;
+
+		/*
+		 * finally, insert request into driver dispatch list
+		 */
+		cfq_dispatch_sort(cfqd->queue, crq);
 
-		cfq_dispatch_request(q, cfqd, cfqq);
+		cfqd->dispatch_slice++;
+		dispatched++;
 
-		if (!RB_EMPTY(&cfqq->sort_list))
-			busy_queues++;
+		if (!cfqd->active_cic) {
+			atomic_inc(&crq->io_context->ioc->refcount);
+			cfqd->active_cic = crq->io_context;
+		}
 
-		queued++;
-	}
+		if (RB_EMPTY(&cfqq->sort_list))
+			break;
+
+	} while (dispatched < max_dispatch);
+
+	/*
+	 * if slice end isn't set yet, set it. if at least one request was
+	 * sync, use the sync time slice value
+	 */
+	if (!cfqq->slice_end)
+		cfq_set_prio_slice(cfqd, cfqq);
+
+	/*
+	 * expire an async queue immediately if it has used up its slice. idle
+	 * queue always expire after 1 dispatch round.
+	 */
+	if ((!cfq_cfqq_sync(cfqq) &&
+	    cfqd->dispatch_slice >= cfq_prio_to_maxrq(cfqd, cfqq)) ||
+	    cfq_class_idle(cfqq))
+		cfq_slice_expired(cfqd, 0);
+
+	return dispatched;
+}
+
+static int
+cfq_dispatch_requests(request_queue_t *q, int max_dispatch, int force)
+{
+	struct cfq_data *cfqd = q->elevator->elevator_data;
+	struct cfq_queue *cfqq;
+
+	if (!cfqd->busy_queues)
+		return 0;
+
+	cfqq = cfq_select_queue(cfqd, force);
+	if (cfqq) {
+		cfqq->wait_request = 0;
+		cfqq->must_dispatch = 0;
+		del_timer(&cfqd->idle_slice_timer);
+
+		if (cfq_class_idle(cfqq))
+			max_dispatch = 1;
 
-	if ((queued < max_dispatch) && (busy_queues || first_round)) {
-		first_round = 0;
-		goto restart;
+		return __cfq_dispatch_requests(cfqd, cfqq, max_dispatch);
 	}
 
-	return queued;
+	return 0;
 }
 
 static inline void cfq_account_dispatch(struct cfq_rq *crq)
 {
 	struct cfq_queue *cfqq = crq->cfq_queue;
 	struct cfq_data *cfqd = cfqq->cfqd;
-	unsigned long now, elapsed;
 
-	if (!blk_fs_request(crq->request))
+	if (unlikely(!blk_fs_request(crq->request)))
 		return;
 
 	/*
@@ -879,65 +1110,34 @@ static inline void cfq_account_dispatch(struct cfq_rq *crq)
 	if (crq->accounted)
 		return;
 
-	now = jiffies;
-	if (cfqq->service_start == ~0UL)
-		cfqq->service_start = now;
-
-	/*
-	 * on drives with tagged command queueing, command turn-around time
-	 * doesn't necessarily reflect the time spent processing this very
-	 * command inside the drive. so do the accounting differently there,
-	 * by just sorting on the number of requests
-	 */
-	if (cfqd->cfq_tagged) {
-		if (time_after(now, cfqq->service_start + cfq_service)) {
-			cfqq->service_start = now;
-			cfqq->service_used /= 10;
-		}
-
-		cfqq->service_used++;
-		cfq_sort_rr_list(cfqq, 0);
-	}
-
-	elapsed = now - crq->queue_start;
-	if (elapsed > max_elapsed_dispatch)
-		max_elapsed_dispatch = elapsed;
-
 	crq->accounted = 1;
-	crq->service_start = now;
-
-	if (++cfqd->rq_in_driver >= CFQ_MAX_TAG && !cfqd->cfq_tagged) {
-		cfqq->cfqd->cfq_tagged = 1;
-		printk("cfq: depth %d reached, tagging now on\n", CFQ_MAX_TAG);
-	}
+	cfqd->rq_in_driver++;
 }
 
 static inline void
 cfq_account_completion(struct cfq_queue *cfqq, struct cfq_rq *crq)
 {
 	struct cfq_data *cfqd = cfqq->cfqd;
+	unsigned long now;
 
 	if (!crq->accounted)
 		return;
 
+	now = jiffies;
+
 	WARN_ON(!cfqd->rq_in_driver);
 	cfqd->rq_in_driver--;
 
-	if (!cfqd->cfq_tagged) {
-		unsigned long now = jiffies;
-		unsigned long duration = now - crq->service_start;
+	if (!cfq_class_idle(cfqq))
+		cfqd->last_end_request = now;
 
-		if (time_after(now, cfqq->service_start + cfq_service)) {
-			cfqq->service_start = now;
-			cfqq->service_used >>= 3;
-		}
-
-		cfqq->service_used += duration;
-		cfq_sort_rr_list(cfqq, 0);
-
-		if (duration > max_elapsed_crq)
-			max_elapsed_crq = duration;
+	if (!cfqq->in_flight && cfqq->on_rr) {
+		cfqq->service_last = now;
+		cfq_resort_rr_list(cfqq, 0);
 	}
+
+	if (crq->is_sync)
+		crq->io_context->last_end_request = now;
 }
 
 static struct request *cfq_next_request(request_queue_t *q)
@@ -950,7 +1150,15 @@ static struct request *cfq_next_request(request_queue_t *q)
 dispatch:
 		rq = list_entry_rq(q->queue_head.next);
 
-		if ((crq = RQ_DATA(rq)) != NULL) {
+		crq = RQ_DATA(rq);
+		if (crq) {
+			/*
+			 * if idle window is disabled, allow queue buildup
+			 */
+			if (!crq->in_flight && !crq->cfq_queue->idle_window &&
+			    cfqd->rq_in_driver >= cfqd->cfq_max_depth)
+				return NULL;
+
 			cfq_remove_merge_hints(q, crq);
 			cfq_account_dispatch(crq);
 		}
@@ -958,7 +1166,7 @@ dispatch:
 		return rq;
 	}
 
-	if (cfq_dispatch_requests(q, cfqd->cfq_quantum))
+	if (cfq_dispatch_requests(q, cfqd->cfq_quantum, 0))
 		goto dispatch;
 
 	return NULL;
@@ -972,14 +1180,22 @@ dispatch:
  */
 static void cfq_put_queue(struct cfq_queue *cfqq)
 {
-	BUG_ON(!atomic_read(&cfqq->ref));
+	struct cfq_data *cfqd = cfqq->cfqd;
+
+	BUG_ON(atomic_read(&cfqq->ref) <= 0);
 
 	if (!atomic_dec_and_test(&cfqq->ref))
 		return;
 
 	BUG_ON(rb_first(&cfqq->sort_list));
+	BUG_ON(cfqq->allocated[READ] + cfqq->allocated[WRITE]);
 	BUG_ON(cfqq->on_rr);
 
+	if (unlikely(cfqd->active_queue == cfqq)) {
+		cfq_slice_expired(cfqd, 0);
+		kblockd_schedule_work(&cfqd->unplug_work);
+	}
+
 	cfq_put_cfqd(cfqq->cfqd);
 
 	/*
@@ -991,7 +1207,7 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
 }
 
 static inline struct cfq_queue *
-__cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned long key, const int hashval)
+__cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key, const int hashval)
 {
 	struct hlist_head *hash_list = &cfqd->cfq_hash[hashval];
 	struct hlist_node *entry, *next;
@@ -1007,94 +1223,220 @@ __cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned long key, const int hashval)
 }
 
 static struct cfq_queue *
-cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned long key)
+cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key)
 {
 	return __cfq_find_cfq_hash(cfqd, key, hash_long(key, CFQ_QHASH_SHIFT));
 }
 
-static inline void
-cfq_rehash_cfqq(struct cfq_data *cfqd, struct cfq_queue **cfqq,
-		struct cfq_io_context *cic)
+static void cfq_free_io_context(struct cfq_io_context *cic)
 {
-	unsigned long hashkey = cfq_hash_key(cfqd, current);
-	unsigned long hashval = hash_long(hashkey, CFQ_QHASH_SHIFT);
-	struct cfq_queue *__cfqq;
-	unsigned long flags;
+	struct cfq_io_context *__cic;
+	struct list_head *entry, *next;
 
-	spin_lock_irqsave(cfqd->queue->queue_lock, flags);
-
-	hlist_del(&(*cfqq)->cfq_hash);
-
-	__cfqq = __cfq_find_cfq_hash(cfqd, hashkey, hashval);
-	if (!__cfqq || __cfqq == *cfqq) {
-		__cfqq = *cfqq;
-		hlist_add_head(&__cfqq->cfq_hash, &cfqd->cfq_hash[hashval]);
-		__cfqq->key_type = cfqd->key_type;
-	} else {
-		atomic_inc(&__cfqq->ref);
-		cic->cfqq = __cfqq;
-		cfq_put_queue(*cfqq);
-		*cfqq = __cfqq;
+	list_for_each_safe(entry, next, &cic->list) {
+		__cic = list_entry(entry, struct cfq_io_context, list);
+		kmem_cache_free(cfq_ioc_pool, __cic);
 	}
 
-	cic->cfqq = __cfqq;
-	spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
+	kmem_cache_free(cfq_ioc_pool, cic);
 }
 
-static void cfq_free_io_context(struct cfq_io_context *cic)
+/*
+ * Called with interrupts disabled
+ */
+static void cfq_exit_single_io_context(struct cfq_io_context *cic)
 {
-	kmem_cache_free(cfq_ioc_pool, cic);
+	struct cfq_data *cfqd = cic->cfqq->cfqd;
+	request_queue_t *q = cfqd->queue;
+
+	WARN_ON(!irqs_disabled());
+
+	spin_lock(q->queue_lock);
+
+	if (unlikely(cic->cfqq == cfqd->active_queue)) {
+		cfq_slice_expired(cfqd, 0);
+		kblockd_schedule_work(&cfqd->unplug_work);
+	}
+
+	cfq_put_queue(cic->cfqq);
+	cic->cfqq = NULL;
+	spin_unlock(q->queue_lock);
 }
 
 /*
- * locking hierarchy is: io_context lock -> queue locks
+ * Another task may update the task cic list, if it is doing a queue lookup
+ * on its behalf. cfq_cic_lock excludes such concurrent updates
  */
 static void cfq_exit_io_context(struct cfq_io_context *cic)
 {
-	struct cfq_queue *cfqq = cic->cfqq;
-	struct list_head *entry = &cic->list;
-	request_queue_t *q;
+	struct cfq_io_context *__cic;
+	struct list_head *entry;
 	unsigned long flags;
 
+	local_irq_save(flags);
+
 	/*
 	 * put the reference this task is holding to the various queues
 	 */
-	spin_lock_irqsave(&cic->ioc->lock, flags);
-	while ((entry = cic->list.next) != &cic->list) {
-		struct cfq_io_context *__cic;
-
+	list_for_each(entry, &cic->list) {
 		__cic = list_entry(entry, struct cfq_io_context, list);
-		list_del(entry);
-
-		q = __cic->cfqq->cfqd->queue;
-		spin_lock(q->queue_lock);
-		cfq_put_queue(__cic->cfqq);
-		spin_unlock(q->queue_lock);
+		cfq_exit_single_io_context(__cic);
 	}
 
-	q = cfqq->cfqd->queue;
-	spin_lock(q->queue_lock);
-	cfq_put_queue(cfqq);
-	spin_unlock(q->queue_lock);
-
-	cic->cfqq = NULL;
-	spin_unlock_irqrestore(&cic->ioc->lock, flags);
+	cfq_exit_single_io_context(cic);
+	local_irq_restore(flags);
 }
 
-static struct cfq_io_context *cfq_alloc_io_context(int gfp_flags)
+static struct cfq_io_context *
+cfq_alloc_io_context(struct cfq_data *cfqd, int gfp_mask)
 {
-	struct cfq_io_context *cic = kmem_cache_alloc(cfq_ioc_pool, gfp_flags);
+	struct cfq_io_context *cic = kmem_cache_alloc(cfq_ioc_pool, gfp_mask);
 
 	if (cic) {
-		cic->dtor = cfq_free_io_context;
-		cic->exit = cfq_exit_io_context;
 		INIT_LIST_HEAD(&cic->list);
 		cic->cfqq = NULL;
+		cic->key = NULL;
+		cic->last_end_request = jiffies;
+		cic->ttime_total = 0;
+		cic->ttime_samples = 0;
+		cic->ttime_mean = 0;
+		cic->dtor = cfq_free_io_context;
+		cic->exit = cfq_exit_io_context;
 	}
 
 	return cic;
 }
 
+static void cfq_init_prio_data(struct cfq_queue *cfqq)
+{
+	struct task_struct *tsk = current;
+	int ioprio_class;
+
+	if (!cfqq->prio_changed)
+		return;
+
+	ioprio_class = IOPRIO_PRIO_CLASS(tsk->ioprio);
+	switch (ioprio_class) {
+		default:
+			printk(KERN_ERR "cfq: bad prio %x\n", ioprio_class);
+		case IOPRIO_CLASS_NONE:
+			/*
+			 * no prio set, place us in the middle of the BE classes
+			 */
+			cfqq->ioprio = task_nice_ioprio(tsk);
+			cfqq->ioprio_class = IOPRIO_CLASS_BE;
+			break;
+		case IOPRIO_CLASS_RT:
+			cfqq->ioprio = task_ioprio(tsk);
+			cfqq->ioprio_class = IOPRIO_CLASS_RT;
+			break;
+		case IOPRIO_CLASS_BE:
+			cfqq->ioprio = task_ioprio(tsk);
+			cfqq->ioprio_class = IOPRIO_CLASS_BE;
+			break;
+		case IOPRIO_CLASS_IDLE:
+			cfqq->ioprio_class = IOPRIO_CLASS_IDLE;
+			cfqq->ioprio = 7;
+			cfqq->idle_window = 0;
+			break;
+	}
+
+	/*
+	 * keep track of original prio settings in case we have to temporarily
+	 * elevate the priority of this queue
+	 */
+	cfqq->org_ioprio = cfqq->ioprio;
+	cfqq->org_ioprio_class = cfqq->ioprio_class;
+
+	if (cfqq->on_rr)
+		cfq_resort_rr_list(cfqq, 0);
+
+	cfqq->prio_changed = 0;
+}
+
+static inline void changed_ioprio(struct cfq_queue *cfqq)
+{
+	if (cfqq) {
+		struct cfq_data *cfqd = cfqq->cfqd;
+
+		spin_lock(cfqd->queue->queue_lock);
+		cfqq->prio_changed = 1;
+		cfq_init_prio_data(cfqq);
+		spin_unlock(cfqd->queue->queue_lock);
+	}
+}
+
+/*
+ * callback from sys_ioprio_set, irqs are disabled
+ */
+static int cfq_ioc_set_ioprio(struct io_context *ioc, unsigned int ioprio)
+{
+	struct cfq_io_context *cic = ioc->cic;
+
+	changed_ioprio(cic->cfqq);
+
+	list_for_each_entry(cic, &cic->list, list)
+		changed_ioprio(cic->cfqq);
+
+	return 0;
+}
+
+static struct cfq_queue *
+cfq_get_queue(struct cfq_data *cfqd, unsigned int key, int gfp_mask)
+{
+	const int hashval = hash_long(key, CFQ_QHASH_SHIFT);
+	struct cfq_queue *cfqq, *new_cfqq = NULL;
+
+retry:
+	cfqq = __cfq_find_cfq_hash(cfqd, key, hashval);
+
+	if (!cfqq) {
+		if (new_cfqq) {
+			cfqq = new_cfqq;
+			new_cfqq = NULL;
+		} else if (gfp_mask & __GFP_WAIT) {
+			spin_unlock_irq(cfqd->queue->queue_lock);
+			new_cfqq = kmem_cache_alloc(cfq_pool, gfp_mask);
+			spin_lock_irq(cfqd->queue->queue_lock);
+			goto retry;
+		} else {
+			cfqq = kmem_cache_alloc(cfq_pool, gfp_mask);
+			if (!cfqq)
+				goto out;
+		}
+
+		memset(cfqq, 0, sizeof(*cfqq));
+
+		INIT_HLIST_NODE(&cfqq->cfq_hash);
+		INIT_LIST_HEAD(&cfqq->cfq_list);
+		RB_CLEAR_ROOT(&cfqq->sort_list);
+		INIT_LIST_HEAD(&cfqq->fifo);
+
+		cfqq->key = key;
+		hlist_add_head(&cfqq->cfq_hash, &cfqd->cfq_hash[hashval]);
+		atomic_set(&cfqq->ref, 0);
+		cfqq->cfqd = cfqd;
+		atomic_inc(&cfqd->ref);
+		cfqq->service_last = 0;
+		/*
+		 * set ->slice_left to allow preemption for a new process
+		 */
+		cfqq->slice_left = 2 * cfqd->cfq_slice_idle;
+		cfqq->idle_window = 1;
+		cfqq->ioprio = -1;
+		cfqq->ioprio_class = -1;
+		cfqq->prio_changed = 1;
+	}
+
+	if (new_cfqq)
+		kmem_cache_free(cfq_pool, new_cfqq);
+
+	atomic_inc(&cfqq->ref);
+out:
+	WARN_ON((gfp_mask & __GFP_WAIT) && !cfqq);
+	return cfqq;
+}
+
 /*
  * Setup general io context and cfq io context. There can be several cfq
  * io contexts per general io context, if this process is doing io to more
@@ -1102,39 +1444,39 @@ static struct cfq_io_context *cfq_alloc_io_context(int gfp_flags)
  * cfqq, so we don't need to worry about it disappearing
  */
 static struct cfq_io_context *
-cfq_get_io_context(struct cfq_queue **cfqq, int gfp_flags)
+cfq_get_io_context(struct cfq_data *cfqd, pid_t pid, int gfp_mask)
 {
-	struct cfq_data *cfqd = (*cfqq)->cfqd;
-	struct cfq_queue *__cfqq = *cfqq;
+	struct io_context *ioc = NULL;
 	struct cfq_io_context *cic;
-	struct io_context *ioc;
 
-	might_sleep_if(gfp_flags & __GFP_WAIT);
+	might_sleep_if(gfp_mask & __GFP_WAIT);
 
-	ioc = get_io_context(gfp_flags);
+	ioc = get_io_context(gfp_mask);
 	if (!ioc)
 		return NULL;
 
 	if ((cic = ioc->cic) == NULL) {
-		cic = cfq_alloc_io_context(gfp_flags);
+		cic = cfq_alloc_io_context(cfqd, gfp_mask);
 
 		if (cic == NULL)
 			goto err;
 
+		/*
+		 * manually increment generic io_context usage count, it
+		 * cannot go away since we are already holding one ref to it
+		 */
 		ioc->cic = cic;
+		ioc->set_ioprio = cfq_ioc_set_ioprio;
 		cic->ioc = ioc;
-		cic->cfqq = __cfqq;
-		atomic_inc(&__cfqq->ref);
+		cic->key = cfqd;
+		atomic_inc(&cfqd->ref);
 	} else {
 		struct cfq_io_context *__cic;
-		unsigned long flags;
 
 		/*
-		 * since the first cic on the list is actually the head
-		 * itself, need to check this here or we'll duplicate an
-		 * cic per ioc for no reason
+		 * the first cic on the list is actually the head itself
 		 */
-		if (cic->cfqq == __cfqq)
+		if (cic->key == cfqd)
 			goto out;
 
 		/*
@@ -1142,152 +1484,259 @@ cfq_get_io_context(struct cfq_queue **cfqq, int gfp_flags)
 		 * should be ok here, the list will usually not be more than
 		 * 1 or a few entries long
 		 */
-		spin_lock_irqsave(&ioc->lock, flags);
 		list_for_each_entry(__cic, &cic->list, list) {
 			/*
 			 * this process is already holding a reference to
 			 * this queue, so no need to get one more
 			 */
-			if (__cic->cfqq == __cfqq) {
+			if (__cic->key == cfqd) {
 				cic = __cic;
-				spin_unlock_irqrestore(&ioc->lock, flags);
 				goto out;
 			}
 		}
-		spin_unlock_irqrestore(&ioc->lock, flags);
 
 		/*
 		 * nope, process doesn't have a cic assoicated with this
 		 * cfqq yet. get a new one and add to list
 		 */
-		__cic = cfq_alloc_io_context(gfp_flags);
+		__cic = cfq_alloc_io_context(cfqd, gfp_mask);
 		if (__cic == NULL)
 			goto err;
 
 		__cic->ioc = ioc;
-		__cic->cfqq = __cfqq;
-		atomic_inc(&__cfqq->ref);
-		spin_lock_irqsave(&ioc->lock, flags);
+		__cic->key = cfqd;
+		atomic_inc(&cfqd->ref);
 		list_add(&__cic->list, &cic->list);
-		spin_unlock_irqrestore(&ioc->lock, flags);
-
 		cic = __cic;
-		*cfqq = __cfqq;
 	}
 
 out:
-	/*
-	 * if key_type has been changed on the fly, we lazily rehash
-	 * each queue at lookup time
-	 */
-	if ((*cfqq)->key_type != cfqd->key_type)
-		cfq_rehash_cfqq(cfqd, cfqq, cic);
-
 	return cic;
 err:
 	put_io_context(ioc);
 	return NULL;
 }
 
-static struct cfq_queue *
-__cfq_get_queue(struct cfq_data *cfqd, unsigned long key, int gfp_mask)
+static void
+cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_io_context *cic)
 {
-	const int hashval = hash_long(key, CFQ_QHASH_SHIFT);
-	struct cfq_queue *cfqq, *new_cfqq = NULL;
+	unsigned long elapsed, ttime;
 
-retry:
-	cfqq = __cfq_find_cfq_hash(cfqd, key, hashval);
+	/*
+	 * if this context already has stuff queued, thinktime is from
+	 * last queue not last end
+	 */
+#if 0
+	if (time_after(cic->last_end_request, cic->last_queue))
+		elapsed = jiffies - cic->last_end_request;
+	else
+		elapsed = jiffies - cic->last_queue;
+#else
+		elapsed = jiffies - cic->last_end_request;
+#endif
 
-	if (!cfqq) {
-		if (new_cfqq) {
-			cfqq = new_cfqq;
-			new_cfqq = NULL;
-		} else {
-			spin_unlock_irq(cfqd->queue->queue_lock);
-			new_cfqq = kmem_cache_alloc(cfq_pool, gfp_mask);
-			spin_lock_irq(cfqd->queue->queue_lock);
+	ttime = min(elapsed, 2UL * cfqd->cfq_slice_idle);
 
-			if (!new_cfqq && !(gfp_mask & __GFP_WAIT))
-				goto out;
-
-			goto retry;
-		}
+	cic->ttime_samples = (7*cic->ttime_samples + 256) / 8;
+	cic->ttime_total = (7*cic->ttime_total + 256*ttime) / 8;
+	cic->ttime_mean = (cic->ttime_total + 128) / cic->ttime_samples;
+}
 
-		memset(cfqq, 0, sizeof(*cfqq));
+#define sample_valid(samples)	((samples) > 80)
 
-		INIT_HLIST_NODE(&cfqq->cfq_hash);
-		INIT_LIST_HEAD(&cfqq->cfq_list);
-		RB_CLEAR_ROOT(&cfqq->sort_list);
-		INIT_LIST_HEAD(&cfqq->fifo[0]);
-		INIT_LIST_HEAD(&cfqq->fifo[1]);
+/*
+ * Disable idle window if the process thinks too long or seeks so much that
+ * it doesn't matter
+ */
+static void
+cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
+		       struct cfq_io_context *cic)
+{
+	int enable_idle = cfqq->idle_window;
 
-		cfqq->key = key;
-		hlist_add_head(&cfqq->cfq_hash, &cfqd->cfq_hash[hashval]);
-		atomic_set(&cfqq->ref, 0);
-		cfqq->cfqd = cfqd;
-		atomic_inc(&cfqd->ref);
-		cfqq->key_type = cfqd->key_type;
-		cfqq->service_start = ~0UL;
+	if (!cic->ioc->task || !cfqd->cfq_slice_idle)
+		enable_idle = 0;
+	else if (sample_valid(cic->ttime_samples)) {
+		if (cic->ttime_mean > cfqd->cfq_slice_idle)
+			enable_idle = 0;
+		else
+			enable_idle = 1;
 	}
 
-	if (new_cfqq)
-		kmem_cache_free(cfq_pool, new_cfqq);
+	cfqq->idle_window = enable_idle;
+}
 
-	atomic_inc(&cfqq->ref);
-out:
-	WARN_ON((gfp_mask & __GFP_WAIT) && !cfqq);
-	return cfqq;
+
+/*
+ * Check if new_cfqq should preempt the currently active queue. Return 0 for
+ * no or if we aren't sure, a 1 will cause a preempt.
+ */
+static int
+cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
+		   struct cfq_rq *crq)
+{
+	struct cfq_queue *cfqq = cfqd->active_queue;
+
+	if (cfq_class_idle(new_cfqq))
+		return 0;
+
+	if (!cfqq)
+		return 1;
+
+	if (cfq_class_idle(cfqq))
+		return 1;
+	if (!new_cfqq->wait_request)
+		return 0;
+	/*
+	 * if it doesn't have slice left, forget it
+	 */
+	if (new_cfqq->slice_left < cfqd->cfq_slice_idle)
+		return 0;
+	if (crq->is_sync && !cfq_cfqq_sync(cfqq))
+		return 1;
+
+	return 0;
+}
+
+/*
+ * cfqq preempts the active queue. if we allowed preempt with no slice left,
+ * let it have half of its nominal slice.
+ */
+static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
+{
+	struct cfq_queue *__cfqq, *next;
+
+	list_for_each_entry_safe(__cfqq, next, &cfqd->cur_rr, cfq_list)
+		cfq_resort_rr_list(__cfqq, 1);
+
+	if (!cfqq->slice_left)
+		cfqq->slice_left = cfq_prio_to_slice(cfqd, cfqq) / 2;
+
+	cfqq->slice_end = cfqq->slice_left + jiffies;
+	cfq_slice_expired(cfqd, 1);
+	__cfq_set_active_queue(cfqd, cfqq);
+}
+
+/*
+ * should really be a ll_rw_blk.c helper
+ */
+static void cfq_start_queueing(struct cfq_data *cfqd, struct cfq_queue *cfqq)
+{
+	request_queue_t *q = cfqd->queue;
+
+	if (!blk_queue_plugged(q))
+		q->request_fn(q);
+	else
+		__generic_unplug_device(q);
+}
+
+/*
+ * Called when a new fs request (crq) is added (to cfqq). Check if there's
+ * something we should do about it
+ */
+static void
+cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
+		 struct cfq_rq *crq)
+{
+	const int sync = crq->is_sync;
+
+	cfqq->next_crq = cfq_choose_req(cfqd, cfqq->next_crq, crq);
+
+	if (sync) {
+		struct cfq_io_context *cic = crq->io_context;
+
+		cfq_update_io_thinktime(cfqd, cic);
+		cfq_update_idle_window(cfqd, cfqq, cic);
+
+		cic->last_queue = jiffies;
+	}
+
+	if (cfqq == cfqd->active_queue) {
+		/*
+		 * if we are waiting for a request for this queue, let it rip
+		 * immediately and flag that we must not expire this queue
+		 * just now
+		 */
+		if (cfqq->wait_request) {
+			cfqq->must_dispatch = 1;
+			del_timer(&cfqd->idle_slice_timer);
+			cfq_start_queueing(cfqd, cfqq);
+		}
+	} else if (cfq_should_preempt(cfqd, cfqq, crq)) {
+		/*
+		 * not the active queue - expire current slice if it is
+		 * idle and has expired it's mean thinktime or this new queue
+		 * has some old slice time left and is of higher priority
+		 */
+		cfq_preempt_queue(cfqd, cfqq);
+		cfqq->must_dispatch = 1;
+		cfq_start_queueing(cfqd, cfqq);
+	}
 }
 
-static void cfq_enqueue(struct cfq_data *cfqd, struct cfq_rq *crq)
+static void cfq_enqueue(struct cfq_data *cfqd, struct request *rq)
 {
-	crq->is_sync = 0;
-	if (rq_data_dir(crq->request) == READ || current->flags & PF_SYNCWRITE)
-		crq->is_sync = 1;
+	struct cfq_rq *crq = RQ_DATA(rq);
+	struct cfq_queue *cfqq = crq->cfq_queue;
+
+	cfq_init_prio_data(cfqq);
 
 	cfq_add_crq_rb(crq);
-	crq->queue_start = jiffies;
 
-	list_add_tail(&crq->request->queuelist, &crq->cfq_queue->fifo[crq->is_sync]);
+	list_add_tail(&rq->queuelist, &cfqq->fifo);
+
+	if (rq_mergeable(rq)) {
+		cfq_add_crq_hash(cfqd, crq);
+
+		if (!cfqd->queue->last_merge)
+			cfqd->queue->last_merge = rq;
+	}
+
+	cfq_crq_enqueued(cfqd, cfqq, crq);
 }
 
 static void
 cfq_insert_request(request_queue_t *q, struct request *rq, int where)
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
-	struct cfq_rq *crq = RQ_DATA(rq);
 
 	switch (where) {
 		case ELEVATOR_INSERT_BACK:
-			while (cfq_dispatch_requests(q, cfqd->cfq_quantum))
+			while (cfq_dispatch_requests(q, INT_MAX, 1))
 				;
 			list_add_tail(&rq->queuelist, &q->queue_head);
+			/*
+			 * If we were idling with pending requests on
+			 * inactive cfqqs, force dispatching will
+			 * remove the idle timer and the queue won't
+			 * be kicked by __make_request() afterward.
+			 * Kick it here.
+			 */
+			kblockd_schedule_work(&cfqd->unplug_work);
 			break;
 		case ELEVATOR_INSERT_FRONT:
 			list_add(&rq->queuelist, &q->queue_head);
 			break;
 		case ELEVATOR_INSERT_SORT:
 			BUG_ON(!blk_fs_request(rq));
-			cfq_enqueue(cfqd, crq);
+			cfq_enqueue(cfqd, rq);
 			break;
 		default:
 			printk("%s: bad insert point %d\n", __FUNCTION__,where);
 			return;
 	}
+}
 
-	if (rq_mergeable(rq)) {
-		cfq_add_crq_hash(cfqd, crq);
-
-		if (!q->last_merge)
-			q->last_merge = rq;
-	}
+static inline int cfq_pending_requests(struct cfq_data *cfqd)
+{
+	return !list_empty(&cfqd->queue->queue_head) || cfqd->busy_queues;
 }
 
 static int cfq_queue_empty(request_queue_t *q)
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
 
-	return list_empty(&q->queue_head) && list_empty(&cfqd->rr_list);
+	return !cfq_pending_requests(cfqd);
 }
 
 static void cfq_completed_request(request_queue_t *q, struct request *rq)
@@ -1332,51 +1781,132 @@ cfq_latter_request(request_queue_t *q, struct request *rq)
 	return NULL;
 }
 
-static int cfq_may_queue(request_queue_t *q, int rw)
+/*
+ * we temporarily boost lower priority queues if they are holding fs exclusive
+ * resources. they are boosted to normal prio (CLASS_BE/4)
+ */
+static void cfq_prio_boost(struct cfq_queue *cfqq)
 {
-	struct cfq_data *cfqd = q->elevator->elevator_data;
-	struct cfq_queue *cfqq;
-	int ret = ELV_MQUEUE_MAY;
+	const int ioprio_class = cfqq->ioprio_class;
+	const int ioprio = cfqq->ioprio;
 
-	if (current->flags & PF_MEMALLOC)
-		return ELV_MQUEUE_MAY;
+	if (has_fs_excl()) {
+		/*
+		 * boost idle prio on transactions that would lock out other
+		 * users of the filesystem
+		 */
+		if (cfq_class_idle(cfqq))
+			cfqq->ioprio_class = IOPRIO_CLASS_BE;
+		if (cfqq->ioprio > IOPRIO_NORM)
+			cfqq->ioprio = IOPRIO_NORM;
+	} else {
+		/*
+		 * check if we need to unboost the queue
+		 */
+		if (cfqq->ioprio_class != cfqq->org_ioprio_class)
+			cfqq->ioprio_class = cfqq->org_ioprio_class;
+		if (cfqq->ioprio != cfqq->org_ioprio)
+			cfqq->ioprio = cfqq->org_ioprio;
+	}
 
-	cfqq = cfq_find_cfq_hash(cfqd, cfq_hash_key(cfqd, current));
-	if (cfqq) {
-		int limit = cfqd->max_queued;
+	/*
+	 * refile between round-robin lists if we moved the priority class
+	 */
+	if ((ioprio_class != cfqq->ioprio_class || ioprio != cfqq->ioprio) &&
+	    cfqq->on_rr)
+		cfq_resort_rr_list(cfqq, 0);
+}
 
-		if (cfqq->allocated[rw] < cfqd->cfq_queued)
-			return ELV_MQUEUE_MUST;
+static inline pid_t cfq_queue_pid(struct task_struct *task, int rw)
+{
+	if (rw == READ || process_sync(task))
+		return task->pid;
 
-		if (cfqd->busy_queues)
-			limit = q->nr_requests / cfqd->busy_queues;
+	return CFQ_KEY_ASYNC;
+}
 
-		if (limit < cfqd->cfq_queued)
-			limit = cfqd->cfq_queued;
-		else if (limit > cfqd->max_queued)
-			limit = cfqd->max_queued;
+static inline int
+__cfq_may_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq,
+		struct task_struct *task, int rw)
+{
+	if (cfqq->wait_request && cfqq->must_alloc)
+		return ELV_MQUEUE_MUST;
 
-		if (cfqq->allocated[rw] >= limit) {
-			if (limit > cfqq->alloc_limit[rw])
-				cfqq->alloc_limit[rw] = limit;
+	return ELV_MQUEUE_MAY;
+#if 0
+	if (!cfqq || task->flags & PF_MEMALLOC)
+		return ELV_MQUEUE_MAY;
+	if (!cfqq->allocated[rw] || cfqq->must_alloc) {
+		if (cfqq->wait_request)
+			return ELV_MQUEUE_MUST;
 
-			ret = ELV_MQUEUE_NO;
+		/*
+		 * only allow 1 ELV_MQUEUE_MUST per slice, otherwise we
+		 * can quickly flood the queue with writes from a single task
+		 */
+		if (rw == READ || !cfqq->must_alloc_slice) {
+			cfqq->must_alloc_slice = 1;
+			return ELV_MQUEUE_MUST;
 		}
+
+		return ELV_MQUEUE_MAY;
 	}
+	if (cfq_class_idle(cfqq))
+		return ELV_MQUEUE_NO;
+	if (cfqq->allocated[rw] >= cfqd->max_queued) {
+		struct io_context *ioc = get_io_context(GFP_ATOMIC);
+		int ret = ELV_MQUEUE_NO;
 
-	return ret;
+		if (ioc && ioc->nr_batch_requests)
+			ret = ELV_MQUEUE_MAY;
+
+		put_io_context(ioc);
+		return ret;
+	}
+
+	return ELV_MQUEUE_MAY;
+#endif
+}
+
+static int cfq_may_queue(request_queue_t *q, int rw, struct bio *bio)
+{
+	struct cfq_data *cfqd = q->elevator->elevator_data;
+	struct task_struct *tsk = current;
+	struct cfq_queue *cfqq;
+
+	/*
+	 * don't force setup of a queue from here, as a call to may_queue
+	 * does not necessarily imply that a request actually will be queued.
+	 * so just lookup a possibly existing queue, or return 'may queue'
+	 * if that fails
+	 */
+	cfqq = cfq_find_cfq_hash(cfqd, cfq_queue_pid(tsk, rw));
+	if (cfqq) {
+		cfq_init_prio_data(cfqq);
+		cfq_prio_boost(cfqq);
+
+		return __cfq_may_queue(cfqd, cfqq, tsk, rw);
+	}
+
+	return ELV_MQUEUE_MAY;
 }
 
 static void cfq_check_waiters(request_queue_t *q, struct cfq_queue *cfqq)
 {
+	struct cfq_data *cfqd = q->elevator->elevator_data;
 	struct request_list *rl = &q->rq;
-	const int write = waitqueue_active(&rl->wait[WRITE]);
-	const int read = waitqueue_active(&rl->wait[READ]);
 
-	if (read && cfqq->allocated[READ] < cfqq->alloc_limit[READ])
-		wake_up(&rl->wait[READ]);
-	if (write && cfqq->allocated[WRITE] < cfqq->alloc_limit[WRITE])
-		wake_up(&rl->wait[WRITE]);
+	if (cfqq->allocated[READ] <= cfqd->max_queued || cfqd->rq_starved) {
+		smp_mb();
+		if (waitqueue_active(&rl->wait[READ]))
+			wake_up(&rl->wait[READ]);
+	}
+
+	if (cfqq->allocated[WRITE] <= cfqd->max_queued || cfqd->rq_starved) {
+		smp_mb();
+		if (waitqueue_active(&rl->wait[WRITE]))
+			wake_up(&rl->wait[WRITE]);
+	}
 }
 
 /*
@@ -1389,69 +1919,59 @@ static void cfq_put_request(request_queue_t *q, struct request *rq)
 
 	if (crq) {
 		struct cfq_queue *cfqq = crq->cfq_queue;
+		const int rw = rq_data_dir(rq);
 
-		BUG_ON(q->last_merge == rq);
-		BUG_ON(!hlist_unhashed(&crq->hash));
-
-		if (crq->io_context)
-			put_io_context(crq->io_context->ioc);
+		BUG_ON(!cfqq->allocated[rw]);
+		cfqq->allocated[rw]--;
 
-		BUG_ON(!cfqq->allocated[crq->is_write]);
-		cfqq->allocated[crq->is_write]--;
+		put_io_context(crq->io_context->ioc);
 
 		mempool_free(crq, cfqd->crq_pool);
 		rq->elevator_private = NULL;
 
-		smp_mb();
 		cfq_check_waiters(q, cfqq);
 		cfq_put_queue(cfqq);
 	}
 }
 
 /*
- * Allocate cfq data structures associated with this request. A queue and
+ * Allocate cfq data structures associated with this request.
  */
-static int cfq_set_request(request_queue_t *q, struct request *rq, int gfp_mask)
+static int
+cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
+		int gfp_mask)
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
 	struct cfq_io_context *cic;
 	const int rw = rq_data_dir(rq);
-	struct cfq_queue *cfqq, *saved_cfqq;
+	struct cfq_queue *cfqq;
 	struct cfq_rq *crq;
 	unsigned long flags;
 
 	might_sleep_if(gfp_mask & __GFP_WAIT);
 
+	cic = cfq_get_io_context(cfqd, cfq_queue_pid(current, rw), gfp_mask);
+
 	spin_lock_irqsave(q->queue_lock, flags);
 
-	cfqq = __cfq_get_queue(cfqd, cfq_hash_key(cfqd, current), gfp_mask);
-	if (!cfqq)
-		goto out_lock;
+	if (!cic)
+		goto queue_fail;
+
+	if (!cic->cfqq) {
+		cfqq = cfq_get_queue(cfqd, current->pid, gfp_mask);
+		if (!cfqq)
+			goto queue_fail;
 
-repeat:
-	if (cfqq->allocated[rw] >= cfqd->max_queued)
-		goto out_lock;
+		cic->cfqq = cfqq;
+	} else
+		cfqq = cic->cfqq;
 
 	cfqq->allocated[rw]++;
+	cfqq->must_alloc = 0;
+	cfqd->rq_starved = 0;
+	atomic_inc(&cfqq->ref);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 
-	/*
-	 * if hashing type has changed, the cfq_queue might change here.
-	 */
-	saved_cfqq = cfqq;
-	cic = cfq_get_io_context(&cfqq, gfp_mask);
-	if (!cic)
-		goto err;
-
-	/*
-	 * repeat allocation checks on queue change
-	 */
-	if (unlikely(saved_cfqq != cfqq)) {
-		spin_lock_irqsave(q->queue_lock, flags);
-		saved_cfqq->allocated[rw]--;
-		goto repeat;
-	}
-
 	crq = mempool_alloc(cfqd->crq_pool, gfp_mask);
 	if (crq) {
 		RB_CLEAR(&crq->rb_node);
@@ -1460,24 +1980,130 @@ repeat:
 		INIT_HLIST_NODE(&crq->hash);
 		crq->cfq_queue = cfqq;
 		crq->io_context = cic;
-		crq->service_start = crq->queue_start = 0;
-		crq->in_flight = crq->accounted = crq->is_sync = 0;
-		crq->is_write = rw;
+		crq->in_flight = crq->accounted = 0;
+		crq->is_sync = (rw == READ || process_sync(current));
+		crq->requeued = 0;
 		rq->elevator_private = crq;
-		cfqq->alloc_limit[rw] = 0;
 		return 0;
 	}
 
-	put_io_context(cic->ioc);
-err:
 	spin_lock_irqsave(q->queue_lock, flags);
 	cfqq->allocated[rw]--;
+	if (!(cfqq->allocated[0] + cfqq->allocated[1]))
+		cfqq->must_alloc = 1;
 	cfq_put_queue(cfqq);
-out_lock:
+queue_fail:
+	if (cic)
+		put_io_context(cic->ioc);
+	/*
+	 * mark us rq allocation starved. we need to kickstart the process
+	 * ourselves if there are no pending requests that can do it for us.
+	 * that would be an extremely rare OOM situation
+	 */
+	cfqd->rq_starved = 1;
+	kblockd_schedule_work(&cfqd->unplug_work);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 	return 1;
 }
 
+static void cfq_kick_queue(void *data)
+{
+	request_queue_t *q = data;
+	struct cfq_data *cfqd = q->elevator->elevator_data;
+	unsigned long flags;
+
+	spin_lock_irqsave(q->queue_lock, flags);
+
+	if (cfqd->rq_starved) {
+		struct request_list *rl = &q->rq;
+
+		/*
+		 * we aren't guaranteed to get a request after this, but we
+		 * have to be opportunistic
+		 */
+		smp_mb();
+		if (waitqueue_active(&rl->wait[READ]))
+			wake_up(&rl->wait[READ]);
+		if (waitqueue_active(&rl->wait[WRITE]))
+			wake_up(&rl->wait[WRITE]);
+	}
+
+	blk_remove_plug(q);
+	q->request_fn(q);
+	spin_unlock_irqrestore(q->queue_lock, flags);
+}
+
+/*
+ * Timer running if the active_queue is currently idling inside its time slice
+ */
+static void cfq_idle_slice_timer(unsigned long data)
+{
+	struct cfq_data *cfqd = (struct cfq_data *) data;
+	struct cfq_queue *cfqq;
+	unsigned long flags;
+
+	spin_lock_irqsave(cfqd->queue->queue_lock, flags);
+
+	if ((cfqq = cfqd->active_queue) != NULL) {
+		unsigned long now = jiffies;
+
+		/*
+		 * expired
+		 */
+		if (time_after(now, cfqq->slice_end))
+			goto expire;
+
+		/*
+		 * only expire and reinvoke request handler, if there are
+		 * other queues with pending requests
+		 */
+		if (!cfq_pending_requests(cfqd)) {
+			cfqd->idle_slice_timer.expires = min(now + cfqd->cfq_slice_idle, cfqq->slice_end);
+			add_timer(&cfqd->idle_slice_timer);
+			goto out_cont;
+		}
+
+		/*
+		 * not expired and it has a request pending, let it dispatch
+		 */
+		if (!RB_EMPTY(&cfqq->sort_list)) {
+			cfqq->must_dispatch = 1;
+			goto out_kick;
+		}
+	}
+expire:
+	cfq_slice_expired(cfqd, 0);
+out_kick:
+	if (cfq_pending_requests(cfqd))
+		kblockd_schedule_work(&cfqd->unplug_work);
+out_cont:
+	spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
+}
+
+/*
+ * Timer running if an idle class queue is waiting for service
+ */
+static void cfq_idle_class_timer(unsigned long data)
+{
+	struct cfq_data *cfqd = (struct cfq_data *) data;
+	unsigned long flags, end;
+
+	spin_lock_irqsave(cfqd->queue->queue_lock, flags);
+
+	/*
+	 * race with a non-idle queue, reset timer
+	 */
+	end = cfqd->last_end_request + CFQ_IDLE_GRACE;
+	if (!time_after_eq(jiffies, end)) {
+		cfqd->idle_class_timer.expires = end;
+		add_timer(&cfqd->idle_class_timer);
+	} else
+		kblockd_schedule_work(&cfqd->unplug_work);
+
+	spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
+}
+
+
 static void cfq_put_cfqd(struct cfq_data *cfqd)
 {
 	request_queue_t *q = cfqd->queue;
@@ -1485,6 +2111,8 @@ static void cfq_put_cfqd(struct cfq_data *cfqd)
 	if (!atomic_dec_and_test(&cfqd->ref))
 		return;
 
+	blk_sync_queue(q);
+
 	blk_put_queue(q);
 
 	mempool_destroy(cfqd->crq_pool);
@@ -1495,7 +2123,11 @@ static void cfq_put_cfqd(struct cfq_data *cfqd)
 
 static void cfq_exit_queue(elevator_t *e)
 {
-	cfq_put_cfqd(e->elevator_data);
+	struct cfq_data *cfqd = e->elevator_data;
+
+	del_timer_sync(&cfqd->idle_slice_timer);
+	del_timer_sync(&cfqd->idle_class_timer);
+	cfq_put_cfqd(cfqd);
 }
 
 static int cfq_init_queue(request_queue_t *q, elevator_t *e)
@@ -1508,7 +2140,13 @@ static int cfq_init_queue(request_queue_t *q, elevator_t *e)
 		return -ENOMEM;
 
 	memset(cfqd, 0, sizeof(*cfqd));
-	INIT_LIST_HEAD(&cfqd->rr_list);
+
+	for (i = 0; i < CFQ_PRIO_LISTS; i++)
+		INIT_LIST_HEAD(&cfqd->rr_list[i]);
+
+	INIT_LIST_HEAD(&cfqd->busy_rr);
+	INIT_LIST_HEAD(&cfqd->cur_rr);
+	INIT_LIST_HEAD(&cfqd->idle_rr);
 	INIT_LIST_HEAD(&cfqd->empty_list);
 
 	cfqd->crq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_MHASH_ENTRIES, GFP_KERNEL);
@@ -1533,25 +2171,32 @@ static int cfq_init_queue(request_queue_t *q, elevator_t *e)
 	cfqd->queue = q;
 	atomic_inc(&q->refcnt);
 
-	/*
-	 * just set it to some high value, we want anyone to be able to queue
-	 * some requests. fairness is handled differently
-	 */
-	q->nr_requests = 1024;
-	cfqd->max_queued = q->nr_requests / 16;
+	cfqd->max_queued = q->nr_requests / 4;
 	q->nr_batching = cfq_queued;
-	cfqd->key_type = CFQ_KEY_TGID;
-	cfqd->find_best_crq = 1;
+
+	init_timer(&cfqd->idle_slice_timer);
+	cfqd->idle_slice_timer.function = cfq_idle_slice_timer;
+	cfqd->idle_slice_timer.data = (unsigned long) cfqd;
+
+	init_timer(&cfqd->idle_class_timer);
+	cfqd->idle_class_timer.function = cfq_idle_class_timer;
+	cfqd->idle_class_timer.data = (unsigned long) cfqd;
+
+	INIT_WORK(&cfqd->unplug_work, cfq_kick_queue, q);
+
 	atomic_set(&cfqd->ref, 1);
 
 	cfqd->cfq_queued = cfq_queued;
 	cfqd->cfq_quantum = cfq_quantum;
-	cfqd->cfq_fifo_expire_r = cfq_fifo_expire_r;
-	cfqd->cfq_fifo_expire_w = cfq_fifo_expire_w;
-	cfqd->cfq_fifo_batch_expire = cfq_fifo_rate;
+	cfqd->cfq_fifo_expire[0] = cfq_fifo_expire[0];
+	cfqd->cfq_fifo_expire[1] = cfq_fifo_expire[1];
 	cfqd->cfq_back_max = cfq_back_max;
 	cfqd->cfq_back_penalty = cfq_back_penalty;
-
+	cfqd->cfq_slice[0] = cfq_slice_async;
+	cfqd->cfq_slice[1] = cfq_slice_sync;
+	cfqd->cfq_slice_async_rq = cfq_slice_async_rq;
+	cfqd->cfq_slice_idle = cfq_slice_idle;
+	cfqd->cfq_max_depth = cfq_max_depth;
 	return 0;
 out_crqpool:
 	kfree(cfqd->cfq_hash);
@@ -1595,7 +2240,6 @@ fail:
 	return -ENOMEM;
 }
 
-
 /*
  * sysfs parts below -->
  */
@@ -1620,45 +2264,6 @@ cfq_var_store(unsigned int *var, const char *page, size_t count)
 	return count;
 }
 
-static ssize_t
-cfq_clear_elapsed(struct cfq_data *cfqd, const char *page, size_t count)
-{
-	max_elapsed_dispatch = max_elapsed_crq = 0;
-	return count;
-}
-
-static ssize_t
-cfq_set_key_type(struct cfq_data *cfqd, const char *page, size_t count)
-{
-	spin_lock_irq(cfqd->queue->queue_lock);
-	if (!strncmp(page, "pgid", 4))
-		cfqd->key_type = CFQ_KEY_PGID;
-	else if (!strncmp(page, "tgid", 4))
-		cfqd->key_type = CFQ_KEY_TGID;
-	else if (!strncmp(page, "uid", 3))
-		cfqd->key_type = CFQ_KEY_UID;
-	else if (!strncmp(page, "gid", 3))
-		cfqd->key_type = CFQ_KEY_GID;
-	spin_unlock_irq(cfqd->queue->queue_lock);
-	return count;
-}
-
-static ssize_t
-cfq_read_key_type(struct cfq_data *cfqd, char *page)
-{
-	ssize_t len = 0;
-	int i;
-
-	for (i = CFQ_KEY_PGID; i < CFQ_KEY_LAST; i++) {
-		if (cfqd->key_type == i)
-			len += sprintf(page+len, "[%s] ", cfq_key_types[i]);
-		else
-			len += sprintf(page+len, "%s ", cfq_key_types[i]);
-	}
-	len += sprintf(page+len, "\n");
-	return len;
-}
-
 #define SHOW_FUNCTION(__FUNC, __VAR, __CONV)				\
 static ssize_t __FUNC(struct cfq_data *cfqd, char *page)		\
 {									\
@@ -1669,12 +2274,15 @@ static ssize_t __FUNC(struct cfq_data *cfqd, char *page)		\
 }
 SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0);
 SHOW_FUNCTION(cfq_queued_show, cfqd->cfq_queued, 0);
-SHOW_FUNCTION(cfq_fifo_expire_r_show, cfqd->cfq_fifo_expire_r, 1);
-SHOW_FUNCTION(cfq_fifo_expire_w_show, cfqd->cfq_fifo_expire_w, 1);
-SHOW_FUNCTION(cfq_fifo_batch_expire_show, cfqd->cfq_fifo_batch_expire, 1);
-SHOW_FUNCTION(cfq_find_best_show, cfqd->find_best_crq, 0);
+SHOW_FUNCTION(cfq_fifo_expire_sync_show, cfqd->cfq_fifo_expire[1], 1);
+SHOW_FUNCTION(cfq_fifo_expire_async_show, cfqd->cfq_fifo_expire[0], 1);
 SHOW_FUNCTION(cfq_back_max_show, cfqd->cfq_back_max, 0);
 SHOW_FUNCTION(cfq_back_penalty_show, cfqd->cfq_back_penalty, 0);
+SHOW_FUNCTION(cfq_slice_idle_show, cfqd->cfq_slice_idle, 1);
+SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1);
+SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1);
+SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0);
+SHOW_FUNCTION(cfq_max_depth_show, cfqd->cfq_max_depth, 0);
 #undef SHOW_FUNCTION
 
 #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV)			\
@@ -1694,12 +2302,15 @@ static ssize_t __FUNC(struct cfq_data *cfqd, const char *page, size_t count)	\
 }
 STORE_FUNCTION(cfq_quantum_store, &cfqd->cfq_quantum, 1, UINT_MAX, 0);
 STORE_FUNCTION(cfq_queued_store, &cfqd->cfq_queued, 1, UINT_MAX, 0);
-STORE_FUNCTION(cfq_fifo_expire_r_store, &cfqd->cfq_fifo_expire_r, 1, UINT_MAX, 1);
-STORE_FUNCTION(cfq_fifo_expire_w_store, &cfqd->cfq_fifo_expire_w, 1, UINT_MAX, 1);
-STORE_FUNCTION(cfq_fifo_batch_expire_store, &cfqd->cfq_fifo_batch_expire, 0, UINT_MAX, 1);
-STORE_FUNCTION(cfq_find_best_store, &cfqd->find_best_crq, 0, 1, 0);
+STORE_FUNCTION(cfq_fifo_expire_sync_store, &cfqd->cfq_fifo_expire[1], 1, UINT_MAX, 1);
+STORE_FUNCTION(cfq_fifo_expire_async_store, &cfqd->cfq_fifo_expire[0], 1, UINT_MAX, 1);
 STORE_FUNCTION(cfq_back_max_store, &cfqd->cfq_back_max, 0, UINT_MAX, 0);
 STORE_FUNCTION(cfq_back_penalty_store, &cfqd->cfq_back_penalty, 1, UINT_MAX, 0);
+STORE_FUNCTION(cfq_slice_idle_store, &cfqd->cfq_slice_idle, 0, UINT_MAX, 1);
+STORE_FUNCTION(cfq_slice_sync_store, &cfqd->cfq_slice[1], 1, UINT_MAX, 1);
+STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1);
+STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1, UINT_MAX, 0);
+STORE_FUNCTION(cfq_max_depth_store, &cfqd->cfq_max_depth, 1, UINT_MAX, 0);
 #undef STORE_FUNCTION
 
 static struct cfq_fs_entry cfq_quantum_entry = {
@@ -1712,25 +2323,15 @@ static struct cfq_fs_entry cfq_queued_entry = {
 	.show = cfq_queued_show,
 	.store = cfq_queued_store,
 };
-static struct cfq_fs_entry cfq_fifo_expire_r_entry = {
+static struct cfq_fs_entry cfq_fifo_expire_sync_entry = {
 	.attr = {.name = "fifo_expire_sync", .mode = S_IRUGO | S_IWUSR },
-	.show = cfq_fifo_expire_r_show,
-	.store = cfq_fifo_expire_r_store,
+	.show = cfq_fifo_expire_sync_show,
+	.store = cfq_fifo_expire_sync_store,
 };
-static struct cfq_fs_entry cfq_fifo_expire_w_entry = {
+static struct cfq_fs_entry cfq_fifo_expire_async_entry = {
 	.attr = {.name = "fifo_expire_async", .mode = S_IRUGO | S_IWUSR },
-	.show = cfq_fifo_expire_w_show,
-	.store = cfq_fifo_expire_w_store,
-};
-static struct cfq_fs_entry cfq_fifo_batch_expire_entry = {
-	.attr = {.name = "fifo_batch_expire", .mode = S_IRUGO | S_IWUSR },
-	.show = cfq_fifo_batch_expire_show,
-	.store = cfq_fifo_batch_expire_store,
-};
-static struct cfq_fs_entry cfq_find_best_entry = {
-	.attr = {.name = "find_best_crq", .mode = S_IRUGO | S_IWUSR },
-	.show = cfq_find_best_show,
-	.store = cfq_find_best_store,
+	.show = cfq_fifo_expire_async_show,
+	.store = cfq_fifo_expire_async_store,
 };
 static struct cfq_fs_entry cfq_back_max_entry = {
 	.attr = {.name = "back_seek_max", .mode = S_IRUGO | S_IWUSR },
@@ -1742,27 +2343,43 @@ static struct cfq_fs_entry cfq_back_penalty_entry = {
 	.show = cfq_back_penalty_show,
 	.store = cfq_back_penalty_store,
 };
-static struct cfq_fs_entry cfq_clear_elapsed_entry = {
-	.attr = {.name = "clear_elapsed", .mode = S_IWUSR },
-	.store = cfq_clear_elapsed,
+static struct cfq_fs_entry cfq_slice_sync_entry = {
+	.attr = {.name = "slice_sync", .mode = S_IRUGO | S_IWUSR },
+	.show = cfq_slice_sync_show,
+	.store = cfq_slice_sync_store,
 };
-static struct cfq_fs_entry cfq_key_type_entry = {
-	.attr = {.name = "key_type", .mode = S_IRUGO | S_IWUSR },
-	.show = cfq_read_key_type,
-	.store = cfq_set_key_type,
+static struct cfq_fs_entry cfq_slice_async_entry = {
+	.attr = {.name = "slice_async", .mode = S_IRUGO | S_IWUSR },
+	.show = cfq_slice_async_show,
+	.store = cfq_slice_async_store,
+};
+static struct cfq_fs_entry cfq_slice_async_rq_entry = {
+	.attr = {.name = "slice_async_rq", .mode = S_IRUGO | S_IWUSR },
+	.show = cfq_slice_async_rq_show,
+	.store = cfq_slice_async_rq_store,
+};
+static struct cfq_fs_entry cfq_slice_idle_entry = {
+	.attr = {.name = "slice_idle", .mode = S_IRUGO | S_IWUSR },
+	.show = cfq_slice_idle_show,
+	.store = cfq_slice_idle_store,
+};
+static struct cfq_fs_entry cfq_max_depth_entry = {
+	.attr = {.name = "max_depth", .mode = S_IRUGO | S_IWUSR },
+	.show = cfq_max_depth_show,
+	.store = cfq_max_depth_store,
 };
-
 static struct attribute *default_attrs[] = {
 	&cfq_quantum_entry.attr,
 	&cfq_queued_entry.attr,
-	&cfq_fifo_expire_r_entry.attr,
-	&cfq_fifo_expire_w_entry.attr,
-	&cfq_fifo_batch_expire_entry.attr,
-	&cfq_key_type_entry.attr,
-	&cfq_find_best_entry.attr,
+	&cfq_fifo_expire_sync_entry.attr,
+	&cfq_fifo_expire_async_entry.attr,
 	&cfq_back_max_entry.attr,
 	&cfq_back_penalty_entry.attr,
-	&cfq_clear_elapsed_entry.attr,
+	&cfq_slice_sync_entry.attr,
+	&cfq_slice_async_entry.attr,
+	&cfq_slice_async_rq_entry.attr,
+	&cfq_slice_idle_entry.attr,
+	&cfq_max_depth_entry.attr,
 	NULL,
 };
 
@@ -1832,21 +2449,46 @@ static int __init cfq_init(void)
 {
 	int ret;
 
+	/*
+	 * could be 0 on HZ < 1000 setups
+	 */
+	if (!cfq_slice_async)
+		cfq_slice_async = 1;
+	if (!cfq_slice_idle)
+		cfq_slice_idle = 1;
+
 	if (cfq_slab_setup())
 		return -ENOMEM;
 
 	ret = elv_register(&iosched_cfq);
-	if (!ret) {
-		__module_get(THIS_MODULE);
-		return 0;
-	}
+	if (ret)
+		cfq_slab_kill();
 
-	cfq_slab_kill();
 	return ret;
 }
 
 static void __exit cfq_exit(void)
 {
+	struct task_struct *g, *p;
+	unsigned long flags;
+
+	read_lock_irqsave(&tasklist_lock, flags);
+
+	/*
+	 * iterate each process in the system, removing our io_context
+	 */
+	do_each_thread(g, p) {
+		struct io_context *ioc = p->io_context;
+
+		if (ioc && ioc->cic) {
+			ioc->cic->exit(ioc->cic);
+			cfq_free_io_context(ioc->cic);
+			ioc->cic = NULL;
+		}
+	} while_each_thread(g, p);
+
+	read_unlock_irqrestore(&tasklist_lock, flags);
+
 	cfq_slab_kill();
 	elv_unregister(&iosched_cfq);
 }
diff --git a/drivers/block/deadline-iosched.c b/drivers/block/deadline-iosched.c
index 4bc2fea7327..ff5201e0215 100644
--- a/drivers/block/deadline-iosched.c
+++ b/drivers/block/deadline-iosched.c
@@ -760,7 +760,8 @@ static void deadline_put_request(request_queue_t *q, struct request *rq)
 }
 
 static int
-deadline_set_request(request_queue_t *q, struct request *rq, int gfp_mask)
+deadline_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
+		     int gfp_mask)
 {
 	struct deadline_data *dd = q->elevator->elevator_data;
 	struct deadline_rq *drq;
diff --git a/drivers/block/elevator.c b/drivers/block/elevator.c
index f831f08f839..98f0126a2de 100644
--- a/drivers/block/elevator.c
+++ b/drivers/block/elevator.c
@@ -486,12 +486,13 @@ struct request *elv_former_request(request_queue_t *q, struct request *rq)
 	return NULL;
 }
 
-int elv_set_request(request_queue_t *q, struct request *rq, int gfp_mask)
+int elv_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
+		    int gfp_mask)
 {
 	elevator_t *e = q->elevator;
 
 	if (e->ops->elevator_set_req_fn)
-		return e->ops->elevator_set_req_fn(q, rq, gfp_mask);
+		return e->ops->elevator_set_req_fn(q, rq, bio, gfp_mask);
 
 	rq->elevator_private = NULL;
 	return 0;
@@ -505,12 +506,12 @@ void elv_put_request(request_queue_t *q, struct request *rq)
 		e->ops->elevator_put_req_fn(q, rq);
 }
 
-int elv_may_queue(request_queue_t *q, int rw)
+int elv_may_queue(request_queue_t *q, int rw, struct bio *bio)
 {
 	elevator_t *e = q->elevator;
 
 	if (e->ops->elevator_may_queue_fn)
-		return e->ops->elevator_may_queue_fn(q, rw);
+		return e->ops->elevator_may_queue_fn(q, rw, bio);
 
 	return ELV_MQUEUE_MAY;
 }
diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c
index 60e64091de1..234fdcfbdf0 100644
--- a/drivers/block/ll_rw_blk.c
+++ b/drivers/block/ll_rw_blk.c
@@ -276,6 +276,7 @@ static inline void rq_init(request_queue_t *q, struct request *rq)
 	rq->errors = 0;
 	rq->rq_status = RQ_ACTIVE;
 	rq->bio = rq->biotail = NULL;
+	rq->ioprio = 0;
 	rq->buffer = NULL;
 	rq->ref_count = 1;
 	rq->q = q;
@@ -1442,11 +1443,7 @@ void __generic_unplug_device(request_queue_t *q)
 	if (!blk_remove_plug(q))
 		return;
 
-	/*
-	 * was plugged, fire request_fn if queue has stuff to do
-	 */
-	if (elv_next_request(q))
-		q->request_fn(q);
+	q->request_fn(q);
 }
 EXPORT_SYMBOL(__generic_unplug_device);
 
@@ -1776,8 +1773,8 @@ static inline void blk_free_request(request_queue_t *q, struct request *rq)
 	mempool_free(rq, q->rq.rq_pool);
 }
 
-static inline struct request *blk_alloc_request(request_queue_t *q, int rw,
-						int gfp_mask)
+static inline struct request *
+blk_alloc_request(request_queue_t *q, int rw, struct bio *bio, int gfp_mask)
 {
 	struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
 
@@ -1790,7 +1787,7 @@ static inline struct request *blk_alloc_request(request_queue_t *q, int rw,
 	 */
 	rq->flags = rw;
 
-	if (!elv_set_request(q, rq, gfp_mask))
+	if (!elv_set_request(q, rq, bio, gfp_mask))
 		return rq;
 
 	mempool_free(rq, q->rq.rq_pool);
@@ -1872,7 +1869,8 @@ static void freed_request(request_queue_t *q, int rw)
 /*
  * Get a free request, queue_lock must not be held
  */
-static struct request *get_request(request_queue_t *q, int rw, int gfp_mask)
+static struct request *get_request(request_queue_t *q, int rw, struct bio *bio,
+				   int gfp_mask)
 {
 	struct request *rq = NULL;
 	struct request_list *rl = &q->rq;
@@ -1895,7 +1893,7 @@ static struct request *get_request(request_queue_t *q, int rw, int gfp_mask)
 		}
 	}
 
-	switch (elv_may_queue(q, rw)) {
+	switch (elv_may_queue(q, rw, bio)) {
 		case ELV_MQUEUE_NO:
 			goto rq_starved;
 		case ELV_MQUEUE_MAY:
@@ -1920,7 +1918,7 @@ get_rq:
 		set_queue_congested(q, rw);
 	spin_unlock_irq(q->queue_lock);
 
-	rq = blk_alloc_request(q, rw, gfp_mask);
+	rq = blk_alloc_request(q, rw, bio, gfp_mask);
 	if (!rq) {
 		/*
 		 * Allocation failed presumably due to memory. Undo anything
@@ -1961,7 +1959,8 @@ out:
  * No available requests for this queue, unplug the device and wait for some
  * requests to become available.
  */
-static struct request *get_request_wait(request_queue_t *q, int rw)
+static struct request *get_request_wait(request_queue_t *q, int rw,
+					struct bio *bio)
 {
 	DEFINE_WAIT(wait);
 	struct request *rq;
@@ -1972,7 +1971,7 @@ static struct request *get_request_wait(request_queue_t *q, int rw)
 		prepare_to_wait_exclusive(&rl->wait[rw], &wait,
 				TASK_UNINTERRUPTIBLE);
 
-		rq = get_request(q, rw, GFP_NOIO);
+		rq = get_request(q, rw, bio, GFP_NOIO);
 
 		if (!rq) {
 			struct io_context *ioc;
@@ -2003,9 +2002,9 @@ struct request *blk_get_request(request_queue_t *q, int rw, int gfp_mask)
 	BUG_ON(rw != READ && rw != WRITE);
 
 	if (gfp_mask & __GFP_WAIT)
-		rq = get_request_wait(q, rw);
+		rq = get_request_wait(q, rw, NULL);
 	else
-		rq = get_request(q, rw, gfp_mask);
+		rq = get_request(q, rw, NULL, gfp_mask);
 
 	return rq;
 }
@@ -2333,7 +2332,6 @@ static void __blk_put_request(request_queue_t *q, struct request *req)
 		return;
 
 	req->rq_status = RQ_INACTIVE;
-	req->q = NULL;
 	req->rl = NULL;
 
 	/*
@@ -2462,6 +2460,8 @@ static int attempt_merge(request_queue_t *q, struct request *req,
 		req->rq_disk->in_flight--;
 	}
 
+	req->ioprio = ioprio_best(req->ioprio, next->ioprio);
+
 	__blk_put_request(q, next);
 	return 1;
 }
@@ -2514,11 +2514,13 @@ static int __make_request(request_queue_t *q, struct bio *bio)
 {
 	struct request *req, *freereq = NULL;
 	int el_ret, rw, nr_sectors, cur_nr_sectors, barrier, err, sync;
+	unsigned short prio;
 	sector_t sector;
 
 	sector = bio->bi_sector;
 	nr_sectors = bio_sectors(bio);
 	cur_nr_sectors = bio_cur_sectors(bio);
+	prio = bio_prio(bio);
 
 	rw = bio_data_dir(bio);
 	sync = bio_sync(bio);
@@ -2559,6 +2561,7 @@ again:
 			req->biotail->bi_next = bio;
 			req->biotail = bio;
 			req->nr_sectors = req->hard_nr_sectors += nr_sectors;
+			req->ioprio = ioprio_best(req->ioprio, prio);
 			drive_stat_acct(req, nr_sectors, 0);
 			if (!attempt_back_merge(q, req))
 				elv_merged_request(q, req);
@@ -2583,6 +2586,7 @@ again:
 			req->hard_cur_sectors = cur_nr_sectors;
 			req->sector = req->hard_sector = sector;
 			req->nr_sectors = req->hard_nr_sectors += nr_sectors;
+			req->ioprio = ioprio_best(req->ioprio, prio);
 			drive_stat_acct(req, nr_sectors, 0);
 			if (!attempt_front_merge(q, req))
 				elv_merged_request(q, req);
@@ -2610,7 +2614,7 @@ get_rq:
 		freereq = NULL;
 	} else {
 		spin_unlock_irq(q->queue_lock);
-		if ((freereq = get_request(q, rw, GFP_ATOMIC)) == NULL) {
+		if ((freereq = get_request(q, rw, bio, GFP_ATOMIC)) == NULL) {
 			/*
 			 * READA bit set
 			 */
@@ -2618,7 +2622,7 @@ get_rq:
 			if (bio_rw_ahead(bio))
 				goto end_io;
 	
-			freereq = get_request_wait(q, rw);
+			freereq = get_request_wait(q, rw, bio);
 		}
 		goto again;
 	}
@@ -2646,6 +2650,7 @@ get_rq:
 	req->buffer = bio_data(bio);	/* see ->buffer comment above */
 	req->waiting = NULL;
 	req->bio = req->biotail = bio;
+	req->ioprio = prio;
 	req->rq_disk = bio->bi_bdev->bd_disk;
 	req->start_time = jiffies;
 
@@ -2674,7 +2679,7 @@ static inline void blk_partition_remap(struct bio *bio)
 	if (bdev != bdev->bd_contains) {
 		struct hd_struct *p = bdev->bd_part;
 
-		switch (bio->bi_rw) {
+		switch (bio_data_dir(bio)) {
 		case READ:
 			p->read_sectors += bio_sectors(bio);
 			p->reads++;
@@ -2693,6 +2698,7 @@ void blk_finish_queue_drain(request_queue_t *q)
 {
 	struct request_list *rl = &q->rq;
 	struct request *rq;
+	int requeued = 0;
 
 	spin_lock_irq(q->queue_lock);
 	clear_bit(QUEUE_FLAG_DRAIN, &q->queue_flags);
@@ -2701,9 +2707,13 @@ void blk_finish_queue_drain(request_queue_t *q)
 		rq = list_entry_rq(q->drain_list.next);
 
 		list_del_init(&rq->queuelist);
-		__elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 1);
+		elv_requeue_request(q, rq);
+		requeued++;
 	}
 
+	if (requeued)
+		q->request_fn(q);
+
 	spin_unlock_irq(q->queue_lock);
 
 	wake_up(&rl->wait[0]);
@@ -2900,7 +2910,7 @@ void submit_bio(int rw, struct bio *bio)
 
 	BIO_BUG_ON(!bio->bi_size);
 	BIO_BUG_ON(!bio->bi_io_vec);
-	bio->bi_rw = rw;
+	bio->bi_rw |= rw;
 	if (rw & WRITE)
 		mod_page_state(pgpgout, count);
 	else
@@ -3257,8 +3267,11 @@ void exit_io_context(void)
 	struct io_context *ioc;
 
 	local_irq_save(flags);
+	task_lock(current);
 	ioc = current->io_context;
 	current->io_context = NULL;
+	ioc->task = NULL;
+	task_unlock(current);
 	local_irq_restore(flags);
 
 	if (ioc->aic && ioc->aic->exit)
@@ -3293,12 +3306,12 @@ struct io_context *get_io_context(int gfp_flags)
 	ret = kmem_cache_alloc(iocontext_cachep, gfp_flags);
 	if (ret) {
 		atomic_set(&ret->refcount, 1);
-		ret->pid = tsk->pid;
+		ret->task = current;
+		ret->set_ioprio = NULL;
 		ret->last_waited = jiffies; /* doesn't matter... */
 		ret->nr_batch_requests = 0; /* because this is 0 */
 		ret->aic = NULL;
 		ret->cic = NULL;
-		spin_lock_init(&ret->lock);
 
 		local_irq_save(flags);
 
diff --git a/fs/Makefile b/fs/Makefile
index fc92e59e9fa..20edcf28bfd 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -10,6 +10,7 @@ obj-y :=	open.o read_write.o file_table.o buffer.o  bio.o super.o \
 		ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \
 		attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \
 		seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \
+		ioprio.o
 
 obj-$(CONFIG_EPOLL)		+= eventpoll.o
 obj-$(CONFIG_COMPAT)		+= compat.o
diff --git a/fs/ioprio.c b/fs/ioprio.c
new file mode 100644
index 00000000000..663e420636d
--- /dev/null
+++ b/fs/ioprio.c
@@ -0,0 +1,172 @@
+/*
+ * fs/ioprio.c
+ *
+ * Copyright (C) 2004 Jens Axboe <axboe@suse.de>
+ *
+ * Helper functions for setting/querying io priorities of processes. The
+ * system calls closely mimmick getpriority/setpriority, see the man page for
+ * those. The prio argument is a composite of prio class and prio data, where
+ * the data argument has meaning within that class. The standard scheduling
+ * classes have 8 distinct prio levels, with 0 being the highest prio and 7
+ * being the lowest.
+ *
+ * IOW, setting BE scheduling class with prio 2 is done ala:
+ *
+ * unsigned int prio = (IOPRIO_CLASS_BE << IOPRIO_CLASS_SHIFT) | 2;
+ *
+ * ioprio_set(PRIO_PROCESS, pid, prio);
+ *
+ * See also Documentation/block/ioprio.txt
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/ioprio.h>
+#include <linux/blkdev.h>
+
+static int set_task_ioprio(struct task_struct *task, int ioprio)
+{
+	struct io_context *ioc;
+
+	if (task->uid != current->euid &&
+	    task->uid != current->uid && !capable(CAP_SYS_NICE))
+		return -EPERM;
+
+	task_lock(task);
+
+	task->ioprio = ioprio;
+
+	ioc = task->io_context;
+	if (ioc && ioc->set_ioprio)
+		ioc->set_ioprio(ioc, ioprio);
+
+	task_unlock(task);
+	return 0;
+}
+
+asmlinkage int sys_ioprio_set(int which, int who, int ioprio)
+{
+	int class = IOPRIO_PRIO_CLASS(ioprio);
+	int data = IOPRIO_PRIO_DATA(ioprio);
+	struct task_struct *p, *g;
+	struct user_struct *user;
+	int ret;
+
+	switch (class) {
+		case IOPRIO_CLASS_RT:
+			if (!capable(CAP_SYS_ADMIN))
+				return -EPERM;
+			/* fall through, rt has prio field too */
+		case IOPRIO_CLASS_BE:
+			if (data >= IOPRIO_BE_NR || data < 0)
+				return -EINVAL;
+
+			break;
+		case IOPRIO_CLASS_IDLE:
+			break;
+		default:
+			return -EINVAL;
+	}
+
+	ret = -ESRCH;
+	read_lock_irq(&tasklist_lock);
+	switch (which) {
+		case IOPRIO_WHO_PROCESS:
+			if (!who)
+				p = current;
+			else
+				p = find_task_by_pid(who);
+			if (p)
+				ret = set_task_ioprio(p, ioprio);
+			break;
+		case IOPRIO_WHO_PGRP:
+			if (!who)
+				who = process_group(current);
+			do_each_task_pid(who, PIDTYPE_PGID, p) {
+				ret = set_task_ioprio(p, ioprio);
+				if (ret)
+					break;
+			} while_each_task_pid(who, PIDTYPE_PGID, p);
+			break;
+		case IOPRIO_WHO_USER:
+			if (!who)
+				user = current->user;
+			else
+				user = find_user(who);
+
+			if (!user)
+				break;
+
+			do_each_thread(g, p) {
+				if (p->uid != who)
+					continue;
+				ret = set_task_ioprio(p, ioprio);
+				if (ret)
+					break;
+			} while_each_thread(g, p);
+
+			if (who)
+				free_uid(user);
+			break;
+		default:
+			ret = -EINVAL;
+	}
+
+	read_unlock_irq(&tasklist_lock);
+	return ret;
+}
+
+asmlinkage int sys_ioprio_get(int which, int who)
+{
+	struct task_struct *g, *p;
+	struct user_struct *user;
+	int ret = -ESRCH;
+
+	read_lock_irq(&tasklist_lock);
+	switch (which) {
+		case IOPRIO_WHO_PROCESS:
+			if (!who)
+				p = current;
+			else
+				p = find_task_by_pid(who);
+			if (p)
+				ret = p->ioprio;
+			break;
+		case IOPRIO_WHO_PGRP:
+			if (!who)
+				who = process_group(current);
+			do_each_task_pid(who, PIDTYPE_PGID, p) {
+				if (ret == -ESRCH)
+					ret = p->ioprio;
+				else
+					ret = ioprio_best(ret, p->ioprio);
+			} while_each_task_pid(who, PIDTYPE_PGID, p);
+			break;
+		case IOPRIO_WHO_USER:
+			if (!who)
+				user = current->user;
+			else
+				user = find_user(who);
+
+			if (!user)
+				break;
+
+			do_each_thread(g, p) {
+				if (p->uid != user->uid)
+					continue;
+				if (ret == -ESRCH)
+					ret = p->ioprio;
+				else
+					ret = ioprio_best(ret, p->ioprio);
+			} while_each_thread(g, p);
+
+			if (who)
+				free_uid(user);
+			break;
+		default:
+			ret = -EINVAL;
+	}
+
+	read_unlock_irq(&tasklist_lock);
+	return ret;
+}
+
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 7b87707acc3..d1bcf0da672 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -645,18 +645,22 @@ struct buffer_chunk {
 
 static void write_chunk(struct buffer_chunk *chunk) {
     int i;
+    get_fs_excl();
     for (i = 0; i < chunk->nr ; i++) {
 	submit_logged_buffer(chunk->bh[i]) ;
     }
     chunk->nr = 0;
+    put_fs_excl();
 }
 
 static void write_ordered_chunk(struct buffer_chunk *chunk) {
     int i;
+    get_fs_excl();
     for (i = 0; i < chunk->nr ; i++) {
 	submit_ordered_buffer(chunk->bh[i]) ;
     }
     chunk->nr = 0;
+    put_fs_excl();
 }
 
 static int add_to_chunk(struct buffer_chunk *chunk, struct buffer_head *bh,
@@ -918,6 +922,8 @@ static int flush_commit_list(struct super_block *s, struct reiserfs_journal_list
     return 0 ;
   }
 
+  get_fs_excl();
+
   /* before we can put our commit blocks on disk, we have to make sure everyone older than
   ** us is on disk too
   */
@@ -1055,6 +1061,7 @@ put_jl:
 
   if (retval)
     reiserfs_abort (s, retval, "Journal write error in %s", __FUNCTION__);
+  put_fs_excl();
   return retval;
 }
 
@@ -1251,6 +1258,8 @@ static int flush_journal_list(struct super_block *s,
     return 0 ;
   }
 
+  get_fs_excl();
+
   /* if all the work is already done, get out of here */
   if (atomic_read(&(jl->j_nonzerolen)) <= 0 && 
       atomic_read(&(jl->j_commit_left)) <= 0) {
@@ -1450,6 +1459,7 @@ flush_older_and_return:
   put_journal_list(s, jl);
   if (flushall)
     up(&journal->j_flush_sem);
+  put_fs_excl();
   return err ;
 } 
 
@@ -2719,6 +2729,7 @@ relock:
   th->t_trans_id = journal->j_trans_id ;
   unlock_journal(p_s_sb) ;
   INIT_LIST_HEAD (&th->t_list);
+  get_fs_excl();
   return 0 ;
 
 out_fail:
@@ -3526,6 +3537,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, struct super_b
   BUG_ON (th->t_refcount > 1);
   BUG_ON (!th->t_trans_id);
 
+  put_fs_excl();
   current->journal_info = th->t_handle_save;
   reiserfs_check_lock_depth(p_s_sb, "journal end");
   if (journal->j_len == 0) {
diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h
index 176413fb9ae..e25e4c71a87 100644
--- a/include/asm-i386/unistd.h
+++ b/include/asm-i386/unistd.h
@@ -294,8 +294,10 @@
 #define __NR_add_key		286
 #define __NR_request_key	287
 #define __NR_keyctl		288
+#define __NR_ioprio_set		289
+#define __NR_ioprio_get		290
 
-#define NR_syscalls 289
+#define NR_syscalls 291
 
 /*
  * user-visible error numbers are in the range -1 - -128: see
diff --git a/include/asm-ia64/unistd.h b/include/asm-ia64/unistd.h
index f7f43ec2483..517f1649ee6 100644
--- a/include/asm-ia64/unistd.h
+++ b/include/asm-ia64/unistd.h
@@ -263,6 +263,8 @@
 #define __NR_add_key			1271
 #define __NR_request_key		1272
 #define __NR_keyctl			1273
+#define __NR_ioprio_set			1274
+#define __NR_ioprio_get			1275
 #define __NR_set_zone_reclaim		1276
 
 #ifdef __KERNEL__
diff --git a/include/asm-ppc/unistd.h b/include/asm-ppc/unistd.h
index cc51e5c9acc..e8b79220b29 100644
--- a/include/asm-ppc/unistd.h
+++ b/include/asm-ppc/unistd.h
@@ -277,8 +277,10 @@
 #define __NR_request_key	270
 #define __NR_keyctl		271
 #define __NR_waitid		272
+#define __NR_ioprio_set		273
+#define __NR_ioprio_get		274
 
-#define __NR_syscalls		273
+#define __NR_syscalls		275
 
 #define __NR(n)	#n
 
diff --git a/include/asm-x86_64/unistd.h b/include/asm-x86_64/unistd.h
index d767adcbf0f..6560439a83e 100644
--- a/include/asm-x86_64/unistd.h
+++ b/include/asm-x86_64/unistd.h
@@ -561,8 +561,12 @@ __SYSCALL(__NR_add_key, sys_add_key)
 __SYSCALL(__NR_request_key, sys_request_key)
 #define __NR_keyctl		250
 __SYSCALL(__NR_keyctl, sys_keyctl)
+#define __NR_ioprio_set		251
+__SYSCALL(__NR_ioprio_set, sys_ioprio_set)
+#define __NR_ioprio_get		252
+__SYSCALL(__NR_ioprio_get, sys_ioprio_get)
 
-#define __NR_syscall_max __NR_keyctl
+#define __NR_syscall_max __NR_ioprio_get
 #ifndef __NO_STUBS
 
 /* user-visible error numbers are in the range -1 - -4095 */
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 038022763f0..36ef29fa0d8 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -22,6 +22,7 @@
 
 #include <linux/highmem.h>
 #include <linux/mempool.h>
+#include <linux/ioprio.h>
 
 /* Platforms may set this to teach the BIO layer about IOMMU hardware. */
 #include <asm/io.h>
@@ -149,6 +150,19 @@ struct bio {
 #define BIO_RW_FAILFAST	3
 #define BIO_RW_SYNC	4
 
+/*
+ * upper 16 bits of bi_rw define the io priority of this bio
+ */
+#define BIO_PRIO_SHIFT	(8 * sizeof(unsigned long) - IOPRIO_BITS)
+#define bio_prio(bio)	((bio)->bi_rw >> BIO_PRIO_SHIFT)
+#define bio_prio_valid(bio)	ioprio_valid(bio_prio(bio))
+
+#define bio_set_prio(bio, prio)		do {			\
+	WARN_ON(prio >= (1 << IOPRIO_BITS));			\
+	(bio)->bi_rw &= ((1UL << BIO_PRIO_SHIFT) - 1);		\
+	(bio)->bi_rw |= ((unsigned long) (prio) << BIO_PRIO_SHIFT);	\
+} while (0)
+
 /*
  * various member access, note that bio_data should of course not be used
  * on highmem page vectors
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b54a0348a89..21a8674cd14 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -54,16 +54,23 @@ struct as_io_context {
 
 struct cfq_queue;
 struct cfq_io_context {
-	void (*dtor)(struct cfq_io_context *);
-	void (*exit)(struct cfq_io_context *);
-
-	struct io_context *ioc;
-
 	/*
 	 * circular list of cfq_io_contexts belonging to a process io context
 	 */
 	struct list_head list;
 	struct cfq_queue *cfqq;
+	void *key;
+
+	struct io_context *ioc;
+
+	unsigned long last_end_request;
+	unsigned long last_queue;
+	unsigned long ttime_total;
+	unsigned long ttime_samples;
+	unsigned long ttime_mean;
+
+	void (*dtor)(struct cfq_io_context *);
+	void (*exit)(struct cfq_io_context *);
 };
 
 /*
@@ -73,7 +80,9 @@ struct cfq_io_context {
  */
 struct io_context {
 	atomic_t refcount;
-	pid_t pid;
+	struct task_struct *task;
+
+	int (*set_ioprio)(struct io_context *, unsigned int);
 
 	/*
 	 * For request batching
@@ -81,8 +90,6 @@ struct io_context {
 	unsigned long last_waited; /* Time last woken after wait for request */
 	int nr_batch_requests;     /* Number of requests left in the batch */
 
-	spinlock_t lock;
-
 	struct as_io_context *aic;
 	struct cfq_io_context *cic;
 };
@@ -134,6 +141,8 @@ struct request {
 
 	void *elevator_private;
 
+	unsigned short ioprio;
+
 	int rq_status;	/* should split this into a few status bits */
 	struct gendisk *rq_disk;
 	int errors;
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index ee54f81faad..ea6bbc2d740 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -16,9 +16,9 @@ typedef void (elevator_remove_req_fn) (request_queue_t *, struct request *);
 typedef void (elevator_requeue_req_fn) (request_queue_t *, struct request *);
 typedef struct request *(elevator_request_list_fn) (request_queue_t *, struct request *);
 typedef void (elevator_completed_req_fn) (request_queue_t *, struct request *);
-typedef int (elevator_may_queue_fn) (request_queue_t *, int);
+typedef int (elevator_may_queue_fn) (request_queue_t *, int, struct bio *);
 
-typedef int (elevator_set_req_fn) (request_queue_t *, struct request *, int);
+typedef int (elevator_set_req_fn) (request_queue_t *, struct request *, struct bio *, int);
 typedef void (elevator_put_req_fn) (request_queue_t *, struct request *);
 typedef void (elevator_deactivate_req_fn) (request_queue_t *, struct request *);
 
@@ -96,9 +96,9 @@ extern struct request *elv_former_request(request_queue_t *, struct request *);
 extern struct request *elv_latter_request(request_queue_t *, struct request *);
 extern int elv_register_queue(request_queue_t *q);
 extern void elv_unregister_queue(request_queue_t *q);
-extern int elv_may_queue(request_queue_t *, int);
+extern int elv_may_queue(request_queue_t *, int, struct bio *);
 extern void elv_completed_request(request_queue_t *, struct request *);
-extern int elv_set_request(request_queue_t *, struct request *, int);
+extern int elv_set_request(request_queue_t *, struct request *, struct bio *, int);
 extern void elv_put_request(request_queue_t *, struct request *);
 
 /*
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3ae8e37bdfc..047bde30836 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -213,6 +213,7 @@ extern int dir_notify_enable;
 #include <linux/radix-tree.h>
 #include <linux/prio_tree.h>
 #include <linux/init.h>
+#include <linux/sched.h>
 
 #include <asm/atomic.h>
 #include <asm/semaphore.h>
@@ -822,16 +823,34 @@ enum {
 #define vfs_check_frozen(sb, level) \
 	wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level)))
 
+static inline void get_fs_excl(void)
+{
+	atomic_inc(&current->fs_excl);
+}
+
+static inline void put_fs_excl(void)
+{
+	atomic_dec(&current->fs_excl);
+}
+
+static inline int has_fs_excl(void)
+{
+	return atomic_read(&current->fs_excl);
+}
+
+
 /*
  * Superblock locking.
  */
 static inline void lock_super(struct super_block * sb)
 {
+	get_fs_excl();
 	down(&sb->s_lock);
 }
 
 static inline void unlock_super(struct super_block * sb)
 {
+	put_fs_excl();
 	up(&sb->s_lock);
 }
 
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 03206a425d7..c727c195a91 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -81,6 +81,7 @@ extern struct group_info init_groups;
 	.mm		= NULL,						\
 	.active_mm	= &init_mm,					\
 	.run_list	= LIST_HEAD_INIT(tsk.run_list),			\
+	.ioprio		= 0,						\
 	.time_slice	= HZ,						\
 	.tasks		= LIST_HEAD_INIT(tsk.tasks),			\
 	.ptrace_children= LIST_HEAD_INIT(tsk.ptrace_children),		\
@@ -110,6 +111,7 @@ extern struct group_info init_groups;
 	.proc_lock	= SPIN_LOCK_UNLOCKED,				\
 	.journal_info	= NULL,						\
 	.cpu_timers	= INIT_CPU_TIMERS(tsk.cpu_timers),		\
+	.fs_excl	= ATOMIC_INIT(0),				\
 }
 
 
diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h
new file mode 100644
index 00000000000..7811300d88e
--- /dev/null
+++ b/include/linux/ioprio.h
@@ -0,0 +1,87 @@
+#ifndef IOPRIO_H
+#define IOPRIO_H
+
+#include <linux/sched.h>
+
+/*
+ * Gives us 8 prio classes with 13-bits of data for each class
+ */
+#define IOPRIO_BITS		(16)
+#define IOPRIO_CLASS_SHIFT	(13)
+#define IOPRIO_PRIO_MASK	((1UL << IOPRIO_CLASS_SHIFT) - 1)
+
+#define IOPRIO_PRIO_CLASS(mask)	((mask) >> IOPRIO_CLASS_SHIFT)
+#define IOPRIO_PRIO_DATA(mask)	((mask) & IOPRIO_PRIO_MASK)
+
+#define ioprio_valid(mask)	(IOPRIO_PRIO_CLASS((mask)) != IOPRIO_CLASS_NONE)
+
+/*
+ * These are the io priority groups as implemented by CFQ. RT is the realtime
+ * class, it always gets premium service. BE is the best-effort scheduling
+ * class, the default for any process. IDLE is the idle scheduling class, it
+ * is only served when no one else is using the disk.
+ */
+enum {
+	IOPRIO_CLASS_NONE,
+	IOPRIO_CLASS_RT,
+	IOPRIO_CLASS_BE,
+	IOPRIO_CLASS_IDLE,
+};
+
+/*
+ * 8 best effort priority levels are supported
+ */
+#define IOPRIO_BE_NR	(8)
+
+asmlinkage int sys_ioprio_set(int, int, int);
+asmlinkage int sys_ioprio_get(int, int);
+
+enum {
+	IOPRIO_WHO_PROCESS = 1,
+	IOPRIO_WHO_PGRP,
+	IOPRIO_WHO_USER,
+};
+
+/*
+ * if process has set io priority explicitly, use that. if not, convert
+ * the cpu scheduler nice value to an io priority
+ */
+#define IOPRIO_NORM	(4)
+static inline int task_ioprio(struct task_struct *task)
+{
+	WARN_ON(!ioprio_valid(task->ioprio));
+	return IOPRIO_PRIO_DATA(task->ioprio);
+}
+
+static inline int task_nice_ioprio(struct task_struct *task)
+{
+	return (task_nice(task) + 20) / 5;
+}
+
+/*
+ * For inheritance, return the highest of the two given priorities
+ */
+static inline int ioprio_best(unsigned short aprio, unsigned short bprio)
+{
+	unsigned short aclass = IOPRIO_PRIO_CLASS(aprio);
+	unsigned short bclass = IOPRIO_PRIO_CLASS(bprio);
+
+	if (!ioprio_valid(aprio))
+		return bprio;
+	if (!ioprio_valid(bprio))
+		return aprio;
+
+	if (aclass == IOPRIO_CLASS_NONE)
+		aclass = IOPRIO_CLASS_BE;
+	if (bclass == IOPRIO_CLASS_NONE)
+		bclass = IOPRIO_CLASS_BE;
+
+	if (aclass == bclass)
+		return min(aprio, bprio);
+	if (aclass > bclass)
+		return bprio;
+	else
+		return aprio;
+}
+
+#endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9530b190316..ff48815bd3a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -608,6 +608,8 @@ struct task_struct {
 	struct list_head run_list;
 	prio_array_t *array;
 
+	unsigned short ioprio;
+
 	unsigned long sleep_avg;
 	unsigned long long timestamp, last_ran;
 	unsigned long long sched_time; /* sched_clock time spent running */
@@ -763,6 +765,7 @@ struct task_struct {
 	nodemask_t mems_allowed;
 	int cpuset_mems_generation;
 #endif
+	atomic_t fs_excl;	/* holding fs exclusive resources */
 };
 
 static inline pid_t process_group(struct task_struct *tsk)
@@ -1112,7 +1115,8 @@ extern void unhash_process(struct task_struct *p);
 
 /*
  * Protects ->fs, ->files, ->mm, ->ptrace, ->group_info, ->comm, keyring
- * subscriptions and synchronises with wait4().  Also used in procfs.
+ * subscriptions and synchronises with wait4().  Also used in procfs.  Also
+ * pins the final release of task.io_context.
  *
  * Nests both inside and outside of read_lock(&tasklist_lock).
  * It must not be nested with write_lock_irq(&tasklist_lock),
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 1262cb43c3a..d5c3fe1bf33 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -14,11 +14,13 @@ extern struct list_head inode_unused;
  * Yes, writeback.h requires sched.h
  * No, sched.h is not included from here.
  */
-static inline int current_is_pdflush(void)
+static inline int task_is_pdflush(struct task_struct *task)
 {
-	return current->flags & PF_FLUSHER;
+	return task->flags & PF_FLUSHER;
 }
 
+#define current_is_pdflush()	task_is_pdflush(current)
+
 /*
  * fs/fs-writeback.c
  */
diff --git a/kernel/exit.c b/kernel/exit.c
index 3ebcd60a19c..9d1b10ed013 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -784,6 +784,8 @@ fastcall NORET_TYPE void do_exit(long code)
 
 	profile_task_exit(tsk);
 
+	WARN_ON(atomic_read(&tsk->fs_excl));
+
 	if (unlikely(in_interrupt()))
 		panic("Aiee, killing interrupt handler!");
 	if (unlikely(!tsk->pid))
diff --git a/kernel/fork.c b/kernel/fork.c
index 2c7806873bf..cdef6cea890 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1090,6 +1090,11 @@ static task_t *copy_process(unsigned long clone_flags,
 		spin_unlock(&current->sighand->siglock);
 	}
 
+	/*
+	 * inherit ioprio
+	 */
+	p->ioprio = current->ioprio;
+
 	SET_LINKS(p);
 	if (unlikely(p->ptrace & PT_PTRACED))
 		__ptrace_link(p, current->parent);
diff --git a/kernel/sched.c b/kernel/sched.c
index a07cff90d84..e2b0d3e4dd0 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3448,15 +3448,7 @@ int task_nice(const task_t *p)
 {
 	return TASK_NICE(p);
 }
-
-/*
- * The only users of task_nice are binfmt_elf and binfmt_elf32.
- * binfmt_elf is no longer modular, but binfmt_elf32 still is.
- * Therefore, task_nice is needed if there is a compat_mode.
- */
-#ifdef CONFIG_COMPAT
 EXPORT_SYMBOL_GPL(task_nice);
-#endif
 
 /**
  * idle_cpu - is a given cpu idle currently?
-- 
cgit v1.2.3-70-g09d2


From 3b18152c327707ae6a2eeba4cfb66457143753bc Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Mon, 27 Jun 2005 10:56:24 +0200
Subject: [PATCH] CFQ io scheduler updates

- Adjust slice values

- Instead of one async queue, one is defined per priority level. This
  prevents kernel threads (such as reiserfs/x and others) that run at
  higher io priority from conflicting with others. Previously, it was a
  coin toss what io prio the async queue got, it was defined by who
  first set up the queue.

- Let a time slice only begin, when the previous slice is completely
  done. Previously we could be somewhat unfair to a new sync slice, if
  the previous slice was async and had several ios queued. This might
  need a little tweaking if throughput suffers a little due to this,
  allowing perhaps an overlap of a single request or so.

- Optimize the calling of kblockd_schedule_work() by doing it only when
  it is strictly necessary (no requests in driver and work left to do).

- Correct sync vs async logic. A 'normal' process can be purely async as
  well, and a flusher can be purely sync as well. Sync or async is now a
  property of the class defined and requests pending. Previously writers
  could be considered sync, when they were really async.

- Get rid of the bit fields in cfqq and crq, use flags instead.

- Various other cleanups and fixes

Signed-off-by: Jens Axboe <axboe@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/block/cfq-iosched.c | 460 ++++++++++++++++++++++++++++----------------
 include/linux/ioprio.h      |   1 +
 2 files changed, 300 insertions(+), 161 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/cfq-iosched.c b/drivers/block/cfq-iosched.c
index baa3e268250..1ecb179b860 100644
--- a/drivers/block/cfq-iosched.c
+++ b/drivers/block/cfq-iosched.c
@@ -34,14 +34,15 @@ static int cfq_back_max = 16 * 1024;	/* maximum backwards seek, in KiB */
 static int cfq_back_penalty = 2;	/* penalty of a backwards seek */
 
 static int cfq_slice_sync = HZ / 10;
-static int cfq_slice_async = HZ / 50;
+static int cfq_slice_async = HZ / 25;
 static int cfq_slice_async_rq = 2;
-static int cfq_slice_idle = HZ / 50;
+static int cfq_slice_idle = HZ / 100;
 
 #define CFQ_IDLE_GRACE		(HZ / 10)
 #define CFQ_SLICE_SCALE		(5)
 
 #define CFQ_KEY_ASYNC		(0)
+#define CFQ_KEY_ANY		(0xffff)
 
 /*
  * disable queueing at the driver/hardware level
@@ -96,7 +97,16 @@ static kmem_cache_t *cfq_ioc_pool;
 #define cfq_class_be(cfqq)	((cfqq)->ioprio_class == IOPRIO_CLASS_BE)
 #define cfq_class_rt(cfqq)	((cfqq)->ioprio_class == IOPRIO_CLASS_RT)
 
-#define cfq_cfqq_sync(cfqq)	((cfqq)->key != CFQ_KEY_ASYNC)
+#define ASYNC			(0)
+#define SYNC			(1)
+
+#define cfq_cfqq_dispatched(cfqq)	\
+	((cfqq)->on_dispatch[ASYNC] + (cfqq)->on_dispatch[SYNC])
+
+#define cfq_cfqq_class_sync(cfqq)	((cfqq)->key != CFQ_KEY_ASYNC)
+
+#define cfq_cfqq_sync(cfqq)		\
+	(cfq_cfqq_class_sync(cfqq) || (cfqq)->on_dispatch[SYNC])
 
 /*
  * Per block device queue structure
@@ -200,28 +210,15 @@ struct cfq_queue {
 	unsigned long slice_left;
 	unsigned long service_last;
 
-	/* number of requests that have been handed to the driver */
-	int in_flight;
+	/* number of requests that are on the dispatch list */
+	int on_dispatch[2];
 
 	/* io prio of this group */
 	unsigned short ioprio, org_ioprio;
 	unsigned short ioprio_class, org_ioprio_class;
 
-	/* whether queue is on rr (or empty) list */
-	unsigned on_rr : 1;
-	/* idle slice, waiting for new request submission */
-	unsigned wait_request : 1;
-	/* set when wait_request gets set, reset on first rq alloc */
-	unsigned must_alloc : 1;
-	/* only gets one must_alloc per slice */
-	unsigned must_alloc_slice : 1;
-	/* idle slice, request added, now waiting to dispatch it */
-	unsigned must_dispatch : 1;
-	/* fifo expire per-slice */
-	unsigned fifo_expire : 1;
-
-	unsigned idle_window : 1;
-	unsigned prio_changed : 1;
+	/* various state flags, see below */
+	unsigned int flags;
 };
 
 struct cfq_rq {
@@ -233,15 +230,77 @@ struct cfq_rq {
 	struct cfq_queue *cfq_queue;
 	struct cfq_io_context *io_context;
 
-	unsigned in_flight : 1;
-	unsigned accounted : 1;
-	unsigned is_sync   : 1;
-	unsigned requeued  : 1;
+	unsigned int crq_flags;
 };
 
-static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned int);
+enum cfqq_state_flags {
+	CFQ_CFQQ_FLAG_on_rr = 0,
+	CFQ_CFQQ_FLAG_wait_request,
+	CFQ_CFQQ_FLAG_must_alloc,
+	CFQ_CFQQ_FLAG_must_alloc_slice,
+	CFQ_CFQQ_FLAG_must_dispatch,
+	CFQ_CFQQ_FLAG_fifo_expire,
+	CFQ_CFQQ_FLAG_idle_window,
+	CFQ_CFQQ_FLAG_prio_changed,
+	CFQ_CFQQ_FLAG_expired,
+};
+
+#define CFQ_CFQQ_FNS(name)						\
+static inline void cfq_mark_cfqq_##name(struct cfq_queue *cfqq)		\
+{									\
+	cfqq->flags |= (1 << CFQ_CFQQ_FLAG_##name);			\
+}									\
+static inline void cfq_clear_cfqq_##name(struct cfq_queue *cfqq)	\
+{									\
+	cfqq->flags &= ~(1 << CFQ_CFQQ_FLAG_##name);			\
+}									\
+static inline int cfq_cfqq_##name(const struct cfq_queue *cfqq)		\
+{									\
+	return (cfqq->flags & (1 << CFQ_CFQQ_FLAG_##name)) != 0;	\
+}
+
+CFQ_CFQQ_FNS(on_rr);
+CFQ_CFQQ_FNS(wait_request);
+CFQ_CFQQ_FNS(must_alloc);
+CFQ_CFQQ_FNS(must_alloc_slice);
+CFQ_CFQQ_FNS(must_dispatch);
+CFQ_CFQQ_FNS(fifo_expire);
+CFQ_CFQQ_FNS(idle_window);
+CFQ_CFQQ_FNS(prio_changed);
+CFQ_CFQQ_FNS(expired);
+#undef CFQ_CFQQ_FNS
+
+enum cfq_rq_state_flags {
+	CFQ_CRQ_FLAG_in_flight = 0,
+	CFQ_CRQ_FLAG_in_driver,
+	CFQ_CRQ_FLAG_is_sync,
+	CFQ_CRQ_FLAG_requeued,
+};
+
+#define CFQ_CRQ_FNS(name)						\
+static inline void cfq_mark_crq_##name(struct cfq_rq *crq)		\
+{									\
+	crq->crq_flags |= (1 << CFQ_CRQ_FLAG_##name);			\
+}									\
+static inline void cfq_clear_crq_##name(struct cfq_rq *crq)		\
+{									\
+	crq->crq_flags &= ~(1 << CFQ_CRQ_FLAG_##name);			\
+}									\
+static inline int cfq_crq_##name(const struct cfq_rq *crq)		\
+{									\
+	return (crq->crq_flags & (1 << CFQ_CRQ_FLAG_##name)) != 0;	\
+}
+
+CFQ_CRQ_FNS(in_flight);
+CFQ_CRQ_FNS(in_driver);
+CFQ_CRQ_FNS(is_sync);
+CFQ_CRQ_FNS(requeued);
+#undef CFQ_CRQ_FNS
+
+static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned int, unsigned short);
 static void cfq_dispatch_sort(request_queue_t *, struct cfq_rq *);
 static void cfq_put_cfqd(struct cfq_data *cfqd);
+static inline int cfq_pending_requests(struct cfq_data *cfqd);
 
 #define process_sync(tsk)	((tsk)->flags & PF_SYNCWRITE)
 
@@ -305,9 +364,9 @@ cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2)
 		return crq2;
 	if (crq2 == NULL)
 		return crq1;
-	if (crq1->requeued)
+	if (cfq_crq_requeued(crq1))
 		return crq1;
-	if (crq2->requeued)
+	if (cfq_crq_requeued(crq2))
 		return crq2;
 
 	s1 = crq1->request->sector;
@@ -407,7 +466,7 @@ static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted)
 	struct cfq_data *cfqd = cfqq->cfqd;
 	struct list_head *list, *entry;
 
-	BUG_ON(!cfqq->on_rr);
+	BUG_ON(!cfq_cfqq_on_rr(cfqq));
 
 	list_del(&cfqq->cfq_list);
 
@@ -423,7 +482,7 @@ static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted)
 		 * has lots of io pending vs one that only generates one
 		 * sporadically or synchronously
 		 */
-		if (cfqq->in_flight)
+		if (cfq_cfqq_dispatched(cfqq))
 			list = &cfqd->busy_rr;
 		else
 			list = &cfqd->rr_list[cfqq->ioprio];
@@ -461,8 +520,8 @@ static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted)
 static inline void
 cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq, int requeue)
 {
-	BUG_ON(cfqq->on_rr);
-	cfqq->on_rr = 1;
+	BUG_ON(cfq_cfqq_on_rr(cfqq));
+	cfq_mark_cfqq_on_rr(cfqq);
 	cfqd->busy_queues++;
 
 	cfq_resort_rr_list(cfqq, requeue);
@@ -471,8 +530,8 @@ cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq, int requeue)
 static inline void
 cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
-	BUG_ON(!cfqq->on_rr);
-	cfqq->on_rr = 0;
+	BUG_ON(!cfq_cfqq_on_rr(cfqq));
+	cfq_clear_cfqq_on_rr(cfqq);
 	list_move(&cfqq->cfq_list, &cfqd->empty_list);
 
 	BUG_ON(!cfqd->busy_queues);
@@ -488,7 +547,7 @@ static inline void cfq_del_crq_rb(struct cfq_rq *crq)
 
 	if (ON_RB(&crq->rb_node)) {
 		struct cfq_data *cfqd = cfqq->cfqd;
-		const int sync = crq->is_sync;
+		const int sync = cfq_crq_is_sync(crq);
 
 		BUG_ON(!cfqq->queued[sync]);
 		cfqq->queued[sync]--;
@@ -498,7 +557,7 @@ static inline void cfq_del_crq_rb(struct cfq_rq *crq)
 		rb_erase(&crq->rb_node, &cfqq->sort_list);
 		RB_CLEAR_COLOR(&crq->rb_node);
 
-		if (cfqq->on_rr && RB_EMPTY(&cfqq->sort_list))
+		if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY(&cfqq->sort_list))
 			cfq_del_cfqq_rr(cfqd, cfqq);
 	}
 }
@@ -534,7 +593,7 @@ static void cfq_add_crq_rb(struct cfq_rq *crq)
 	struct cfq_rq *__alias;
 
 	crq->rb_key = rq_rb_key(rq);
-	cfqq->queued[crq->is_sync]++;
+	cfqq->queued[cfq_crq_is_sync(crq)]++;
 
 	/*
 	 * looks a little odd, but the first insert might return an alias.
@@ -545,8 +604,8 @@ static void cfq_add_crq_rb(struct cfq_rq *crq)
 
 	rb_insert_color(&crq->rb_node, &cfqq->sort_list);
 
-	if (!cfqq->on_rr)
-		cfq_add_cfqq_rr(cfqd, cfqq, crq->requeued);
+	if (!cfq_cfqq_on_rr(cfqq))
+		cfq_add_cfqq_rr(cfqd, cfqq, cfq_crq_requeued(crq));
 
 	/*
 	 * check if this request is a better next-serve candidate
@@ -559,7 +618,7 @@ cfq_reposition_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq)
 {
 	if (ON_RB(&crq->rb_node)) {
 		rb_erase(&crq->rb_node, &cfqq->sort_list);
-		cfqq->queued[crq->is_sync]--;
+		cfqq->queued[cfq_crq_is_sync(crq)]--;
 	}
 
 	cfq_add_crq_rb(crq);
@@ -568,7 +627,7 @@ cfq_reposition_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq)
 static struct request *cfq_find_rq_rb(struct cfq_data *cfqd, sector_t sector)
 
 {
-	struct cfq_queue *cfqq = cfq_find_cfq_hash(cfqd, current->pid);
+	struct cfq_queue *cfqq = cfq_find_cfq_hash(cfqd, current->pid, CFQ_KEY_ANY);
 	struct rb_node *n;
 
 	if (!cfqq)
@@ -598,17 +657,19 @@ static void cfq_deactivate_request(request_queue_t *q, struct request *rq)
 	if (crq) {
 		struct cfq_queue *cfqq = crq->cfq_queue;
 
-		if (crq->accounted) {
-			crq->accounted = 0;
+		if (cfq_crq_in_driver(crq)) {
+			cfq_clear_crq_in_driver(crq);
 			WARN_ON(!cfqd->rq_in_driver);
 			cfqd->rq_in_driver--;
 		}
-		if (crq->in_flight) {
-			crq->in_flight = 0;
-			WARN_ON(!cfqq->in_flight);
-			cfqq->in_flight--;
+		if (cfq_crq_in_flight(crq)) {
+			const int sync = cfq_crq_is_sync(crq);
+
+			cfq_clear_crq_in_flight(crq);
+			WARN_ON(!cfqq->on_dispatch[sync]);
+			cfqq->on_dispatch[sync]--;
 		}
-		crq->requeued = 1;
+		cfq_mark_crq_requeued(crq);
 	}
 }
 
@@ -712,8 +773,9 @@ __cfq_set_active_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 		cfqq->slice_start = jiffies;
 		cfqq->slice_end = 0;
 		cfqq->slice_left = 0;
-		cfqq->must_alloc_slice = 0;
-		cfqq->fifo_expire = 0;
+		cfq_clear_cfqq_must_alloc_slice(cfqq);
+		cfq_clear_cfqq_fifo_expire(cfqq);
+		cfq_clear_cfqq_expired(cfqq);
 	}
 
 	cfqd->active_queue = cfqq;
@@ -776,9 +838,18 @@ static int cfq_get_next_prio_level(struct cfq_data *cfqd)
 	return prio;
 }
 
-static void cfq_set_active_queue(struct cfq_data *cfqd)
+static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd)
 {
-	struct cfq_queue *cfqq = NULL;
+	struct cfq_queue *cfqq;
+
+	/*
+	 * if current queue is expired but not done with its requests yet,
+	 * wait for that to happen
+	 */
+	if ((cfqq = cfqd->active_queue) != NULL) {
+		if (cfq_cfqq_expired(cfqq) && cfq_cfqq_dispatched(cfqq))
+			return NULL;
+	}
 
 	/*
 	 * if current list is non-empty, grab first entry. if it is empty,
@@ -802,50 +873,66 @@ static void cfq_set_active_queue(struct cfq_data *cfqd)
 	}
 
 	__cfq_set_active_queue(cfqd, cfqq);
+	return cfqq;
 }
 
 /*
  * current cfqq expired its slice (or was too idle), select new one
  */
-static inline void cfq_slice_expired(struct cfq_data *cfqd, int preempted)
+static void
+__cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
+		    int preempted)
 {
-	struct cfq_queue *cfqq = cfqd->active_queue;
-
-	if (cfqq) {
-		unsigned long now = jiffies;
+	unsigned long now = jiffies;
 
-		if (cfqq->wait_request)
-			del_timer(&cfqd->idle_slice_timer);
+	if (cfq_cfqq_wait_request(cfqq))
+		del_timer(&cfqd->idle_slice_timer);
 
-		if (!preempted && !cfqq->in_flight)
-			cfqq->service_last = now;
+	if (!preempted && !cfq_cfqq_dispatched(cfqq))
+		cfqq->service_last = now;
 
-		cfqq->must_dispatch = 0;
-		cfqq->wait_request = 0;
+	cfq_clear_cfqq_must_dispatch(cfqq);
+	cfq_clear_cfqq_wait_request(cfqq);
 
-		/*
-		 * store what was left of this slice, if the queue idled out
-		 * or was preempted
-		 */
-		if (time_after(now, cfqq->slice_end))
-			cfqq->slice_left = now - cfqq->slice_end;
-		else
-			cfqq->slice_left = 0;
+	/*
+	 * store what was left of this slice, if the queue idled out
+	 * or was preempted
+	 */
+	if (time_after(now, cfqq->slice_end))
+		cfqq->slice_left = now - cfqq->slice_end;
+	else
+		cfqq->slice_left = 0;
 
-		if (cfqq->on_rr)
-			cfq_resort_rr_list(cfqq, preempted);
+	if (cfq_cfqq_on_rr(cfqq))
+		cfq_resort_rr_list(cfqq, preempted);
 
+	if (cfqq == cfqd->active_queue)
 		cfqd->active_queue = NULL;
 
-		if (cfqd->active_cic) {
-			put_io_context(cfqd->active_cic->ioc);
-			cfqd->active_cic = NULL;
-		}
+	if (cfqd->active_cic) {
+		put_io_context(cfqd->active_cic->ioc);
+		cfqd->active_cic = NULL;
 	}
 
 	cfqd->dispatch_slice = 0;
 }
 
+static inline void cfq_slice_expired(struct cfq_data *cfqd, int preempted)
+{
+	struct cfq_queue *cfqq = cfqd->active_queue;
+
+	if (cfqq) {
+		/*
+		 * use deferred expiry, if there are requests in progress as
+		 * not to disturb the slice of the next queue
+		 */
+		if (cfq_cfqq_dispatched(cfqq))
+			cfq_mark_cfqq_expired(cfqq);
+		else
+			__cfq_slice_expired(cfqd, cfqq, preempted);
+	}
+}
+
 static int cfq_arm_slice_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 
 {
@@ -857,7 +944,7 @@ static int cfq_arm_slice_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	 */
 	if (!cfqd->cfq_slice_idle)
 		return 0;
-	if (!cfqq->idle_window)
+	if (!cfq_cfqq_idle_window(cfqq))
 		return 0;
 	/*
 	 * task has exited, don't wait
@@ -865,13 +952,13 @@ static int cfq_arm_slice_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	if (cfqd->active_cic && !cfqd->active_cic->ioc->task)
 		return 0;
 
-	cfqq->wait_request = 1;
-	cfqq->must_alloc = 1;
+	cfq_mark_cfqq_must_dispatch(cfqq);
+	cfq_mark_cfqq_wait_request(cfqq);
 
 	if (!timer_pending(&cfqd->idle_slice_timer)) {
-		unsigned long slice_left = cfqq->slice_end - 1;
+		unsigned long slice_left = min(cfqq->slice_end - 1, (unsigned long) cfqd->cfq_slice_idle);
 
-		cfqd->idle_slice_timer.expires = min(jiffies + cfqd->cfq_slice_idle, slice_left);
+		cfqd->idle_slice_timer.expires = jiffies + slice_left;
 		add_timer(&cfqd->idle_slice_timer);
 	}
 
@@ -901,7 +988,7 @@ static void cfq_dispatch_sort(request_queue_t *q, struct cfq_rq *crq)
 			break;
 		if (!blk_fs_request(__rq))
 			break;
-		if (__crq->requeued)
+		if (cfq_crq_requeued(__crq))
 			break;
 
 		if (__rq->sector <= crq->request->sector)
@@ -920,9 +1007,10 @@ static void cfq_dispatch_sort(request_queue_t *q, struct cfq_rq *crq)
 	cfq_del_crq_rb(crq);
 	cfq_remove_merge_hints(q, crq);
 
-	crq->in_flight = 1;
-	crq->requeued = 0;
-	cfqq->in_flight++;
+	cfq_mark_crq_in_flight(crq);
+	cfq_clear_crq_requeued(crq);
+
+	cfqq->on_dispatch[cfq_crq_is_sync(crq)]++;
 	list_add_tail(&crq->request->queuelist, entry);
 }
 
@@ -935,16 +1023,16 @@ static inline struct cfq_rq *cfq_check_fifo(struct cfq_queue *cfqq)
 	struct request *rq;
 	struct cfq_rq *crq;
 
-	if (cfqq->fifo_expire)
+	if (cfq_cfqq_fifo_expire(cfqq))
 		return NULL;
 
 	if (!list_empty(&cfqq->fifo)) {
-		int fifo = cfq_cfqq_sync(cfqq);
+		int fifo = cfq_cfqq_class_sync(cfqq);
 
 		crq = RQ_DATA(list_entry_fifo(cfqq->fifo.next));
 		rq = crq->request;
 		if (time_after(jiffies, rq->start_time + cfqd->cfq_fifo_expire[fifo])) {
-			cfqq->fifo_expire = 1;
+			cfq_mark_cfqq_fifo_expire(cfqq);
 			return crq;
 		}
 	}
@@ -953,7 +1041,9 @@ static inline struct cfq_rq *cfq_check_fifo(struct cfq_queue *cfqq)
 }
 
 /*
- * Scale schedule slice based on io priority
+ * Scale schedule slice based on io priority. Use the sync time slice only
+ * if a queue is marked sync and has sync io queued. A sync queue with async
+ * io only, should not get full sync slice length.
  */
 static inline int
 cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
@@ -981,6 +1071,16 @@ cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	return 2 * (base_rq + base_rq * (CFQ_PRIO_LISTS - 1 - cfqq->ioprio));
 }
 
+/*
+ * scheduler run of queue, if there are requests pending and no one in the
+ * driver that will restart queueing
+ */
+static inline void cfq_schedule_dispatch(struct cfq_data *cfqd)
+{
+	if (!cfqd->rq_in_driver && cfq_pending_requests(cfqd))
+		kblockd_schedule_work(&cfqd->unplug_work);
+}
+
 /*
  * get next queue for service
  */
@@ -993,11 +1093,14 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd, int force)
 	if (!cfqq)
 		goto new_queue;
 
+	if (cfq_cfqq_expired(cfqq))
+		goto new_queue;
+
 	/*
 	 * slice has expired
 	 */
-	if (!cfqq->must_dispatch && time_after(jiffies, cfqq->slice_end))
-		goto new_queue;
+	if (!cfq_cfqq_must_dispatch(cfqq) && time_after(now, cfqq->slice_end))
+		goto expire;
 
 	/*
 	 * if queue has requests, dispatch one. if not, check if
@@ -1005,17 +1108,18 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd, int force)
 	 */
 	if (!RB_EMPTY(&cfqq->sort_list))
 		goto keep_queue;
-	else if (!force && cfq_cfqq_sync(cfqq) &&
+	else if (!force && cfq_cfqq_class_sync(cfqq) &&
 		 time_before(now, cfqq->slice_end)) {
 		if (cfq_arm_slice_timer(cfqd, cfqq))
 			return NULL;
 	}
 
-new_queue:
+expire:
 	cfq_slice_expired(cfqd, 0);
-	cfq_set_active_queue(cfqd);
+new_queue:
+	cfqq = cfq_set_active_queue(cfqd);
 keep_queue:
-	return cfqd->active_queue;
+	return cfqq;
 }
 
 static int
@@ -1083,8 +1187,8 @@ cfq_dispatch_requests(request_queue_t *q, int max_dispatch, int force)
 
 	cfqq = cfq_select_queue(cfqd, force);
 	if (cfqq) {
-		cfqq->wait_request = 0;
-		cfqq->must_dispatch = 0;
+		cfq_clear_cfqq_must_dispatch(cfqq);
+		cfq_clear_cfqq_wait_request(cfqq);
 		del_timer(&cfqd->idle_slice_timer);
 
 		if (cfq_class_idle(cfqq))
@@ -1108,10 +1212,10 @@ static inline void cfq_account_dispatch(struct cfq_rq *crq)
 	 * accounted bit is necessary since some drivers will call
 	 * elv_next_request() many times for the same request (eg ide)
 	 */
-	if (crq->accounted)
+	if (cfq_crq_in_driver(crq))
 		return;
 
-	crq->accounted = 1;
+	cfq_mark_crq_in_driver(crq);
 	cfqd->rq_in_driver++;
 }
 
@@ -1121,7 +1225,7 @@ cfq_account_completion(struct cfq_queue *cfqq, struct cfq_rq *crq)
 	struct cfq_data *cfqd = cfqq->cfqd;
 	unsigned long now;
 
-	if (!crq->accounted)
+	if (!cfq_crq_in_driver(crq))
 		return;
 
 	now = jiffies;
@@ -1132,12 +1236,18 @@ cfq_account_completion(struct cfq_queue *cfqq, struct cfq_rq *crq)
 	if (!cfq_class_idle(cfqq))
 		cfqd->last_end_request = now;
 
-	if (!cfqq->in_flight && cfqq->on_rr) {
-		cfqq->service_last = now;
-		cfq_resort_rr_list(cfqq, 0);
+	if (!cfq_cfqq_dispatched(cfqq)) {
+		if (cfq_cfqq_on_rr(cfqq)) {
+			cfqq->service_last = now;
+			cfq_resort_rr_list(cfqq, 0);
+		}
+		if (cfq_cfqq_expired(cfqq)) {
+			__cfq_slice_expired(cfqd, cfqq, 0);
+			cfq_schedule_dispatch(cfqd);
+		}
 	}
 
-	if (crq->is_sync)
+	if (cfq_crq_is_sync(crq))
 		crq->io_context->last_end_request = now;
 }
 
@@ -1153,10 +1263,13 @@ dispatch:
 
 		crq = RQ_DATA(rq);
 		if (crq) {
+			struct cfq_queue *cfqq = crq->cfq_queue;
+
 			/*
 			 * if idle window is disabled, allow queue buildup
 			 */
-			if (!crq->in_flight && !crq->cfq_queue->idle_window &&
+			if (!cfq_crq_in_driver(crq) &&
+			    !cfq_cfqq_idle_window(cfqq) &&
 			    cfqd->rq_in_driver >= cfqd->cfq_max_depth)
 				return NULL;
 
@@ -1190,11 +1303,11 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
 
 	BUG_ON(rb_first(&cfqq->sort_list));
 	BUG_ON(cfqq->allocated[READ] + cfqq->allocated[WRITE]);
-	BUG_ON(cfqq->on_rr);
+	BUG_ON(cfq_cfqq_on_rr(cfqq));
 
 	if (unlikely(cfqd->active_queue == cfqq)) {
-		cfq_slice_expired(cfqd, 0);
-		kblockd_schedule_work(&cfqd->unplug_work);
+		__cfq_slice_expired(cfqd, cfqq, 0);
+		cfq_schedule_dispatch(cfqd);
 	}
 
 	cfq_put_cfqd(cfqq->cfqd);
@@ -1208,15 +1321,17 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
 }
 
 static inline struct cfq_queue *
-__cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key, const int hashval)
+__cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key, unsigned int prio,
+		    const int hashval)
 {
 	struct hlist_head *hash_list = &cfqd->cfq_hash[hashval];
 	struct hlist_node *entry, *next;
 
 	hlist_for_each_safe(entry, next, hash_list) {
 		struct cfq_queue *__cfqq = list_entry_qhash(entry);
+		const unsigned short __p = IOPRIO_PRIO_VALUE(__cfqq->ioprio_class, __cfqq->ioprio);
 
-		if (__cfqq->key == key)
+		if (__cfqq->key == key && (__p == prio || prio == CFQ_KEY_ANY))
 			return __cfqq;
 	}
 
@@ -1224,9 +1339,9 @@ __cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key, const int hashval)
 }
 
 static struct cfq_queue *
-cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key)
+cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key, unsigned short prio)
 {
-	return __cfq_find_cfq_hash(cfqd, key, hash_long(key, CFQ_QHASH_SHIFT));
+	return __cfq_find_cfq_hash(cfqd, key, prio, hash_long(key, CFQ_QHASH_SHIFT));
 }
 
 static void cfq_free_io_context(struct cfq_io_context *cic)
@@ -1255,8 +1370,8 @@ static void cfq_exit_single_io_context(struct cfq_io_context *cic)
 	spin_lock(q->queue_lock);
 
 	if (unlikely(cic->cfqq == cfqd->active_queue)) {
-		cfq_slice_expired(cfqd, 0);
-		kblockd_schedule_work(&cfqd->unplug_work);
+		__cfq_slice_expired(cfqd, cic->cfqq, 0);
+		cfq_schedule_dispatch(cfqd);
 	}
 
 	cfq_put_queue(cic->cfqq);
@@ -1313,7 +1428,7 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq)
 	struct task_struct *tsk = current;
 	int ioprio_class;
 
-	if (!cfqq->prio_changed)
+	if (!cfq_cfqq_prio_changed(cfqq))
 		return;
 
 	ioprio_class = IOPRIO_PRIO_CLASS(tsk->ioprio);
@@ -1338,7 +1453,7 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq)
 		case IOPRIO_CLASS_IDLE:
 			cfqq->ioprio_class = IOPRIO_CLASS_IDLE;
 			cfqq->ioprio = 7;
-			cfqq->idle_window = 0;
+			cfq_clear_cfqq_idle_window(cfqq);
 			break;
 	}
 
@@ -1349,10 +1464,10 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq)
 	cfqq->org_ioprio = cfqq->ioprio;
 	cfqq->org_ioprio_class = cfqq->ioprio_class;
 
-	if (cfqq->on_rr)
+	if (cfq_cfqq_on_rr(cfqq))
 		cfq_resort_rr_list(cfqq, 0);
 
-	cfqq->prio_changed = 0;
+	cfq_clear_cfqq_prio_changed(cfqq);
 }
 
 static inline void changed_ioprio(struct cfq_queue *cfqq)
@@ -1361,7 +1476,7 @@ static inline void changed_ioprio(struct cfq_queue *cfqq)
 		struct cfq_data *cfqd = cfqq->cfqd;
 
 		spin_lock(cfqd->queue->queue_lock);
-		cfqq->prio_changed = 1;
+		cfq_mark_cfqq_prio_changed(cfqq);
 		cfq_init_prio_data(cfqq);
 		spin_unlock(cfqd->queue->queue_lock);
 	}
@@ -1383,13 +1498,14 @@ static int cfq_ioc_set_ioprio(struct io_context *ioc, unsigned int ioprio)
 }
 
 static struct cfq_queue *
-cfq_get_queue(struct cfq_data *cfqd, unsigned int key, int gfp_mask)
+cfq_get_queue(struct cfq_data *cfqd, unsigned int key, unsigned short ioprio,
+	      int gfp_mask)
 {
 	const int hashval = hash_long(key, CFQ_QHASH_SHIFT);
 	struct cfq_queue *cfqq, *new_cfqq = NULL;
 
 retry:
-	cfqq = __cfq_find_cfq_hash(cfqd, key, hashval);
+	cfqq = __cfq_find_cfq_hash(cfqd, key, ioprio, hashval);
 
 	if (!cfqq) {
 		if (new_cfqq) {
@@ -1423,10 +1539,9 @@ retry:
 		 * set ->slice_left to allow preemption for a new process
 		 */
 		cfqq->slice_left = 2 * cfqd->cfq_slice_idle;
-		cfqq->idle_window = 1;
-		cfqq->ioprio = -1;
-		cfqq->ioprio_class = -1;
-		cfqq->prio_changed = 1;
+		cfq_mark_cfqq_idle_window(cfqq);
+		cfq_mark_cfqq_prio_changed(cfqq);
+		cfq_init_prio_data(cfqq);
 	}
 
 	if (new_cfqq)
@@ -1553,7 +1668,7 @@ static void
 cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 		       struct cfq_io_context *cic)
 {
-	int enable_idle = cfqq->idle_window;
+	int enable_idle = cfq_cfqq_idle_window(cfqq);
 
 	if (!cic->ioc->task || !cfqd->cfq_slice_idle)
 		enable_idle = 0;
@@ -1564,7 +1679,10 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 			enable_idle = 1;
 	}
 
-	cfqq->idle_window = enable_idle;
+	if (enable_idle)
+		cfq_mark_cfqq_idle_window(cfqq);
+	else
+		cfq_clear_cfqq_idle_window(cfqq);
 }
 
 
@@ -1586,14 +1704,14 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
 
 	if (cfq_class_idle(cfqq))
 		return 1;
-	if (!new_cfqq->wait_request)
+	if (!cfq_cfqq_wait_request(new_cfqq))
 		return 0;
 	/*
 	 * if it doesn't have slice left, forget it
 	 */
 	if (new_cfqq->slice_left < cfqd->cfq_slice_idle)
 		return 0;
-	if (crq->is_sync && !cfq_cfqq_sync(cfqq))
+	if (cfq_crq_is_sync(crq) && !cfq_cfqq_sync(cfqq))
 		return 1;
 
 	return 0;
@@ -1614,7 +1732,7 @@ static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 		cfqq->slice_left = cfq_prio_to_slice(cfqd, cfqq) / 2;
 
 	cfqq->slice_end = cfqq->slice_left + jiffies;
-	cfq_slice_expired(cfqd, 1);
+	__cfq_slice_expired(cfqd, cfqq, 1);
 	__cfq_set_active_queue(cfqd, cfqq);
 }
 
@@ -1639,7 +1757,7 @@ static void
 cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 		 struct cfq_rq *crq)
 {
-	const int sync = crq->is_sync;
+	const int sync = cfq_crq_is_sync(crq);
 
 	cfqq->next_crq = cfq_choose_req(cfqd, cfqq->next_crq, crq);
 
@@ -1658,8 +1776,8 @@ cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 		 * immediately and flag that we must not expire this queue
 		 * just now
 		 */
-		if (cfqq->wait_request) {
-			cfqq->must_dispatch = 1;
+		if (cfq_cfqq_wait_request(cfqq)) {
+			cfq_mark_cfqq_must_dispatch(cfqq);
 			del_timer(&cfqd->idle_slice_timer);
 			cfq_start_queueing(cfqd, cfqq);
 		}
@@ -1670,7 +1788,7 @@ cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 		 * has some old slice time left and is of higher priority
 		 */
 		cfq_preempt_queue(cfqd, cfqq);
-		cfqq->must_dispatch = 1;
+		cfq_mark_cfqq_must_dispatch(cfqq);
 		cfq_start_queueing(cfqd, cfqq);
 	}
 }
@@ -1713,7 +1831,7 @@ cfq_insert_request(request_queue_t *q, struct request *rq, int where)
 			 * be kicked by __make_request() afterward.
 			 * Kick it here.
 			 */
-			kblockd_schedule_work(&cfqd->unplug_work);
+			cfq_schedule_dispatch(cfqd);
 			break;
 		case ELEVATOR_INSERT_FRONT:
 			list_add(&rq->queuelist, &q->queue_head);
@@ -1750,9 +1868,11 @@ static void cfq_completed_request(request_queue_t *q, struct request *rq)
 
 	cfqq = crq->cfq_queue;
 
-	if (crq->in_flight) {
-		WARN_ON(!cfqq->in_flight);
-		cfqq->in_flight--;
+	if (cfq_crq_in_flight(crq)) {
+		const int sync = cfq_crq_is_sync(crq);
+
+		WARN_ON(!cfqq->on_dispatch[sync]);
+		cfqq->on_dispatch[sync]--;
 	}
 
 	cfq_account_completion(cfqq, crq);
@@ -1814,7 +1934,7 @@ static void cfq_prio_boost(struct cfq_queue *cfqq)
 	 * refile between round-robin lists if we moved the priority class
 	 */
 	if ((ioprio_class != cfqq->ioprio_class || ioprio != cfqq->ioprio) &&
-	    cfqq->on_rr)
+	    cfq_cfqq_on_rr(cfqq))
 		cfq_resort_rr_list(cfqq, 0);
 }
 
@@ -1830,23 +1950,27 @@ static inline int
 __cfq_may_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 		struct task_struct *task, int rw)
 {
-	if (cfqq->wait_request && cfqq->must_alloc)
+#if 1
+	if ((cfq_cfqq_wait_request(cfqq) || cfq_cfqq_must_alloc(cfqq)) &&
+	    !cfq_cfqq_must_alloc_slice) {
+		cfq_mark_cfqq_must_alloc_slice(cfqq);
 		return ELV_MQUEUE_MUST;
+	}
 
 	return ELV_MQUEUE_MAY;
-#if 0
+#else
 	if (!cfqq || task->flags & PF_MEMALLOC)
 		return ELV_MQUEUE_MAY;
-	if (!cfqq->allocated[rw] || cfqq->must_alloc) {
-		if (cfqq->wait_request)
+	if (!cfqq->allocated[rw] || cfq_cfqq_must_alloc(cfqq)) {
+		if (cfq_cfqq_wait_request(cfqq))
 			return ELV_MQUEUE_MUST;
 
 		/*
 		 * only allow 1 ELV_MQUEUE_MUST per slice, otherwise we
 		 * can quickly flood the queue with writes from a single task
 		 */
-		if (rw == READ || !cfqq->must_alloc_slice) {
-			cfqq->must_alloc_slice = 1;
+		if (rw == READ || !cfq_cfqq_must_alloc_slice) {
+			cfq_mark_cfqq_must_alloc_slice(cfqq);
 			return ELV_MQUEUE_MUST;
 		}
 
@@ -1881,7 +2005,7 @@ static int cfq_may_queue(request_queue_t *q, int rw, struct bio *bio)
 	 * so just lookup a possibly existing queue, or return 'may queue'
 	 * if that fails
 	 */
-	cfqq = cfq_find_cfq_hash(cfqd, cfq_queue_pid(tsk, rw));
+	cfqq = cfq_find_cfq_hash(cfqd, cfq_queue_pid(tsk, rw), tsk->ioprio);
 	if (cfqq) {
 		cfq_init_prio_data(cfqq);
 		cfq_prio_boost(cfqq);
@@ -1943,15 +2067,17 @@ cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
 		int gfp_mask)
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
+	struct task_struct *tsk = current;
 	struct cfq_io_context *cic;
 	const int rw = rq_data_dir(rq);
+	pid_t key = cfq_queue_pid(tsk, rw);
 	struct cfq_queue *cfqq;
 	struct cfq_rq *crq;
 	unsigned long flags;
 
 	might_sleep_if(gfp_mask & __GFP_WAIT);
 
-	cic = cfq_get_io_context(cfqd, cfq_queue_pid(current, rw), gfp_mask);
+	cic = cfq_get_io_context(cfqd, key, gfp_mask);
 
 	spin_lock_irqsave(q->queue_lock, flags);
 
@@ -1959,7 +2085,7 @@ cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
 		goto queue_fail;
 
 	if (!cic->cfqq) {
-		cfqq = cfq_get_queue(cfqd, current->pid, gfp_mask);
+		cfqq = cfq_get_queue(cfqd, key, tsk->ioprio, gfp_mask);
 		if (!cfqq)
 			goto queue_fail;
 
@@ -1968,7 +2094,7 @@ cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
 		cfqq = cic->cfqq;
 
 	cfqq->allocated[rw]++;
-	cfqq->must_alloc = 0;
+	cfq_clear_cfqq_must_alloc(cfqq);
 	cfqd->rq_starved = 0;
 	atomic_inc(&cfqq->ref);
 	spin_unlock_irqrestore(q->queue_lock, flags);
@@ -1981,9 +2107,15 @@ cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
 		INIT_HLIST_NODE(&crq->hash);
 		crq->cfq_queue = cfqq;
 		crq->io_context = cic;
-		crq->in_flight = crq->accounted = 0;
-		crq->is_sync = (rw == READ || process_sync(current));
-		crq->requeued = 0;
+		cfq_clear_crq_in_flight(crq);
+		cfq_clear_crq_in_driver(crq);
+		cfq_clear_crq_requeued(crq);
+
+		if (rw == READ || process_sync(tsk))
+			cfq_mark_crq_is_sync(crq);
+		else
+			cfq_clear_crq_is_sync(crq);
+
 		rq->elevator_private = crq;
 		return 0;
 	}
@@ -1991,7 +2123,7 @@ cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
 	spin_lock_irqsave(q->queue_lock, flags);
 	cfqq->allocated[rw]--;
 	if (!(cfqq->allocated[0] + cfqq->allocated[1]))
-		cfqq->must_alloc = 1;
+		cfq_mark_cfqq_must_alloc(cfqq);
 	cfq_put_queue(cfqq);
 queue_fail:
 	if (cic)
@@ -2002,7 +2134,7 @@ queue_fail:
 	 * that would be an extremely rare OOM situation
 	 */
 	cfqd->rq_starved = 1;
-	kblockd_schedule_work(&cfqd->unplug_work);
+	cfq_schedule_dispatch(cfqd);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 	return 1;
 }
@@ -2068,15 +2200,14 @@ static void cfq_idle_slice_timer(unsigned long data)
 		 * not expired and it has a request pending, let it dispatch
 		 */
 		if (!RB_EMPTY(&cfqq->sort_list)) {
-			cfqq->must_dispatch = 1;
+			cfq_mark_cfqq_must_dispatch(cfqq);
 			goto out_kick;
 		}
 	}
 expire:
 	cfq_slice_expired(cfqd, 0);
 out_kick:
-	if (cfq_pending_requests(cfqd))
-		kblockd_schedule_work(&cfqd->unplug_work);
+	cfq_schedule_dispatch(cfqd);
 out_cont:
 	spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
 }
@@ -2099,11 +2230,17 @@ static void cfq_idle_class_timer(unsigned long data)
 		cfqd->idle_class_timer.expires = end;
 		add_timer(&cfqd->idle_class_timer);
 	} else
-		kblockd_schedule_work(&cfqd->unplug_work);
+		cfq_schedule_dispatch(cfqd);
 
 	spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
 }
 
+static void cfq_shutdown_timer_wq(struct cfq_data *cfqd)
+{
+	del_timer_sync(&cfqd->idle_slice_timer);
+	del_timer_sync(&cfqd->idle_class_timer);
+	blk_sync_queue(cfqd->queue);
+}
 
 static void cfq_put_cfqd(struct cfq_data *cfqd)
 {
@@ -2112,7 +2249,7 @@ static void cfq_put_cfqd(struct cfq_data *cfqd)
 	if (!atomic_dec_and_test(&cfqd->ref))
 		return;
 
-	blk_sync_queue(q);
+	cfq_shutdown_timer_wq(cfqd);
 
 	blk_put_queue(q);
 
@@ -2126,8 +2263,7 @@ static void cfq_exit_queue(elevator_t *e)
 {
 	struct cfq_data *cfqd = e->elevator_data;
 
-	del_timer_sync(&cfqd->idle_slice_timer);
-	del_timer_sync(&cfqd->idle_class_timer);
+	cfq_shutdown_timer_wq(cfqd);
 	cfq_put_cfqd(cfqd);
 }
 
@@ -2198,6 +2334,7 @@ static int cfq_init_queue(request_queue_t *q, elevator_t *e)
 	cfqd->cfq_slice_async_rq = cfq_slice_async_rq;
 	cfqd->cfq_slice_idle = cfq_slice_idle;
 	cfqd->cfq_max_depth = cfq_max_depth;
+
 	return 0;
 out_crqpool:
 	kfree(cfqd->cfq_hash);
@@ -2369,6 +2506,7 @@ static struct cfq_fs_entry cfq_max_depth_entry = {
 	.show = cfq_max_depth_show,
 	.store = cfq_max_depth_store,
 };
+
 static struct attribute *default_attrs[] = {
 	&cfq_quantum_entry.attr,
 	&cfq_queued_entry.attr,
diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h
index 7811300d88e..8a453a0b5e4 100644
--- a/include/linux/ioprio.h
+++ b/include/linux/ioprio.h
@@ -12,6 +12,7 @@
 
 #define IOPRIO_PRIO_CLASS(mask)	((mask) >> IOPRIO_CLASS_SHIFT)
 #define IOPRIO_PRIO_DATA(mask)	((mask) & IOPRIO_PRIO_MASK)
+#define IOPRIO_PRIO_VALUE(class, data)	(((class) << IOPRIO_CLASS_SHIFT) | data)
 
 #define ioprio_valid(mask)	(IOPRIO_PRIO_CLASS((mask)) != IOPRIO_CLASS_NONE)
 
-- 
cgit v1.2.3-70-g09d2


From 4808a1c0261176f9c7e28e7f108d41a381a7d0fc Mon Sep 17 00:00:00 2001
From: Olav Kongas <ok@artecdesign.ee>
Date: Sat, 9 Apr 2005 22:57:39 +0300
Subject: [PATCH] USB: Add isp116x-hcd USB host controller driver

This patch provides an "isp116x-hcd" driver for Philips'
ISP1160/ISP1161 USB host controllers.

The driver:
 - is relatively small, meant for use on embedded platforms.
 - runs usbtests 1-14 without problems for days.
 - has been in use by 6-7 different people on ARM and PPC platforms,
   running a range of devices including USB hubs.
 - supports suspend/resume of both the platform device and the root hub;
   supports remote wakeup of the root hub (but NOT the platform device)
   by USB devices.
 - does NOT support ISO transfers (nobody has asked for them).
 - is PIO-only.

Signed-off-by: Olav Kongas <ok@artecdesign.ee>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/Makefile           |    1 +
 drivers/usb/host/Kconfig       |   13 +
 drivers/usb/host/Makefile      |    1 +
 drivers/usb/host/isp116x-hcd.c | 1882 ++++++++++++++++++++++++++++++++++++++++
 drivers/usb/host/isp116x.h     |  583 +++++++++++++
 include/linux/usb_isp116x.h    |   47 +
 6 files changed, 2527 insertions(+)
 create mode 100644 drivers/usb/host/isp116x-hcd.c
 create mode 100644 drivers/usb/host/isp116x.h
 create mode 100644 include/linux/usb_isp116x.h

(limited to 'include/linux')

diff --git a/drivers/usb/Makefile b/drivers/usb/Makefile
index a61d4433a98..c149c06388b 100644
--- a/drivers/usb/Makefile
+++ b/drivers/usb/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_USB)		+= core/
 obj-$(CONFIG_USB_MON)		+= mon/
 
 obj-$(CONFIG_USB_EHCI_HCD)	+= host/
+obj-$(CONFIG_USB_ISP116X_HCD)	+= host/
 obj-$(CONFIG_USB_OHCI_HCD)	+= host/
 obj-$(CONFIG_USB_UHCI_HCD)	+= host/
 obj-$(CONFIG_USB_SL811_HCD)	+= host/
diff --git a/drivers/usb/host/Kconfig b/drivers/usb/host/Kconfig
index 19e598c9641..ed1899d307d 100644
--- a/drivers/usb/host/Kconfig
+++ b/drivers/usb/host/Kconfig
@@ -49,6 +49,19 @@ config USB_EHCI_ROOT_HUB_TT
 
 	  This supports the EHCI implementation from TransDimension Inc.
 
+config USB_ISP116X_HCD
+	tristate "ISP116X HCD support"
+	depends on USB
+	default N
+	---help---
+	  The ISP1160 and ISP1161 chips are USB host controllers. Enable this
+	  option if your board has this chip. If unsure, say N.
+
+	  This driver does not support isochronous transfers.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called isp116x-hcd.
+
 config USB_OHCI_HCD
 	tristate "OHCI HCD support"
 	depends on USB && USB_ARCH_HAS_OHCI
diff --git a/drivers/usb/host/Makefile b/drivers/usb/host/Makefile
index 5dbd3e7a27c..350d14fc1cc 100644
--- a/drivers/usb/host/Makefile
+++ b/drivers/usb/host/Makefile
@@ -4,6 +4,7 @@
 #
 
 obj-$(CONFIG_USB_EHCI_HCD)	+= ehci-hcd.o
+obj-$(CONFIG_USB_ISP116X_HCD)	+= isp116x-hcd.o
 obj-$(CONFIG_USB_OHCI_HCD)	+= ohci-hcd.o
 obj-$(CONFIG_USB_UHCI_HCD)	+= uhci-hcd.o
 obj-$(CONFIG_USB_SL811_HCD)	+= sl811-hcd.o
diff --git a/drivers/usb/host/isp116x-hcd.c b/drivers/usb/host/isp116x-hcd.c
new file mode 100644
index 00000000000..69e7433d9ce
--- /dev/null
+++ b/drivers/usb/host/isp116x-hcd.c
@@ -0,0 +1,1882 @@
+/*
+ * ISP116x HCD (Host Controller Driver) for USB.
+ *
+ * Derived from the SL811 HCD, rewritten for ISP116x.
+ * Copyright (C) 2005 Olav Kongas <ok@artecdesign.ee>
+ *
+ * Portions:
+ * Copyright (C) 2004 Psion Teklogix (for NetBook PRO)
+ * Copyright (C) 2004 David Brownell
+ *
+ * Periodic scheduling is based on Roman's OHCI code
+ * Copyright (C) 1999 Roman Weissgaerber
+ *
+ */
+
+/*
+ * The driver basically works. A number of people have used it with a range
+ * of devices.
+ *
+ *The driver passes all usbtests 1-14.
+ *
+ * Suspending/resuming of root hub via sysfs works. Remote wakeup works too.
+ * And suspending/resuming of platform device works too. Suspend/resume
+ * via HCD operations vector is not implemented.
+ *
+ * Iso transfer support is not implemented. Adding this would include
+ * implementing recovery from the failure to service the processed ITL
+ * fifo ram in time, which will involve chip reset.
+ *
+ * TODO:
+ + More testing of suspend/resume.
+*/
+
+/*
+  ISP116x chips require certain delays between accesses to its
+  registers. The following timing options exist.
+
+  1. Configure your memory controller (the best)
+  2. Implement platform-specific delay function possibly
+  combined with configuring the memory controller; see
+  include/linux/usb-isp116x.h for more info. Some broken
+  memory controllers line LH7A400 SMC need this. Also,
+  uncomment for that to work the following
+  USE_PLATFORM_DELAY macro.
+  3. Use ndelay (easiest, poorest). For that, uncomment
+  the following USE_NDELAY macro.
+*/
+#define USE_PLATFORM_DELAY
+//#define USE_NDELAY
+
+//#define DEBUG
+//#define VERBOSE
+/* Transfer descriptors. See dump_ptd() for printout format  */
+//#define PTD_TRACE
+/* enqueuing/finishing log of urbs */
+//#define URB_TRACE
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/ioport.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/interrupt.h>
+#include <linux/usb.h>
+#include <linux/usb_isp116x.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/system.h>
+#include <asm/byteorder.h>
+
+#ifndef DEBUG
+#	define	STUB_DEBUG_FILE
+#endif
+
+#include "../core/hcd.h"
+#include "isp116x.h"
+
+#define DRIVER_VERSION	"08 Apr 2005"
+#define DRIVER_DESC	"ISP116x USB Host Controller Driver"
+
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_LICENSE("GPL");
+
+static const char hcd_name[] = "isp116x-hcd";
+
+/*-----------------------------------------------------------------*/
+
+/*
+  Write len bytes to fifo, pad till 32-bit boundary
+ */
+static void write_ptddata_to_fifo(struct isp116x *isp116x, void *buf, int len)
+{
+	u8 *dp = (u8 *) buf;
+	u16 *dp2 = (u16 *) buf;
+	u16 w;
+	int quot = len % 4;
+
+	if ((unsigned long)dp2 & 1) {
+		/* not aligned */
+		for (; len > 1; len -= 2) {
+			w = *dp++;
+			w |= *dp++ << 8;
+			isp116x_raw_write_data16(isp116x, w);
+		}
+		if (len)
+			isp116x_write_data16(isp116x, (u16) * dp);
+	} else {
+		/* aligned */
+		for (; len > 1; len -= 2)
+			isp116x_raw_write_data16(isp116x, *dp2++);
+		if (len)
+			isp116x_write_data16(isp116x, 0xff & *((u8 *) dp2));
+	}
+	if (quot == 1 || quot == 2)
+		isp116x_raw_write_data16(isp116x, 0);
+}
+
+/*
+  Read len bytes from fifo and then read till 32-bit boundary.
+ */
+static void read_ptddata_from_fifo(struct isp116x *isp116x, void *buf, int len)
+{
+	u8 *dp = (u8 *) buf;
+	u16 *dp2 = (u16 *) buf;
+	u16 w;
+	int quot = len % 4;
+
+	if ((unsigned long)dp2 & 1) {
+		/* not aligned */
+		for (; len > 1; len -= 2) {
+			w = isp116x_raw_read_data16(isp116x);
+			*dp++ = w & 0xff;
+			*dp++ = (w >> 8) & 0xff;
+		}
+		if (len)
+			*dp = 0xff & isp116x_read_data16(isp116x);
+	} else {
+		/* aligned */
+		for (; len > 1; len -= 2)
+			*dp2++ = isp116x_raw_read_data16(isp116x);
+		if (len)
+			*(u8 *) dp2 = 0xff & isp116x_read_data16(isp116x);
+	}
+	if (quot == 1 || quot == 2)
+		isp116x_raw_read_data16(isp116x);
+}
+
+/*
+  Write ptd's and data for scheduled transfers into
+  the fifo ram. Fifo must be empty and ready.
+*/
+static void pack_fifo(struct isp116x *isp116x)
+{
+	struct isp116x_ep *ep;
+	struct ptd *ptd;
+	int buflen = isp116x->atl_last_dir == PTD_DIR_IN
+	    ? isp116x->atl_bufshrt : isp116x->atl_buflen;
+	int ptd_count = 0;
+
+	isp116x_write_reg16(isp116x, HCuPINT, HCuPINT_AIIEOT);
+	isp116x_write_reg16(isp116x, HCXFERCTR, buflen);
+	isp116x_write_addr(isp116x, HCATLPORT | ISP116x_WRITE_OFFSET);
+	for (ep = isp116x->atl_active; ep; ep = ep->active) {
+		++ptd_count;
+		ptd = &ep->ptd;
+		dump_ptd(ptd);
+		dump_ptd_out_data(ptd, ep->data);
+		isp116x_write_data16(isp116x, ptd->count);
+		isp116x_write_data16(isp116x, ptd->mps);
+		isp116x_write_data16(isp116x, ptd->len);
+		isp116x_write_data16(isp116x, ptd->faddr);
+		buflen -= sizeof(struct ptd);
+		/* Skip writing data for last IN PTD */
+		if (ep->active || (isp116x->atl_last_dir != PTD_DIR_IN)) {
+			write_ptddata_to_fifo(isp116x, ep->data, ep->length);
+			buflen -= ALIGN(ep->length, 4);
+		}
+	}
+	BUG_ON(buflen);
+}
+
+/*
+  Read the processed ptd's and data from fifo ram back to
+  URBs' buffers. Fifo must be full and done
+*/
+static void unpack_fifo(struct isp116x *isp116x)
+{
+	struct isp116x_ep *ep;
+	struct ptd *ptd;
+	int buflen = isp116x->atl_last_dir == PTD_DIR_IN
+	    ? isp116x->atl_buflen : isp116x->atl_bufshrt;
+
+	isp116x_write_reg16(isp116x, HCuPINT, HCuPINT_AIIEOT);
+	isp116x_write_reg16(isp116x, HCXFERCTR, buflen);
+	isp116x_write_addr(isp116x, HCATLPORT);
+	for (ep = isp116x->atl_active; ep; ep = ep->active) {
+		ptd = &ep->ptd;
+		ptd->count = isp116x_read_data16(isp116x);
+		ptd->mps = isp116x_read_data16(isp116x);
+		ptd->len = isp116x_read_data16(isp116x);
+		ptd->faddr = isp116x_read_data16(isp116x);
+		buflen -= sizeof(struct ptd);
+		/* Skip reading data for last Setup or Out PTD */
+		if (ep->active || (isp116x->atl_last_dir == PTD_DIR_IN)) {
+			read_ptddata_from_fifo(isp116x, ep->data, ep->length);
+			buflen -= ALIGN(ep->length, 4);
+		}
+		dump_ptd(ptd);
+		dump_ptd_in_data(ptd, ep->data);
+	}
+	BUG_ON(buflen);
+}
+
+/*---------------------------------------------------------------*/
+
+/*
+  Set up PTD's.
+*/
+static void preproc_atl_queue(struct isp116x *isp116x)
+{
+	struct isp116x_ep *ep;
+	struct urb *urb;
+	struct ptd *ptd;
+	u16 toggle, dir, len;
+
+	for (ep = isp116x->atl_active; ep; ep = ep->active) {
+		BUG_ON(list_empty(&ep->hep->urb_list));
+		urb = container_of(ep->hep->urb_list.next,
+				   struct urb, urb_list);
+		ptd = &ep->ptd;
+		len = ep->length;
+		spin_lock(&urb->lock);
+		ep->data = (unsigned char *)urb->transfer_buffer
+		    + urb->actual_length;
+
+		switch (ep->nextpid) {
+		case USB_PID_IN:
+			toggle = usb_gettoggle(urb->dev, ep->epnum, 0);
+			dir = PTD_DIR_IN;
+			break;
+		case USB_PID_OUT:
+			toggle = usb_gettoggle(urb->dev, ep->epnum, 1);
+			dir = PTD_DIR_OUT;
+			break;
+		case USB_PID_SETUP:
+			toggle = 0;
+			dir = PTD_DIR_SETUP;
+			len = sizeof(struct usb_ctrlrequest);
+			ep->data = urb->setup_packet;
+			break;
+		case USB_PID_ACK:
+			toggle = 1;
+			len = 0;
+			dir = (urb->transfer_buffer_length
+			       && usb_pipein(urb->pipe))
+			    ? PTD_DIR_OUT : PTD_DIR_IN;
+			break;
+		default:
+			/* To please gcc */
+			toggle = dir = 0;
+			ERR("%s %d: ep->nextpid %d\n", __func__, __LINE__,
+			    ep->nextpid);
+			BUG_ON(1);
+		}
+
+		ptd->count = PTD_CC_MSK | PTD_ACTIVE_MSK | PTD_TOGGLE(toggle);
+		ptd->mps = PTD_MPS(ep->maxpacket)
+		    | PTD_SPD(urb->dev->speed == USB_SPEED_LOW)
+		    | PTD_EP(ep->epnum);
+		ptd->len = PTD_LEN(len) | PTD_DIR(dir);
+		ptd->faddr = PTD_FA(usb_pipedevice(urb->pipe));
+		spin_unlock(&urb->lock);
+		if (!ep->active) {
+			ptd->mps |= PTD_LAST_MSK;
+			isp116x->atl_last_dir = dir;
+		}
+		isp116x->atl_bufshrt = sizeof(struct ptd) + isp116x->atl_buflen;
+		isp116x->atl_buflen = isp116x->atl_bufshrt + ALIGN(len, 4);
+	}
+}
+
+/*
+  Analyze transfer results, handle partial transfers and errors
+*/
+static void postproc_atl_queue(struct isp116x *isp116x)
+{
+	struct isp116x_ep *ep;
+	struct urb *urb;
+	struct usb_device *udev;
+	struct ptd *ptd;
+	int short_not_ok;
+	u8 cc;
+
+	for (ep = isp116x->atl_active; ep; ep = ep->active) {
+		BUG_ON(list_empty(&ep->hep->urb_list));
+		urb =
+		    container_of(ep->hep->urb_list.next, struct urb, urb_list);
+		udev = urb->dev;
+		ptd = &ep->ptd;
+		cc = PTD_GET_CC(ptd);
+
+		spin_lock(&urb->lock);
+		short_not_ok = 1;
+
+		/* Data underrun is special. For allowed underrun
+		   we clear the error and continue as normal. For
+		   forbidden underrun we finish the DATA stage
+		   immediately while for control transfer,
+		   we do a STATUS stage. */
+		if (cc == TD_DATAUNDERRUN) {
+			if (!(urb->transfer_flags & URB_SHORT_NOT_OK)) {
+				DBG("Allowed data underrun\n");
+				cc = TD_CC_NOERROR;
+				short_not_ok = 0;
+			} else {
+				ep->error_count = 1;
+				if (usb_pipecontrol(urb->pipe))
+					ep->nextpid = USB_PID_ACK;
+				else
+					usb_settoggle(udev, ep->epnum,
+						      ep->nextpid ==
+						      USB_PID_OUT,
+						      PTD_GET_TOGGLE(ptd) ^ 1);
+				urb->status = cc_to_error[TD_DATAUNDERRUN];
+				spin_unlock(&urb->lock);
+				continue;
+			}
+		}
+		/* Keep underrun error through the STATUS stage */
+		if (urb->status == cc_to_error[TD_DATAUNDERRUN])
+			cc = TD_DATAUNDERRUN;
+
+		if (cc != TD_CC_NOERROR && cc != TD_NOTACCESSED
+		    && (++ep->error_count >= 3 || cc == TD_CC_STALL
+			|| cc == TD_DATAOVERRUN)) {
+			if (urb->status == -EINPROGRESS)
+				urb->status = cc_to_error[cc];
+			if (ep->nextpid == USB_PID_ACK)
+				ep->nextpid = 0;
+			spin_unlock(&urb->lock);
+			continue;
+		}
+		/* According to usb spec, zero-length Int transfer signals
+		   finishing of the urb. Hey, does this apply only
+		   for IN endpoints? */
+		if (usb_pipeint(urb->pipe) && !PTD_GET_LEN(ptd)) {
+			if (urb->status == -EINPROGRESS)
+				urb->status = 0;
+			spin_unlock(&urb->lock);
+			continue;
+		}
+
+		/* Relax after previously failed, but later succeeded
+		   or correctly NAK'ed retransmission attempt */
+		if (ep->error_count
+		    && (cc == TD_CC_NOERROR || cc == TD_NOTACCESSED))
+			ep->error_count = 0;
+
+		/* Take into account idiosyncracies of the isp116x chip
+		   regarding toggle bit for failed transfers */
+		if (ep->nextpid == USB_PID_OUT)
+			usb_settoggle(udev, ep->epnum, 1, PTD_GET_TOGGLE(ptd)
+				      ^ (ep->error_count > 0));
+		else if (ep->nextpid == USB_PID_IN)
+			usb_settoggle(udev, ep->epnum, 0, PTD_GET_TOGGLE(ptd)
+				      ^ (ep->error_count > 0));
+
+		switch (ep->nextpid) {
+		case USB_PID_IN:
+		case USB_PID_OUT:
+			urb->actual_length += PTD_GET_COUNT(ptd);
+			if (PTD_GET_ACTIVE(ptd)
+			    || (cc != TD_CC_NOERROR && cc < 0x0E))
+				break;
+			if (urb->transfer_buffer_length != urb->actual_length) {
+				if (short_not_ok)
+					break;
+			} else {
+				if (urb->transfer_flags & URB_ZERO_PACKET
+				    && ep->nextpid == USB_PID_OUT
+				    && !(PTD_GET_COUNT(ptd) % ep->maxpacket)) {
+					DBG("Zero packet requested\n");
+					break;
+				}
+			}
+			/* All data for this URB is transferred, let's finish */
+			if (usb_pipecontrol(urb->pipe))
+				ep->nextpid = USB_PID_ACK;
+			else if (urb->status == -EINPROGRESS)
+				urb->status = 0;
+			break;
+		case USB_PID_SETUP:
+			if (PTD_GET_ACTIVE(ptd)
+			    || (cc != TD_CC_NOERROR && cc < 0x0E))
+				break;
+			if (urb->transfer_buffer_length == urb->actual_length)
+				ep->nextpid = USB_PID_ACK;
+			else if (usb_pipeout(urb->pipe)) {
+				usb_settoggle(udev, 0, 1, 1);
+				ep->nextpid = USB_PID_OUT;
+			} else {
+				usb_settoggle(udev, 0, 0, 1);
+				ep->nextpid = USB_PID_IN;
+			}
+			break;
+		case USB_PID_ACK:
+			if (PTD_GET_ACTIVE(ptd)
+			    || (cc != TD_CC_NOERROR && cc < 0x0E))
+				break;
+			if (urb->status == -EINPROGRESS)
+				urb->status = 0;
+			ep->nextpid = 0;
+			break;
+		default:
+			BUG_ON(1);
+		}
+		spin_unlock(&urb->lock);
+	}
+}
+
+/*
+  Take done or failed requests out of schedule. Give back
+  processed urbs.
+*/
+static void finish_request(struct isp116x *isp116x, struct isp116x_ep *ep,
+			   struct urb *urb, struct pt_regs *regs)
+__releases(isp116x->lock) __acquires(isp116x->lock)
+{
+	unsigned i;
+
+	urb->hcpriv = NULL;
+	ep->error_count = 0;
+
+	if (usb_pipecontrol(urb->pipe))
+		ep->nextpid = USB_PID_SETUP;
+
+	urb_dbg(urb, "Finish");
+
+	spin_unlock(&isp116x->lock);
+	usb_hcd_giveback_urb(isp116x_to_hcd(isp116x), urb, regs);
+	spin_lock(&isp116x->lock);
+
+	/* take idle endpoints out of the schedule */
+	if (!list_empty(&ep->hep->urb_list))
+		return;
+
+	/* async deschedule */
+	if (!list_empty(&ep->schedule)) {
+		list_del_init(&ep->schedule);
+		return;
+	}
+
+	/* periodic deschedule */
+	DBG("deschedule qh%d/%p branch %d\n", ep->period, ep, ep->branch);
+	for (i = ep->branch; i < PERIODIC_SIZE; i += ep->period) {
+		struct isp116x_ep *temp;
+		struct isp116x_ep **prev = &isp116x->periodic[i];
+
+		while (*prev && ((temp = *prev) != ep))
+			prev = &temp->next;
+		if (*prev)
+			*prev = ep->next;
+		isp116x->load[i] -= ep->load;
+	}
+	ep->branch = PERIODIC_SIZE;
+	isp116x_to_hcd(isp116x)->self.bandwidth_allocated -=
+	    ep->load / ep->period;
+
+	/* switch irq type? */
+	if (!--isp116x->periodic_count) {
+		isp116x->irqenb &= ~HCuPINT_SOF;
+		isp116x->irqenb |= HCuPINT_ATL;
+	}
+}
+
+/*
+  Scan transfer lists, schedule transfers, send data off
+  to chip.
+ */
+static void start_atl_transfers(struct isp116x *isp116x)
+{
+	struct isp116x_ep *last_ep = NULL, *ep;
+	struct urb *urb;
+	u16 load = 0;
+	int len, index, speed, byte_time;
+
+	if (atomic_read(&isp116x->atl_finishing))
+		return;
+
+	if (!HC_IS_RUNNING(isp116x_to_hcd(isp116x)->state))
+		return;
+
+	/* FIFO not empty? */
+	if (isp116x_read_reg16(isp116x, HCBUFSTAT) & HCBUFSTAT_ATL_FULL)
+		return;
+
+	isp116x->atl_active = NULL;
+	isp116x->atl_buflen = isp116x->atl_bufshrt = 0;
+
+	/* Schedule int transfers */
+	if (isp116x->periodic_count) {
+		isp116x->fmindex = index =
+		    (isp116x->fmindex + 1) & (PERIODIC_SIZE - 1);
+		if ((load = isp116x->load[index])) {
+			/* Bring all int transfers for this frame
+			   into the active queue */
+			isp116x->atl_active = last_ep =
+			    isp116x->periodic[index];
+			while (last_ep->next)
+				last_ep = (last_ep->active = last_ep->next);
+			last_ep->active = NULL;
+		}
+	}
+
+	/* Schedule control/bulk transfers */
+	list_for_each_entry(ep, &isp116x->async, schedule) {
+		urb = container_of(ep->hep->urb_list.next,
+				   struct urb, urb_list);
+		speed = urb->dev->speed;
+		byte_time = speed == USB_SPEED_LOW
+		    ? BYTE_TIME_LOWSPEED : BYTE_TIME_FULLSPEED;
+
+		if (ep->nextpid == USB_PID_SETUP) {
+			len = sizeof(struct usb_ctrlrequest);
+		} else if (ep->nextpid == USB_PID_ACK) {
+			len = 0;
+		} else {
+			/* Find current free length ... */
+			len = (MAX_LOAD_LIMIT - load) / byte_time;
+
+			/* ... then limit it to configured max size ... */
+			len = min(len, speed == USB_SPEED_LOW ?
+				  MAX_TRANSFER_SIZE_LOWSPEED :
+				  MAX_TRANSFER_SIZE_FULLSPEED);
+
+			/* ... and finally cut to the multiple of MaxPacketSize,
+			   or to the real length if there's enough room. */
+			if (len <
+			    (urb->transfer_buffer_length -
+			     urb->actual_length)) {
+				len -= len % ep->maxpacket;
+				if (!len)
+					continue;
+			} else
+				len = urb->transfer_buffer_length -
+				    urb->actual_length;
+			BUG_ON(len < 0);
+		}
+
+		load += len * byte_time;
+		if (load > MAX_LOAD_LIMIT)
+			break;
+
+		ep->active = NULL;
+		ep->length = len;
+		if (last_ep)
+			last_ep->active = ep;
+		else
+			isp116x->atl_active = ep;
+		last_ep = ep;
+	}
+
+	/* Avoid starving of endpoints */
+	if ((&isp116x->async)->next != (&isp116x->async)->prev)
+		list_move(&isp116x->async, (&isp116x->async)->next);
+
+	if (isp116x->atl_active) {
+		preproc_atl_queue(isp116x);
+		pack_fifo(isp116x);
+	}
+}
+
+/*
+  Finish the processed transfers
+*/
+static void finish_atl_transfers(struct isp116x *isp116x, struct pt_regs *regs)
+{
+	struct isp116x_ep *ep;
+	struct urb *urb;
+
+	if (!isp116x->atl_active)
+		return;
+	/* Fifo not ready? */
+	if (!(isp116x_read_reg16(isp116x, HCBUFSTAT) & HCBUFSTAT_ATL_DONE))
+		return;
+
+	atomic_inc(&isp116x->atl_finishing);
+	unpack_fifo(isp116x);
+	postproc_atl_queue(isp116x);
+	for (ep = isp116x->atl_active; ep; ep = ep->active) {
+		urb =
+		    container_of(ep->hep->urb_list.next, struct urb, urb_list);
+		/* USB_PID_ACK check here avoids finishing of
+		   control transfers, for which TD_DATAUNDERRUN
+		   occured, while URB_SHORT_NOT_OK was set */
+		if (urb && urb->status != -EINPROGRESS
+		    && ep->nextpid != USB_PID_ACK)
+			finish_request(isp116x, ep, urb, regs);
+	}
+	atomic_dec(&isp116x->atl_finishing);
+}
+
+static irqreturn_t isp116x_irq(struct usb_hcd *hcd, struct pt_regs *regs)
+{
+	struct isp116x *isp116x = hcd_to_isp116x(hcd);
+	u16 irqstat;
+	irqreturn_t ret = IRQ_NONE;
+
+	spin_lock(&isp116x->lock);
+	isp116x_write_reg16(isp116x, HCuPINTENB, 0);
+	irqstat = isp116x_read_reg16(isp116x, HCuPINT);
+	isp116x_write_reg16(isp116x, HCuPINT, irqstat);
+
+	if (irqstat & (HCuPINT_ATL | HCuPINT_SOF)) {
+		ret = IRQ_HANDLED;
+		finish_atl_transfers(isp116x, regs);
+	}
+
+	if (irqstat & HCuPINT_OPR) {
+		u32 intstat = isp116x_read_reg32(isp116x, HCINTSTAT);
+		isp116x_write_reg32(isp116x, HCINTSTAT, intstat);
+		if (intstat & HCINT_UE) {
+			ERR("Unrecoverable error\n");
+			/* What should we do here? Reset?  */
+		}
+		if (intstat & HCINT_RHSC) {
+			isp116x->rhstatus =
+			    isp116x_read_reg32(isp116x, HCRHSTATUS);
+			isp116x->rhport[0] =
+			    isp116x_read_reg32(isp116x, HCRHPORT1);
+			isp116x->rhport[1] =
+			    isp116x_read_reg32(isp116x, HCRHPORT2);
+		}
+		if (intstat & HCINT_RD) {
+			DBG("---- remote wakeup\n");
+			schedule_work(&isp116x->rh_resume);
+			ret = IRQ_HANDLED;
+		}
+		irqstat &= ~HCuPINT_OPR;
+		ret = IRQ_HANDLED;
+	}
+
+	if (irqstat & (HCuPINT_ATL | HCuPINT_SOF)) {
+		start_atl_transfers(isp116x);
+	}
+
+	isp116x_write_reg16(isp116x, HCuPINTENB, isp116x->irqenb);
+	spin_unlock(&isp116x->lock);
+	return ret;
+}
+
+/*-----------------------------------------------------------------*/
+
+/* usb 1.1 says max 90% of a frame is available for periodic transfers.
+ * this driver doesn't promise that much since it's got to handle an
+ * IRQ per packet; irq handling latencies also use up that time.
+ */
+
+/* out of 1000 us */
+#define	MAX_PERIODIC_LOAD	600
+static int balance(struct isp116x *isp116x, u16 period, u16 load)
+{
+	int i, branch = -ENOSPC;
+
+	/* search for the least loaded schedule branch of that period
+	   which has enough bandwidth left unreserved. */
+	for (i = 0; i < period; i++) {
+		if (branch < 0 || isp116x->load[branch] > isp116x->load[i]) {
+			int j;
+
+			for (j = i; j < PERIODIC_SIZE; j += period) {
+				if ((isp116x->load[j] + load)
+				    > MAX_PERIODIC_LOAD)
+					break;
+			}
+			if (j < PERIODIC_SIZE)
+				continue;
+			branch = i;
+		}
+	}
+	return branch;
+}
+
+/* NB! ALL the code above this point runs with isp116x->lock
+   held, irqs off
+*/
+
+/*-----------------------------------------------------------------*/
+
+static int isp116x_urb_enqueue(struct usb_hcd *hcd,
+			       struct usb_host_endpoint *hep, struct urb *urb,
+			       int mem_flags)
+{
+	struct isp116x *isp116x = hcd_to_isp116x(hcd);
+	struct usb_device *udev = urb->dev;
+	unsigned int pipe = urb->pipe;
+	int is_out = !usb_pipein(pipe);
+	int type = usb_pipetype(pipe);
+	int epnum = usb_pipeendpoint(pipe);
+	struct isp116x_ep *ep = NULL;
+	unsigned long flags;
+	int i;
+	int ret = 0;
+
+	urb_dbg(urb, "Enqueue");
+
+	if (type == PIPE_ISOCHRONOUS) {
+		ERR("Isochronous transfers not supported\n");
+		urb_dbg(urb, "Refused to enqueue");
+		return -ENXIO;
+	}
+	/* avoid all allocations within spinlocks: request or endpoint */
+	if (!hep->hcpriv) {
+		ep = kcalloc(1, sizeof *ep, (__force unsigned)mem_flags);
+		if (!ep)
+			return -ENOMEM;
+	}
+
+	spin_lock_irqsave(&isp116x->lock, flags);
+	if (!HC_IS_RUNNING(hcd->state)) {
+		ret = -ENODEV;
+		goto fail;
+	}
+
+	if (hep->hcpriv)
+		ep = hep->hcpriv;
+	else {
+		INIT_LIST_HEAD(&ep->schedule);
+		ep->udev = usb_get_dev(udev);
+		ep->epnum = epnum;
+		ep->maxpacket = usb_maxpacket(udev, urb->pipe, is_out);
+		usb_settoggle(udev, epnum, is_out, 0);
+
+		if (type == PIPE_CONTROL) {
+			ep->nextpid = USB_PID_SETUP;
+		} else if (is_out) {
+			ep->nextpid = USB_PID_OUT;
+		} else {
+			ep->nextpid = USB_PID_IN;
+		}
+
+		if (urb->interval) {
+			/*
+			   With INT URBs submitted, the driver works with SOF
+			   interrupt enabled and ATL interrupt disabled. After
+			   the PTDs are written to fifo ram, the chip starts
+			   fifo processing and usb transfers after the next
+			   SOF and continues until the transfers are finished
+			   (succeeded or failed) or the frame ends. Therefore,
+			   the transfers occur only in every second frame,
+			   while fifo reading/writing and data processing
+			   occur in every other second frame. */
+			if (urb->interval < 2)
+				urb->interval = 2;
+			if (urb->interval > 2 * PERIODIC_SIZE)
+				urb->interval = 2 * PERIODIC_SIZE;
+			ep->period = urb->interval >> 1;
+			ep->branch = PERIODIC_SIZE;
+			ep->load = usb_calc_bus_time(udev->speed,
+						     !is_out,
+						     (type == PIPE_ISOCHRONOUS),
+						     usb_maxpacket(udev, pipe,
+								   is_out)) /
+			    1000;
+		}
+		hep->hcpriv = ep;
+		ep->hep = hep;
+	}
+
+	/* maybe put endpoint into schedule */
+	switch (type) {
+	case PIPE_CONTROL:
+	case PIPE_BULK:
+		if (list_empty(&ep->schedule))
+			list_add_tail(&ep->schedule, &isp116x->async);
+		break;
+	case PIPE_INTERRUPT:
+		urb->interval = ep->period;
+		ep->length = min((int)ep->maxpacket,
+				 urb->transfer_buffer_length);
+
+		/* urb submitted for already existing endpoint */
+		if (ep->branch < PERIODIC_SIZE)
+			break;
+
+		ret = ep->branch = balance(isp116x, ep->period, ep->load);
+		if (ret < 0)
+			goto fail;
+		ret = 0;
+
+		urb->start_frame = (isp116x->fmindex & (PERIODIC_SIZE - 1))
+		    + ep->branch;
+
+		/* sort each schedule branch by period (slow before fast)
+		   to share the faster parts of the tree without needing
+		   dummy/placeholder nodes */
+		DBG("schedule qh%d/%p branch %d\n", ep->period, ep, ep->branch);
+		for (i = ep->branch; i < PERIODIC_SIZE; i += ep->period) {
+			struct isp116x_ep **prev = &isp116x->periodic[i];
+			struct isp116x_ep *here = *prev;
+
+			while (here && ep != here) {
+				if (ep->period > here->period)
+					break;
+				prev = &here->next;
+				here = *prev;
+			}
+			if (ep != here) {
+				ep->next = here;
+				*prev = ep;
+			}
+			isp116x->load[i] += ep->load;
+		}
+		hcd->self.bandwidth_allocated += ep->load / ep->period;
+
+		/* switch over to SOFint */
+		if (!isp116x->periodic_count++) {
+			isp116x->irqenb &= ~HCuPINT_ATL;
+			isp116x->irqenb |= HCuPINT_SOF;
+			isp116x_write_reg16(isp116x, HCuPINTENB,
+					    isp116x->irqenb);
+		}
+	}
+
+	/* in case of unlink-during-submit */
+	spin_lock(&urb->lock);
+	if (urb->status != -EINPROGRESS) {
+		spin_unlock(&urb->lock);
+		finish_request(isp116x, ep, urb, NULL);
+		ret = 0;
+		goto fail;
+	}
+	urb->hcpriv = hep;
+	spin_unlock(&urb->lock);
+	start_atl_transfers(isp116x);
+
+      fail:
+	spin_unlock_irqrestore(&isp116x->lock, flags);
+	return ret;
+}
+
+/*
+   Dequeue URBs.
+*/
+static int isp116x_urb_dequeue(struct usb_hcd *hcd, struct urb *urb)
+{
+	struct isp116x *isp116x = hcd_to_isp116x(hcd);
+	struct usb_host_endpoint *hep;
+	struct isp116x_ep *ep, *ep_act;
+	unsigned long flags;
+
+	spin_lock_irqsave(&isp116x->lock, flags);
+	hep = urb->hcpriv;
+	/* URB already unlinked (or never linked)? */
+	if (!hep) {
+		spin_unlock_irqrestore(&isp116x->lock, flags);
+		return 0;
+	}
+	ep = hep->hcpriv;
+	WARN_ON(hep != ep->hep);
+
+	/* In front of queue? */
+	if (ep->hep->urb_list.next == &urb->urb_list)
+		/* active? */
+		for (ep_act = isp116x->atl_active; ep_act;
+		     ep_act = ep_act->active)
+			if (ep_act == ep) {
+				VDBG("dequeue, urb %p active; wait for irq\n",
+				     urb);
+				urb = NULL;
+				break;
+			}
+
+	if (urb)
+		finish_request(isp116x, ep, urb, NULL);
+
+	spin_unlock_irqrestore(&isp116x->lock, flags);
+	return 0;
+}
+
+static void isp116x_endpoint_disable(struct usb_hcd *hcd,
+				     struct usb_host_endpoint *hep)
+{
+	int i;
+	struct isp116x_ep *ep = hep->hcpriv;;
+
+	if (!ep)
+		return;
+
+	/* assume we'd just wait for the irq */
+	for (i = 0; i < 100 && !list_empty(&hep->urb_list); i++)
+		msleep(3);
+	if (!list_empty(&hep->urb_list))
+		WARN("ep %p not empty?\n", ep);
+
+	usb_put_dev(ep->udev);
+	kfree(ep);
+	hep->hcpriv = NULL;
+}
+
+static int isp116x_get_frame(struct usb_hcd *hcd)
+{
+	struct isp116x *isp116x = hcd_to_isp116x(hcd);
+	u32 fmnum;
+	unsigned long flags;
+
+	spin_lock_irqsave(&isp116x->lock, flags);
+	fmnum = isp116x_read_reg32(isp116x, HCFMNUM);
+	spin_unlock_irqrestore(&isp116x->lock, flags);
+	return (int)fmnum;
+}
+
+/*----------------------------------------------------------------*/
+
+/*
+  Adapted from ohci-hub.c. Currently we don't support autosuspend.
+*/
+static int isp116x_hub_status_data(struct usb_hcd *hcd, char *buf)
+{
+	struct isp116x *isp116x = hcd_to_isp116x(hcd);
+	int ports, i, changed = 0;
+
+	if (!HC_IS_RUNNING(hcd->state))
+		return -ESHUTDOWN;
+
+	ports = isp116x->rhdesca & RH_A_NDP;
+
+	/* init status */
+	if (isp116x->rhstatus & (RH_HS_LPSC | RH_HS_OCIC))
+		buf[0] = changed = 1;
+	else
+		buf[0] = 0;
+
+	for (i = 0; i < ports; i++) {
+		u32 status = isp116x->rhport[i];
+
+		if (status & (RH_PS_CSC | RH_PS_PESC | RH_PS_PSSC
+			      | RH_PS_OCIC | RH_PS_PRSC)) {
+			changed = 1;
+			buf[0] |= 1 << (i + 1);
+			continue;
+		}
+	}
+	return changed;
+}
+
+static void isp116x_hub_descriptor(struct isp116x *isp116x,
+				   struct usb_hub_descriptor *desc)
+{
+	u32 reg = isp116x->rhdesca;
+
+	desc->bDescriptorType = 0x29;
+	desc->bDescLength = 9;
+	desc->bHubContrCurrent = 0;
+	desc->bNbrPorts = (u8) (reg & 0x3);
+	/* Power switching, device type, overcurrent. */
+	desc->wHubCharacteristics =
+	    (__force __u16) cpu_to_le16((u16) ((reg >> 8) & 0x1f));
+	desc->bPwrOn2PwrGood = (u8) ((reg >> 24) & 0xff);
+	/* two bitmaps:  ports removable, and legacy PortPwrCtrlMask */
+	desc->bitmap[0] = desc->bNbrPorts == 1 ? 1 << 1 : 3 << 1;
+	desc->bitmap[1] = ~0;
+}
+
+/* Perform reset of a given port.
+   It would be great to just start the reset and let the
+   USB core to clear the reset in due time. However,
+   root hub ports should be reset for at least 50 ms, while
+   our chip stays in reset for about 10 ms. I.e., we must
+   repeatedly reset it ourself here.
+*/
+static inline void root_port_reset(struct isp116x *isp116x, unsigned port)
+{
+	u32 tmp;
+	unsigned long flags, t;
+
+	/* Root hub reset should be 50 ms, but some devices
+	   want it even longer. */
+	t = jiffies + msecs_to_jiffies(100);
+
+	while (time_before(jiffies, t)) {
+		spin_lock_irqsave(&isp116x->lock, flags);
+		/* spin until any current reset finishes */
+		for (;;) {
+			tmp = isp116x_read_reg32(isp116x, port ?
+						 HCRHPORT2 : HCRHPORT1);
+			if (!(tmp & RH_PS_PRS))
+				break;
+			udelay(500);
+		}
+		/* Don't reset a disconnected port */
+		if (!(tmp & RH_PS_CCS)) {
+			spin_unlock_irqrestore(&isp116x->lock, flags);
+			break;
+		}
+		/* Reset lasts 10ms (claims datasheet) */
+		isp116x_write_reg32(isp116x, port ? HCRHPORT2 :
+				    HCRHPORT1, (RH_PS_PRS));
+		spin_unlock_irqrestore(&isp116x->lock, flags);
+		msleep(10);
+	}
+}
+
+/* Adapted from ohci-hub.c */
+static int isp116x_hub_control(struct usb_hcd *hcd,
+			       u16 typeReq,
+			       u16 wValue, u16 wIndex, char *buf, u16 wLength)
+{
+	struct isp116x *isp116x = hcd_to_isp116x(hcd);
+	int ret = 0;
+	unsigned long flags;
+	int ports = isp116x->rhdesca & RH_A_NDP;
+	u32 tmp = 0;
+
+	switch (typeReq) {
+	case ClearHubFeature:
+		DBG("ClearHubFeature: ");
+		switch (wValue) {
+		case C_HUB_OVER_CURRENT:
+			DBG("C_HUB_OVER_CURRENT\n");
+			spin_lock_irqsave(&isp116x->lock, flags);
+			isp116x_write_reg32(isp116x, HCRHSTATUS, RH_HS_OCIC);
+			spin_unlock_irqrestore(&isp116x->lock, flags);
+		case C_HUB_LOCAL_POWER:
+			DBG("C_HUB_LOCAL_POWER\n");
+			break;
+		default:
+			goto error;
+		}
+		break;
+	case SetHubFeature:
+		DBG("SetHubFeature: ");
+		switch (wValue) {
+		case C_HUB_OVER_CURRENT:
+		case C_HUB_LOCAL_POWER:
+			DBG("C_HUB_OVER_CURRENT or C_HUB_LOCAL_POWER\n");
+			break;
+		default:
+			goto error;
+		}
+		break;
+	case GetHubDescriptor:
+		DBG("GetHubDescriptor\n");
+		isp116x_hub_descriptor(isp116x,
+				       (struct usb_hub_descriptor *)buf);
+		break;
+	case GetHubStatus:
+		DBG("GetHubStatus\n");
+		*(__le32 *) buf = cpu_to_le32(0);
+		break;
+	case GetPortStatus:
+		DBG("GetPortStatus\n");
+		if (!wIndex || wIndex > ports)
+			goto error;
+		tmp = isp116x->rhport[--wIndex];
+		*(__le32 *) buf = cpu_to_le32(tmp);
+		DBG("GetPortStatus: port[%d]  %08x\n", wIndex + 1, tmp);
+		break;
+	case ClearPortFeature:
+		DBG("ClearPortFeature: ");
+		if (!wIndex || wIndex > ports)
+			goto error;
+		wIndex--;
+
+		switch (wValue) {
+		case USB_PORT_FEAT_ENABLE:
+			DBG("USB_PORT_FEAT_ENABLE\n");
+			tmp = RH_PS_CCS;
+			break;
+		case USB_PORT_FEAT_C_ENABLE:
+			DBG("USB_PORT_FEAT_C_ENABLE\n");
+			tmp = RH_PS_PESC;
+			break;
+		case USB_PORT_FEAT_SUSPEND:
+			DBG("USB_PORT_FEAT_SUSPEND\n");
+			tmp = RH_PS_POCI;
+			break;
+		case USB_PORT_FEAT_C_SUSPEND:
+			DBG("USB_PORT_FEAT_C_SUSPEND\n");
+			tmp = RH_PS_PSSC;
+			break;
+		case USB_PORT_FEAT_POWER:
+			DBG("USB_PORT_FEAT_POWER\n");
+			tmp = RH_PS_LSDA;
+			break;
+		case USB_PORT_FEAT_C_CONNECTION:
+			DBG("USB_PORT_FEAT_C_CONNECTION\n");
+			tmp = RH_PS_CSC;
+			break;
+		case USB_PORT_FEAT_C_OVER_CURRENT:
+			DBG("USB_PORT_FEAT_C_OVER_CURRENT\n");
+			tmp = RH_PS_OCIC;
+			break;
+		case USB_PORT_FEAT_C_RESET:
+			DBG("USB_PORT_FEAT_C_RESET\n");
+			tmp = RH_PS_PRSC;
+			break;
+		default:
+			goto error;
+		}
+		spin_lock_irqsave(&isp116x->lock, flags);
+		isp116x_write_reg32(isp116x, wIndex
+				    ? HCRHPORT2 : HCRHPORT1, tmp);
+		isp116x->rhport[wIndex] =
+		    isp116x_read_reg32(isp116x, wIndex ? HCRHPORT2 : HCRHPORT1);
+		spin_unlock_irqrestore(&isp116x->lock, flags);
+		break;
+	case SetPortFeature:
+		DBG("SetPortFeature: ");
+		if (!wIndex || wIndex > ports)
+			goto error;
+		wIndex--;
+		switch (wValue) {
+		case USB_PORT_FEAT_SUSPEND:
+			DBG("USB_PORT_FEAT_SUSPEND\n");
+			spin_lock_irqsave(&isp116x->lock, flags);
+			isp116x_write_reg32(isp116x, wIndex
+					    ? HCRHPORT2 : HCRHPORT1, RH_PS_PSS);
+			break;
+		case USB_PORT_FEAT_POWER:
+			DBG("USB_PORT_FEAT_POWER\n");
+			spin_lock_irqsave(&isp116x->lock, flags);
+			isp116x_write_reg32(isp116x, wIndex
+					    ? HCRHPORT2 : HCRHPORT1, RH_PS_PPS);
+			break;
+		case USB_PORT_FEAT_RESET:
+			DBG("USB_PORT_FEAT_RESET\n");
+			root_port_reset(isp116x, wIndex);
+			spin_lock_irqsave(&isp116x->lock, flags);
+			break;
+		default:
+			goto error;
+		}
+		isp116x->rhport[wIndex] =
+		    isp116x_read_reg32(isp116x, wIndex ? HCRHPORT2 : HCRHPORT1);
+		spin_unlock_irqrestore(&isp116x->lock, flags);
+		break;
+
+	default:
+	      error:
+		/* "protocol stall" on error */
+		DBG("PROTOCOL STALL\n");
+		ret = -EPIPE;
+	}
+	return ret;
+}
+
+#ifdef	CONFIG_PM
+
+static int isp116x_hub_suspend(struct usb_hcd *hcd)
+{
+	struct isp116x *isp116x = hcd_to_isp116x(hcd);
+	unsigned long flags;
+	u32 val;
+	int ret = 0;
+
+	spin_lock_irqsave(&isp116x->lock, flags);
+
+	val = isp116x_read_reg32(isp116x, HCCONTROL);
+	switch (val & HCCONTROL_HCFS) {
+	case HCCONTROL_USB_OPER:
+		hcd->state = HC_STATE_QUIESCING;
+		val &= (~HCCONTROL_HCFS & ~HCCONTROL_RWE);
+		val |= HCCONTROL_USB_SUSPEND;
+		if (hcd->remote_wakeup)
+			val |= HCCONTROL_RWE;
+		/* Wait for usb transfers to finish */
+		mdelay(2);
+		isp116x_write_reg32(isp116x, HCCONTROL, val);
+		hcd->state = HC_STATE_SUSPENDED;
+		/* Wait for devices to suspend */
+		mdelay(5);
+	case HCCONTROL_USB_SUSPEND:
+		break;
+	case HCCONTROL_USB_RESUME:
+		isp116x_write_reg32(isp116x, HCCONTROL,
+				    (val & ~HCCONTROL_HCFS) |
+				    HCCONTROL_USB_RESET);
+	case HCCONTROL_USB_RESET:
+		ret = -EBUSY;
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	spin_unlock_irqrestore(&isp116x->lock, flags);
+	return ret;
+}
+
+static int isp116x_hub_resume(struct usb_hcd *hcd)
+{
+	struct isp116x *isp116x = hcd_to_isp116x(hcd);
+	u32 val;
+	int ret = -EINPROGRESS;
+
+	msleep(5);
+	spin_lock_irq(&isp116x->lock);
+
+	val = isp116x_read_reg32(isp116x, HCCONTROL);
+	switch (val & HCCONTROL_HCFS) {
+	case HCCONTROL_USB_SUSPEND:
+		val &= ~HCCONTROL_HCFS;
+		val |= HCCONTROL_USB_RESUME;
+		isp116x_write_reg32(isp116x, HCCONTROL, val);
+	case HCCONTROL_USB_RESUME:
+		break;
+	case HCCONTROL_USB_OPER:
+		/* Without setting power_state here the
+		   SUSPENDED state won't be removed from
+		   sysfs/usbN/power.state as a response to remote
+		   wakeup. Maybe in the future. */
+		hcd->self.root_hub->dev.power.power_state = PMSG_ON;
+		ret = 0;
+		break;
+	default:
+		ret = -EBUSY;
+	}
+
+	if (ret != -EINPROGRESS) {
+		spin_unlock_irq(&isp116x->lock);
+		return ret;
+	}
+
+	val = isp116x->rhdesca & RH_A_NDP;
+	while (val--) {
+		u32 stat =
+		    isp116x_read_reg32(isp116x, val ? HCRHPORT2 : HCRHPORT1);
+		/* force global, not selective, resume */
+		if (!(stat & RH_PS_PSS))
+			continue;
+		DBG("%s: Resuming port %d\n", __func__, val);
+		isp116x_write_reg32(isp116x, RH_PS_POCI, val
+				    ? HCRHPORT2 : HCRHPORT1);
+	}
+	spin_unlock_irq(&isp116x->lock);
+
+	hcd->state = HC_STATE_RESUMING;
+	mdelay(20);
+
+	/* Go operational */
+	spin_lock_irq(&isp116x->lock);
+	val = isp116x_read_reg32(isp116x, HCCONTROL);
+	isp116x_write_reg32(isp116x, HCCONTROL,
+			    (val & ~HCCONTROL_HCFS) | HCCONTROL_USB_OPER);
+	spin_unlock_irq(&isp116x->lock);
+	/* see analogous comment above */
+	hcd->self.root_hub->dev.power.power_state = PMSG_ON;
+	hcd->state = HC_STATE_RUNNING;
+
+	return 0;
+}
+
+static void isp116x_rh_resume(void *_hcd)
+{
+	struct usb_hcd *hcd = _hcd;
+
+	usb_resume_device(hcd->self.root_hub);
+}
+
+#else
+
+#define	isp116x_hub_suspend	NULL
+#define	isp116x_hub_resume	NULL
+
+static void isp116x_rh_resume(void *_hcd)
+{
+}
+
+#endif
+
+/*-----------------------------------------------------------------*/
+
+#ifdef STUB_DEBUG_FILE
+
+static inline void create_debug_file(struct isp116x *isp116x)
+{
+}
+
+static inline void remove_debug_file(struct isp116x *isp116x)
+{
+}
+
+#else
+
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+
+static void dump_irq(struct seq_file *s, char *label, u16 mask)
+{
+	seq_printf(s, "%s %04x%s%s%s%s%s%s\n", label, mask,
+		   mask & HCuPINT_CLKRDY ? " clkrdy" : "",
+		   mask & HCuPINT_SUSP ? " susp" : "",
+		   mask & HCuPINT_OPR ? " opr" : "",
+		   mask & HCuPINT_AIIEOT ? " eot" : "",
+		   mask & HCuPINT_ATL ? " atl" : "",
+		   mask & HCuPINT_SOF ? " sof" : "");
+}
+
+static void dump_int(struct seq_file *s, char *label, u32 mask)
+{
+	seq_printf(s, "%s %08x%s%s%s%s%s%s%s\n", label, mask,
+		   mask & HCINT_MIE ? " MIE" : "",
+		   mask & HCINT_RHSC ? " rhsc" : "",
+		   mask & HCINT_FNO ? " fno" : "",
+		   mask & HCINT_UE ? " ue" : "",
+		   mask & HCINT_RD ? " rd" : "",
+		   mask & HCINT_SF ? " sof" : "", mask & HCINT_SO ? " so" : "");
+}
+
+static int proc_isp116x_show(struct seq_file *s, void *unused)
+{
+	struct isp116x *isp116x = s->private;
+	struct isp116x_ep *ep;
+	struct urb *urb;
+	unsigned i;
+	char *str;
+
+	seq_printf(s, "%s\n%s version %s\n",
+		   isp116x_to_hcd(isp116x)->product_desc, hcd_name,
+		   DRIVER_VERSION);
+
+	if (HC_IS_SUSPENDED(isp116x_to_hcd(isp116x)->state)) {
+		seq_printf(s, "HCD is suspended\n");
+		return 0;
+	}
+	if (!HC_IS_RUNNING(isp116x_to_hcd(isp116x)->state)) {
+		seq_printf(s, "HCD not running\n");
+		return 0;
+	}
+
+	spin_lock_irq(&isp116x->lock);
+
+	dump_irq(s, "hc_irq_enable", isp116x_read_reg16(isp116x, HCuPINTENB));
+	dump_irq(s, "hc_irq_status", isp116x_read_reg16(isp116x, HCuPINT));
+	dump_int(s, "hc_int_enable", isp116x_read_reg32(isp116x, HCINTENB));
+	dump_int(s, "hc_int_status", isp116x_read_reg32(isp116x, HCINTSTAT));
+
+	list_for_each_entry(ep, &isp116x->async, schedule) {
+
+		switch (ep->nextpid) {
+		case USB_PID_IN:
+			str = "in";
+			break;
+		case USB_PID_OUT:
+			str = "out";
+			break;
+		case USB_PID_SETUP:
+			str = "setup";
+			break;
+		case USB_PID_ACK:
+			str = "status";
+			break;
+		default:
+			str = "?";
+			break;
+		};
+		seq_printf(s, "%p, ep%d%s, maxpacket %d:\n", ep,
+			   ep->epnum, str, ep->maxpacket);
+		list_for_each_entry(urb, &ep->hep->urb_list, urb_list) {
+			seq_printf(s, "  urb%p, %d/%d\n", urb,
+				   urb->actual_length,
+				   urb->transfer_buffer_length);
+		}
+	}
+	if (!list_empty(&isp116x->async))
+		seq_printf(s, "\n");
+
+	seq_printf(s, "periodic size= %d\n", PERIODIC_SIZE);
+
+	for (i = 0; i < PERIODIC_SIZE; i++) {
+		ep = isp116x->periodic[i];
+		if (!ep)
+			continue;
+		seq_printf(s, "%2d [%3d]:\n", i, isp116x->load[i]);
+
+		/* DUMB: prints shared entries multiple times */
+		do {
+			seq_printf(s, "   %d/%p (%sdev%d ep%d%s max %d)\n",
+				   ep->period, ep,
+				   (ep->udev->speed ==
+				    USB_SPEED_FULL) ? "" : "ls ",
+				   ep->udev->devnum, ep->epnum,
+				   (ep->epnum ==
+				    0) ? "" : ((ep->nextpid ==
+						USB_PID_IN) ? "in" : "out"),
+				   ep->maxpacket);
+			ep = ep->next;
+		} while (ep);
+	}
+	spin_unlock_irq(&isp116x->lock);
+	seq_printf(s, "\n");
+
+	return 0;
+}
+
+static int proc_isp116x_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, proc_isp116x_show, PDE(inode)->data);
+}
+
+static struct file_operations proc_ops = {
+	.open = proc_isp116x_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+/* expect just one isp116x per system */
+static const char proc_filename[] = "driver/isp116x";
+
+static void create_debug_file(struct isp116x *isp116x)
+{
+	struct proc_dir_entry *pde;
+
+	pde = create_proc_entry(proc_filename, 0, NULL);
+	if (pde == NULL)
+		return;
+
+	pde->proc_fops = &proc_ops;
+	pde->data = isp116x;
+	isp116x->pde = pde;
+}
+
+static void remove_debug_file(struct isp116x *isp116x)
+{
+	if (isp116x->pde)
+		remove_proc_entry(proc_filename, NULL);
+}
+
+#endif
+
+/*-----------------------------------------------------------------*/
+
+/*
+  Software reset - can be called from any contect.
+*/
+static int isp116x_sw_reset(struct isp116x *isp116x)
+{
+	int retries = 15;
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(&isp116x->lock, flags);
+	isp116x_write_reg16(isp116x, HCSWRES, HCSWRES_MAGIC);
+	isp116x_write_reg32(isp116x, HCCMDSTAT, HCCMDSTAT_HCR);
+	while (--retries) {
+		/* It usually resets within 1 ms */
+		mdelay(1);
+		if (!(isp116x_read_reg32(isp116x, HCCMDSTAT) & HCCMDSTAT_HCR))
+			break;
+	}
+	if (!retries) {
+		ERR("Software reset timeout\n");
+		ret = -ETIME;
+	}
+	spin_unlock_irqrestore(&isp116x->lock, flags);
+	return ret;
+}
+
+/*
+  Reset. Tries to perform platform-specific hardware
+  reset first; falls back to software reset.
+*/
+static int isp116x_reset(struct usb_hcd *hcd)
+{
+	struct isp116x *isp116x = hcd_to_isp116x(hcd);
+	unsigned long t;
+	u16 clkrdy = 0;
+	int ret = 0, timeout = 15 /* ms */ ;
+
+	if (isp116x->board && isp116x->board->reset) {
+		/* Hardware reset */
+		isp116x->board->reset(hcd->self.controller, 1);
+		msleep(10);
+		if (isp116x->board->clock)
+			isp116x->board->clock(hcd->self.controller, 1);
+		msleep(1);
+		isp116x->board->reset(hcd->self.controller, 0);
+	} else
+		ret = isp116x_sw_reset(isp116x);
+
+	if (ret)
+		return ret;
+
+	t = jiffies + msecs_to_jiffies(timeout);
+	while (time_before_eq(jiffies, t)) {
+		msleep(4);
+		spin_lock_irq(&isp116x->lock);
+		clkrdy = isp116x_read_reg16(isp116x, HCuPINT) & HCuPINT_CLKRDY;
+		spin_unlock_irq(&isp116x->lock);
+		if (clkrdy)
+			break;
+	}
+	if (!clkrdy) {
+		ERR("Clock not ready after 20ms\n");
+		ret = -ENODEV;
+	}
+	return ret;
+}
+
+static void isp116x_stop(struct usb_hcd *hcd)
+{
+	struct isp116x *isp116x = hcd_to_isp116x(hcd);
+	unsigned long flags;
+	u32 val;
+
+	spin_lock_irqsave(&isp116x->lock, flags);
+	isp116x_write_reg16(isp116x, HCuPINTENB, 0);
+
+	/* Switch off ports' power, some devices don't come up
+	   after next 'insmod' without this */
+	val = isp116x_read_reg32(isp116x, HCRHDESCA);
+	val &= ~(RH_A_NPS | RH_A_PSM);
+	isp116x_write_reg32(isp116x, HCRHDESCA, val);
+	isp116x_write_reg32(isp116x, HCRHSTATUS, RH_HS_LPS);
+	spin_unlock_irqrestore(&isp116x->lock, flags);
+
+	/* Put the chip into reset state */
+	if (isp116x->board && isp116x->board->reset)
+		isp116x->board->reset(hcd->self.controller, 0);
+	else
+		isp116x_sw_reset(isp116x);
+
+	/* Stop the clock */
+	if (isp116x->board && isp116x->board->clock)
+		isp116x->board->clock(hcd->self.controller, 0);
+}
+
+/*
+  Configure the chip. The chip must be successfully reset by now.
+*/
+static int isp116x_start(struct usb_hcd *hcd)
+{
+	struct isp116x *isp116x = hcd_to_isp116x(hcd);
+	struct isp116x_platform_data *board = isp116x->board;
+	struct usb_device *udev;
+	u32 val;
+	unsigned long flags;
+
+	spin_lock_irqsave(&isp116x->lock, flags);
+
+	/* clear interrupt status and disable all interrupt sources */
+	isp116x_write_reg16(isp116x, HCuPINT, 0xff);
+	isp116x_write_reg16(isp116x, HCuPINTENB, 0);
+
+	val = isp116x_read_reg16(isp116x, HCCHIPID);
+	if ((val & HCCHIPID_MASK) != HCCHIPID_MAGIC) {
+		ERR("Invalid chip ID %04x\n", val);
+		spin_unlock_irqrestore(&isp116x->lock, flags);
+		return -ENODEV;
+	}
+
+	isp116x_write_reg16(isp116x, HCITLBUFLEN, ISP116x_ITL_BUFSIZE);
+	isp116x_write_reg16(isp116x, HCATLBUFLEN, ISP116x_ATL_BUFSIZE);
+
+	/* ----- HW conf */
+	val = HCHWCFG_INT_ENABLE | HCHWCFG_DBWIDTH(1);
+	if (board->sel15Kres)
+		val |= HCHWCFG_15KRSEL;
+	/* Remote wakeup won't work without working clock */
+	if (board->clknotstop || board->remote_wakeup_enable)
+		val |= HCHWCFG_CLKNOTSTOP;
+	if (board->oc_enable)
+		val |= HCHWCFG_ANALOG_OC;
+	if (board->int_act_high)
+		val |= HCHWCFG_INT_POL;
+	if (board->int_edge_triggered)
+		val |= HCHWCFG_INT_TRIGGER;
+	isp116x_write_reg16(isp116x, HCHWCFG, val);
+
+	/* ----- Root hub conf */
+	val = 0;
+	/* AN10003_1.pdf recommends NPS to be always 1 */
+	if (board->no_power_switching)
+		val |= RH_A_NPS;
+	if (board->power_switching_mode)
+		val |= RH_A_PSM;
+	if (board->potpg)
+		val |= (board->potpg << 24) & RH_A_POTPGT;
+	else
+		val |= (25 << 24) & RH_A_POTPGT;
+	isp116x_write_reg32(isp116x, HCRHDESCA, val);
+	isp116x->rhdesca = isp116x_read_reg32(isp116x, HCRHDESCA);
+
+	val = RH_B_PPCM;
+	isp116x_write_reg32(isp116x, HCRHDESCB, val);
+	isp116x->rhdescb = isp116x_read_reg32(isp116x, HCRHDESCB);
+
+	val = 0;
+	if (board->remote_wakeup_enable) {
+		hcd->can_wakeup = 1;
+		val |= RH_HS_DRWE;
+	}
+	isp116x_write_reg32(isp116x, HCRHSTATUS, val);
+	isp116x->rhstatus = isp116x_read_reg32(isp116x, HCRHSTATUS);
+
+	isp116x_write_reg32(isp116x, HCFMINTVL, 0x27782edf);
+	spin_unlock_irqrestore(&isp116x->lock, flags);
+
+	udev = usb_alloc_dev(NULL, &hcd->self, 0);
+	if (!udev) {
+		isp116x_stop(hcd);
+		return -ENOMEM;
+	}
+
+	udev->speed = USB_SPEED_FULL;
+	hcd->state = HC_STATE_RUNNING;
+
+	if (usb_hcd_register_root_hub(udev, hcd) != 0) {
+		isp116x_stop(hcd);
+		usb_put_dev(udev);
+		return -ENODEV;
+	}
+
+	spin_lock_irqsave(&isp116x->lock, flags);
+	/* Set up interrupts */
+	isp116x->intenb = HCINT_MIE | HCINT_RHSC | HCINT_UE;
+	if (board->remote_wakeup_enable)
+		isp116x->intenb |= HCINT_RD;
+	isp116x->irqenb = HCuPINT_ATL | HCuPINT_OPR;	/* | HCuPINT_SUSP; */
+	isp116x_write_reg32(isp116x, HCINTENB, isp116x->intenb);
+	isp116x_write_reg16(isp116x, HCuPINTENB, isp116x->irqenb);
+
+	/* Go operational */
+	val = HCCONTROL_USB_OPER;
+	/* Remote wakeup connected - NOT SUPPORTED */
+	/*  if (board->remote_wakeup_connected)
+	   val |= HCCONTROL_RWC;  */
+	if (board->remote_wakeup_enable)
+		val |= HCCONTROL_RWE;
+	isp116x_write_reg32(isp116x, HCCONTROL, val);
+
+	/* Disable ports to avoid race in device enumeration */
+	isp116x_write_reg32(isp116x, HCRHPORT1, RH_PS_CCS);
+	isp116x_write_reg32(isp116x, HCRHPORT2, RH_PS_CCS);
+
+	isp116x_show_regs(isp116x);
+	spin_unlock_irqrestore(&isp116x->lock, flags);
+	return 0;
+}
+
+/*-----------------------------------------------------------------*/
+
+static struct hc_driver isp116x_hc_driver = {
+	.description = hcd_name,
+	.product_desc = "ISP116x Host Controller",
+	.hcd_priv_size = sizeof(struct isp116x),
+
+	.irq = isp116x_irq,
+	.flags = HCD_USB11,
+
+	.reset = isp116x_reset,
+	.start = isp116x_start,
+	.stop = isp116x_stop,
+
+	.urb_enqueue = isp116x_urb_enqueue,
+	.urb_dequeue = isp116x_urb_dequeue,
+	.endpoint_disable = isp116x_endpoint_disable,
+
+	.get_frame_number = isp116x_get_frame,
+
+	.hub_status_data = isp116x_hub_status_data,
+	.hub_control = isp116x_hub_control,
+	.hub_suspend = isp116x_hub_suspend,
+	.hub_resume = isp116x_hub_resume,
+};
+
+/*----------------------------------------------------------------*/
+
+static int __init_or_module isp116x_remove(struct device *dev)
+{
+	struct usb_hcd *hcd = dev_get_drvdata(dev);
+	struct isp116x *isp116x = hcd_to_isp116x(hcd);
+	struct platform_device *pdev;
+	struct resource *res;
+
+	pdev = container_of(dev, struct platform_device, dev);
+	remove_debug_file(isp116x);
+	usb_remove_hcd(hcd);
+
+	iounmap(isp116x->data_reg);
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	release_mem_region(res->start, 2);
+	iounmap(isp116x->addr_reg);
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	release_mem_region(res->start, 2);
+
+	usb_put_hcd(hcd);
+	return 0;
+}
+
+#define resource_len(r) (((r)->end - (r)->start) + 1)
+
+static int __init isp116x_probe(struct device *dev)
+{
+	struct usb_hcd *hcd;
+	struct isp116x *isp116x;
+	struct platform_device *pdev;
+	struct resource *addr, *data;
+	void __iomem *addr_reg;
+	void __iomem *data_reg;
+	int irq;
+	int ret = 0;
+
+	pdev = container_of(dev, struct platform_device, dev);
+	if (pdev->num_resources < 3) {
+		ret = -ENODEV;
+		goto err1;
+	}
+
+	data = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	addr = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	irq = platform_get_irq(pdev, 0);
+	if (!addr || !data || irq < 0) {
+		ret = -ENODEV;
+		goto err1;
+	}
+
+	if (dev->dma_mask) {
+		DBG("DMA not supported\n");
+		ret = -EINVAL;
+		goto err1;
+	}
+
+	if (!request_mem_region(addr->start, 2, hcd_name)) {
+		ret = -EBUSY;
+		goto err1;
+	}
+	addr_reg = ioremap(addr->start, resource_len(addr));
+	if (addr_reg == NULL) {
+		ret = -ENOMEM;
+		goto err2;
+	}
+	if (!request_mem_region(data->start, 2, hcd_name)) {
+		ret = -EBUSY;
+		goto err3;
+	}
+	data_reg = ioremap(data->start, resource_len(data));
+	if (data_reg == NULL) {
+		ret = -ENOMEM;
+		goto err4;
+	}
+
+	/* allocate and initialize hcd */
+	hcd = usb_create_hcd(&isp116x_hc_driver, dev, dev->bus_id);
+	if (!hcd) {
+		ret = -ENOMEM;
+		goto err5;
+	}
+	/* this rsrc_start is bogus */
+	hcd->rsrc_start = addr->start;
+	isp116x = hcd_to_isp116x(hcd);
+	isp116x->data_reg = data_reg;
+	isp116x->addr_reg = addr_reg;
+	spin_lock_init(&isp116x->lock);
+	INIT_LIST_HEAD(&isp116x->async);
+	INIT_WORK(&isp116x->rh_resume, isp116x_rh_resume, hcd);
+	isp116x->board = dev->platform_data;
+
+	if (!isp116x->board) {
+		ERR("Platform data structure not initialized\n");
+		ret = -ENODEV;
+		goto err6;
+	}
+	if (isp116x_check_platform_delay(isp116x)) {
+		ERR("USE_PLATFORM_DELAY defined, but delay function not "
+		    "implemented.\n");
+		ERR("See comments in drivers/usb/host/isp116x-hcd.c\n");
+		ret = -ENODEV;
+		goto err6;
+	}
+
+	ret = usb_add_hcd(hcd, irq, SA_INTERRUPT);
+	if (ret != 0)
+		goto err6;
+
+	create_debug_file(isp116x);
+	return 0;
+
+      err6:
+	usb_put_hcd(hcd);
+      err5:
+	iounmap(data_reg);
+      err4:
+	release_mem_region(data->start, 2);
+      err3:
+	iounmap(addr_reg);
+      err2:
+	release_mem_region(addr->start, 2);
+      err1:
+	ERR("init error, %d\n", ret);
+	return ret;
+}
+
+#ifdef	CONFIG_PM
+/*
+  Suspend of platform device
+*/
+static int isp116x_suspend(struct device *dev, pm_message_t state, u32 phase)
+{
+	int ret = 0;
+	struct usb_hcd *hcd = dev_get_drvdata(dev);
+
+	VDBG("%s: state %x, phase %x\n", __func__, state, phase);
+
+	if (phase != SUSPEND_DISABLE && phase != SUSPEND_POWER_DOWN)
+		return 0;
+
+	ret = usb_suspend_device(hcd->self.root_hub, state);
+	if (!ret) {
+		dev->power.power_state = state;
+		INFO("%s suspended\n", (char *)hcd_name);
+	} else
+		ERR("%s suspend failed\n", (char *)hcd_name);
+
+	return ret;
+}
+
+/*
+  Resume platform device
+*/
+static int isp116x_resume(struct device *dev, u32 phase)
+{
+	int ret = 0;
+	struct usb_hcd *hcd = dev_get_drvdata(dev);
+
+	VDBG("%s:  state %x, phase %x\n", __func__, dev->power.power_state,
+	     phase);
+	if (phase != RESUME_POWER_ON)
+		return 0;
+
+	ret = usb_resume_device(hcd->self.root_hub);
+	if (!ret) {
+		dev->power.power_state = PMSG_ON;
+		VDBG("%s resumed\n", (char *)hcd_name);
+	}
+	return ret;
+}
+
+#else
+
+#define	isp116x_suspend    NULL
+#define	isp116x_resume     NULL
+
+#endif
+
+static struct device_driver isp116x_driver = {
+	.name = (char *)hcd_name,
+	.bus = &platform_bus_type,
+	.probe = isp116x_probe,
+	.remove = isp116x_remove,
+	.suspend = isp116x_suspend,
+	.resume = isp116x_resume,
+};
+
+/*-----------------------------------------------------------------*/
+
+static int __init isp116x_init(void)
+{
+	if (usb_disabled())
+		return -ENODEV;
+
+	INFO("driver %s, %s\n", hcd_name, DRIVER_VERSION);
+	return driver_register(&isp116x_driver);
+}
+
+module_init(isp116x_init);
+
+static void __exit isp116x_cleanup(void)
+{
+	driver_unregister(&isp116x_driver);
+}
+
+module_exit(isp116x_cleanup);
diff --git a/drivers/usb/host/isp116x.h b/drivers/usb/host/isp116x.h
new file mode 100644
index 00000000000..58873470dcf
--- /dev/null
+++ b/drivers/usb/host/isp116x.h
@@ -0,0 +1,583 @@
+/*
+ * ISP116x register declarations and HCD data structures
+ *
+ * Copyright (C) 2005 Olav Kongas <ok@artecdesign.ee>
+ * Portions:
+ * Copyright (C) 2004 Lothar Wassmann
+ * Copyright (C) 2004 Psion Teklogix
+ * Copyright (C) 2004 David Brownell
+ */
+
+/* us of 1ms frame */
+#define  MAX_LOAD_LIMIT		850
+
+/* Full speed: max # of bytes to transfer for a single urb
+   at a time must be < 1024 && must be multiple of 64.
+   832 allows transfering 4kiB within 5 frames. */
+#define MAX_TRANSFER_SIZE_FULLSPEED	832
+
+/* Low speed: there is no reason to schedule in very big
+   chunks; often the requested long transfers are for
+   string descriptors containing short strings. */
+#define MAX_TRANSFER_SIZE_LOWSPEED	64
+
+/* Bytetime (us), a rough indication of how much time it
+   would take to transfer a byte of useful data over USB */
+#define BYTE_TIME_FULLSPEED	1
+#define BYTE_TIME_LOWSPEED	20
+
+/* Buffer sizes */
+#define ISP116x_BUF_SIZE	4096
+#define ISP116x_ITL_BUFSIZE	0
+#define ISP116x_ATL_BUFSIZE	((ISP116x_BUF_SIZE) - 2*(ISP116x_ITL_BUFSIZE))
+
+#define ISP116x_WRITE_OFFSET	0x80
+
+/*------------ ISP116x registers/bits ------------*/
+#define	HCREVISION	0x00
+#define	HCCONTROL	0x01
+#define		HCCONTROL_HCFS	(3 << 6)	/* host controller
+						   functional state */
+#define		HCCONTROL_USB_RESET	(0 << 6)
+#define		HCCONTROL_USB_RESUME	(1 << 6)
+#define		HCCONTROL_USB_OPER	(2 << 6)
+#define		HCCONTROL_USB_SUSPEND	(3 << 6)
+#define		HCCONTROL_RWC	(1 << 9)	/* remote wakeup connected */
+#define		HCCONTROL_RWE	(1 << 10)	/* remote wakeup enable */
+#define	HCCMDSTAT	0x02
+#define		HCCMDSTAT_HCR	(1 << 0)	/* host controller reset */
+#define		HCCMDSTAT_SOC	(3 << 16)	/* scheduling overrun count */
+#define	HCINTSTAT	0x03
+#define		HCINT_SO	(1 << 0)	/* scheduling overrun */
+#define		HCINT_WDH	(1 << 1)	/* writeback of done_head */
+#define		HCINT_SF	(1 << 2)	/* start frame */
+#define		HCINT_RD	(1 << 3)	/* resume detect */
+#define		HCINT_UE	(1 << 4)	/* unrecoverable error */
+#define		HCINT_FNO	(1 << 5)	/* frame number overflow */
+#define		HCINT_RHSC	(1 << 6)	/* root hub status change */
+#define		HCINT_OC	(1 << 30)	/* ownership change */
+#define		HCINT_MIE	(1 << 31)	/* master interrupt enable */
+#define	HCINTENB	0x04
+#define	HCINTDIS	0x05
+#define	HCFMINTVL	0x0d
+#define	HCFMREM		0x0e
+#define	HCFMNUM		0x0f
+#define	HCLSTHRESH	0x11
+#define	HCRHDESCA	0x12
+#define		RH_A_NDP	(0x3 << 0)	/* # downstream ports */
+#define		RH_A_PSM	(1 << 8)	/* power switching mode */
+#define		RH_A_NPS	(1 << 9)	/* no power switching */
+#define		RH_A_DT		(1 << 10)	/* device type (mbz) */
+#define		RH_A_OCPM	(1 << 11)	/* overcurrent protection
+						   mode */
+#define		RH_A_NOCP	(1 << 12)	/* no overcurrent protection */
+#define		RH_A_POTPGT	(0xff << 24)	/* power on -> power good
+						   time */
+#define	HCRHDESCB	0x13
+#define		RH_B_DR		(0xffff << 0)	/* device removable flags */
+#define		RH_B_PPCM	(0xffff << 16)	/* port power control mask */
+#define	HCRHSTATUS	0x14
+#define		RH_HS_LPS	(1 << 0)	/* local power status */
+#define		RH_HS_OCI	(1 << 1)	/* over current indicator */
+#define		RH_HS_DRWE	(1 << 15)	/* device remote wakeup
+						   enable */
+#define		RH_HS_LPSC	(1 << 16)	/* local power status change */
+#define		RH_HS_OCIC	(1 << 17)	/* over current indicator
+						   change */
+#define		RH_HS_CRWE	(1 << 31)	/* clear remote wakeup
+						   enable */
+#define	HCRHPORT1	0x15
+#define		RH_PS_CCS	(1 << 0)	/* current connect status */
+#define		RH_PS_PES	(1 << 1)	/* port enable status */
+#define		RH_PS_PSS	(1 << 2)	/* port suspend status */
+#define		RH_PS_POCI	(1 << 3)	/* port over current
+						   indicator */
+#define		RH_PS_PRS	(1 << 4)	/* port reset status */
+#define		RH_PS_PPS	(1 << 8)	/* port power status */
+#define		RH_PS_LSDA	(1 << 9)	/* low speed device attached */
+#define		RH_PS_CSC	(1 << 16)	/* connect status change */
+#define		RH_PS_PESC	(1 << 17)	/* port enable status change */
+#define		RH_PS_PSSC	(1 << 18)	/* port suspend status
+						   change */
+#define		RH_PS_OCIC	(1 << 19)	/* over current indicator
+						   change */
+#define		RH_PS_PRSC	(1 << 20)	/* port reset status change */
+#define		HCRHPORT_CLRMASK	(0x1f << 16)
+#define	HCRHPORT2	0x16
+#define	HCHWCFG		0x20
+#define		HCHWCFG_15KRSEL		(1 << 12)
+#define		HCHWCFG_CLKNOTSTOP	(1 << 11)
+#define		HCHWCFG_ANALOG_OC	(1 << 10)
+#define		HCHWCFG_DACK_MODE	(1 << 8)
+#define		HCHWCFG_EOT_POL		(1 << 7)
+#define		HCHWCFG_DACK_POL	(1 << 6)
+#define		HCHWCFG_DREQ_POL	(1 << 5)
+#define		HCHWCFG_DBWIDTH_MASK	(0x03 << 3)
+#define		HCHWCFG_DBWIDTH(n)	(((n) << 3) & HCHWCFG_DBWIDTH_MASK)
+#define		HCHWCFG_INT_POL		(1 << 2)
+#define		HCHWCFG_INT_TRIGGER	(1 << 1)
+#define		HCHWCFG_INT_ENABLE	(1 << 0)
+#define	HCDMACFG	0x21
+#define		HCDMACFG_BURST_LEN_MASK	(0x03 << 5)
+#define		HCDMACFG_BURST_LEN(n)	(((n) << 5) & HCDMACFG_BURST_LEN_MASK)
+#define		HCDMACFG_BURST_LEN_1	HCDMACFG_BURST_LEN(0)
+#define		HCDMACFG_BURST_LEN_4	HCDMACFG_BURST_LEN(1)
+#define		HCDMACFG_BURST_LEN_8	HCDMACFG_BURST_LEN(2)
+#define		HCDMACFG_DMA_ENABLE	(1 << 4)
+#define		HCDMACFG_BUF_TYPE_MASK	(0x07 << 1)
+#define		HCDMACFG_CTR_SEL	(1 << 2)
+#define		HCDMACFG_ITLATL_SEL	(1 << 1)
+#define		HCDMACFG_DMA_RW_SELECT	(1 << 0)
+#define	HCXFERCTR	0x22
+#define	HCuPINT		0x24
+#define		HCuPINT_SOF		(1 << 0)
+#define		HCuPINT_ATL		(1 << 1)
+#define		HCuPINT_AIIEOT		(1 << 2)
+#define		HCuPINT_OPR		(1 << 4)
+#define		HCuPINT_SUSP		(1 << 5)
+#define		HCuPINT_CLKRDY		(1 << 6)
+#define	HCuPINTENB	0x25
+#define	HCCHIPID	0x27
+#define		HCCHIPID_MASK		0xff00
+#define		HCCHIPID_MAGIC		0x6100
+#define	HCSCRATCH	0x28
+#define	HCSWRES		0x29
+#define		HCSWRES_MAGIC		0x00f6
+#define	HCITLBUFLEN	0x2a
+#define	HCATLBUFLEN	0x2b
+#define	HCBUFSTAT	0x2c
+#define		HCBUFSTAT_ITL0_FULL	(1 << 0)
+#define		HCBUFSTAT_ITL1_FULL	(1 << 1)
+#define		HCBUFSTAT_ATL_FULL	(1 << 2)
+#define		HCBUFSTAT_ITL0_DONE	(1 << 3)
+#define		HCBUFSTAT_ITL1_DONE	(1 << 4)
+#define		HCBUFSTAT_ATL_DONE	(1 << 5)
+#define	HCRDITL0LEN	0x2d
+#define	HCRDITL1LEN	0x2e
+#define	HCITLPORT	0x40
+#define	HCATLPORT	0x41
+
+/* Philips transfer descriptor */
+struct ptd {
+	u16 count;
+#define	PTD_COUNT_MSK	(0x3ff << 0)
+#define	PTD_TOGGLE_MSK	(1 << 10)
+#define	PTD_ACTIVE_MSK	(1 << 11)
+#define	PTD_CC_MSK	(0xf << 12)
+	u16 mps;
+#define	PTD_MPS_MSK	(0x3ff << 0)
+#define	PTD_SPD_MSK	(1 << 10)
+#define	PTD_LAST_MSK	(1 << 11)
+#define	PTD_EP_MSK	(0xf << 12)
+	u16 len;
+#define	PTD_LEN_MSK	(0x3ff << 0)
+#define	PTD_DIR_MSK	(3 << 10)
+#define	PTD_DIR_SETUP	(0)
+#define	PTD_DIR_OUT	(1)
+#define	PTD_DIR_IN	(2)
+#define	PTD_B5_5_MSK	(1 << 13)
+	u16 faddr;
+#define	PTD_FA_MSK	(0x7f << 0)
+#define	PTD_FMT_MSK	(1 << 7)
+} __attribute__ ((packed, aligned(2)));
+
+/* PTD accessor macros. */
+#define PTD_GET_COUNT(p)	(((p)->count & PTD_COUNT_MSK) >> 0)
+#define PTD_COUNT(v)		(((v) << 0) & PTD_COUNT_MSK)
+#define PTD_GET_TOGGLE(p)	(((p)->count & PTD_TOGGLE_MSK) >> 10)
+#define PTD_TOGGLE(v)		(((v) << 10) & PTD_TOGGLE_MSK)
+#define PTD_GET_ACTIVE(p)	(((p)->count & PTD_ACTIVE_MSK) >> 11)
+#define PTD_ACTIVE(v)		(((v) << 11) & PTD_ACTIVE_MSK)
+#define PTD_GET_CC(p)		(((p)->count & PTD_CC_MSK) >> 12)
+#define PTD_CC(v)		(((v) << 12) & PTD_CC_MSK)
+#define PTD_GET_MPS(p)		(((p)->mps & PTD_MPS_MSK) >> 0)
+#define PTD_MPS(v)		(((v) << 0) & PTD_MPS_MSK)
+#define PTD_GET_SPD(p)		(((p)->mps & PTD_SPD_MSK) >> 10)
+#define PTD_SPD(v)		(((v) << 10) & PTD_SPD_MSK)
+#define PTD_GET_LAST(p)		(((p)->mps & PTD_LAST_MSK) >> 11)
+#define PTD_LAST(v)		(((v) << 11) & PTD_LAST_MSK)
+#define PTD_GET_EP(p)		(((p)->mps & PTD_EP_MSK) >> 12)
+#define PTD_EP(v)		(((v) << 12) & PTD_EP_MSK)
+#define PTD_GET_LEN(p)		(((p)->len & PTD_LEN_MSK) >> 0)
+#define PTD_LEN(v)		(((v) << 0) & PTD_LEN_MSK)
+#define PTD_GET_DIR(p)		(((p)->len & PTD_DIR_MSK) >> 10)
+#define PTD_DIR(v)		(((v) << 10) & PTD_DIR_MSK)
+#define PTD_GET_B5_5(p)		(((p)->len & PTD_B5_5_MSK) >> 13)
+#define PTD_B5_5(v)		(((v) << 13) & PTD_B5_5_MSK)
+#define PTD_GET_FA(p)		(((p)->faddr & PTD_FA_MSK) >> 0)
+#define PTD_FA(v)		(((v) << 0) & PTD_FA_MSK)
+#define PTD_GET_FMT(p)		(((p)->faddr & PTD_FMT_MSK) >> 7)
+#define PTD_FMT(v)		(((v) << 7) & PTD_FMT_MSK)
+
+/*  Hardware transfer status codes -- CC from ptd->count */
+#define TD_CC_NOERROR      0x00
+#define TD_CC_CRC          0x01
+#define TD_CC_BITSTUFFING  0x02
+#define TD_CC_DATATOGGLEM  0x03
+#define TD_CC_STALL        0x04
+#define TD_DEVNOTRESP      0x05
+#define TD_PIDCHECKFAIL    0x06
+#define TD_UNEXPECTEDPID   0x07
+#define TD_DATAOVERRUN     0x08
+#define TD_DATAUNDERRUN    0x09
+    /* 0x0A, 0x0B reserved for hardware */
+#define TD_BUFFEROVERRUN   0x0C
+#define TD_BUFFERUNDERRUN  0x0D
+    /* 0x0E, 0x0F reserved for HCD */
+#define TD_NOTACCESSED     0x0F
+
+/* map PTD status codes (CC) to errno values */
+static const int cc_to_error[16] = {
+	/* No  Error  */ 0,
+	/* CRC Error  */ -EILSEQ,
+	/* Bit Stuff  */ -EPROTO,
+	/* Data Togg  */ -EILSEQ,
+	/* Stall      */ -EPIPE,
+	/* DevNotResp */ -ETIMEDOUT,
+	/* PIDCheck   */ -EPROTO,
+	/* UnExpPID   */ -EPROTO,
+	/* DataOver   */ -EOVERFLOW,
+	/* DataUnder  */ -EREMOTEIO,
+	/* (for hw)   */ -EIO,
+	/* (for hw)   */ -EIO,
+	/* BufferOver */ -ECOMM,
+	/* BuffUnder  */ -ENOSR,
+	/* (for HCD)  */ -EALREADY,
+	/* (for HCD)  */ -EALREADY
+};
+
+/*--------------------------------------------------------------*/
+
+#define	LOG2_PERIODIC_SIZE	5	/* arbitrary; this matches OHCI */
+#define	PERIODIC_SIZE		(1 << LOG2_PERIODIC_SIZE)
+
+struct isp116x {
+	spinlock_t lock;
+	struct work_struct rh_resume;
+
+	void __iomem *addr_reg;
+	void __iomem *data_reg;
+
+	struct isp116x_platform_data *board;
+
+	struct proc_dir_entry *pde;
+	unsigned long stat1, stat2, stat4, stat8, stat16;
+
+	/* HC registers */
+	u32 intenb;		/* "OHCI" interrupts */
+	u16 irqenb;		/* uP interrupts */
+
+	/* Root hub registers */
+	u32 rhdesca;
+	u32 rhdescb;
+	u32 rhstatus;
+	u32 rhport[2];
+
+	/* async schedule: control, bulk */
+	struct list_head async;
+
+	/* periodic schedule: int */
+	u16 load[PERIODIC_SIZE];
+	struct isp116x_ep *periodic[PERIODIC_SIZE];
+	unsigned periodic_count;
+	u16 fmindex;
+
+	/* Schedule for the current frame */
+	struct isp116x_ep *atl_active;
+	int atl_buflen;
+	int atl_bufshrt;
+	int atl_last_dir;
+	atomic_t atl_finishing;
+};
+
+static inline struct isp116x *hcd_to_isp116x(struct usb_hcd *hcd)
+{
+	return (struct isp116x *)(hcd->hcd_priv);
+}
+
+static inline struct usb_hcd *isp116x_to_hcd(struct isp116x *isp116x)
+{
+	return container_of((void *)isp116x, struct usb_hcd, hcd_priv);
+}
+
+struct isp116x_ep {
+	struct usb_host_endpoint *hep;
+	struct usb_device *udev;
+	struct ptd ptd;
+
+	u8 maxpacket;
+	u8 epnum;
+	u8 nextpid;
+	u16 error_count;
+	u16 length;		/* of current packet */
+	unsigned char *data;	/* to databuf */
+	/* queue of active EP's (the ones scheduled for the
+	   current frame) */
+	struct isp116x_ep *active;
+
+	/* periodic schedule */
+	u16 period;
+	u16 branch;
+	u16 load;
+	struct isp116x_ep *next;
+
+	/* async schedule */
+	struct list_head schedule;
+};
+
+/*-------------------------------------------------------------------------*/
+
+#ifdef DEBUG
+#define DBG(stuff...)		printk(KERN_DEBUG "116x: " stuff)
+#else
+#define DBG(stuff...)		do{}while(0)
+#endif
+
+#ifdef VERBOSE
+#    define VDBG		DBG
+#else
+#    define VDBG(stuff...)	do{}while(0)
+#endif
+
+#define ERR(stuff...)		printk(KERN_ERR "116x: " stuff)
+#define WARN(stuff...)		printk(KERN_WARNING "116x: " stuff)
+#define INFO(stuff...)		printk(KERN_INFO "116x: " stuff)
+
+/* ------------------------------------------------- */
+
+#if defined(USE_PLATFORM_DELAY)
+#if defined(USE_NDELAY)
+#error USE_PLATFORM_DELAY and USE_NDELAY simultaneously defined.
+#endif
+#define	isp116x_delay(h,d)	(h)->board->delay(	\
+				isp116x_to_hcd(h)->self.controller,d)
+#define isp116x_check_platform_delay(h)	((h)->board->delay == NULL)
+#elif defined(USE_NDELAY)
+#define	isp116x_delay(h,d)	ndelay(d)
+#define isp116x_check_platform_delay(h)	0
+#else
+#define	isp116x_delay(h,d)	do{}while(0)
+#define isp116x_check_platform_delay(h)	0
+#endif
+
+#if defined(DEBUG)
+#define	IRQ_TEST()	BUG_ON(!irqs_disabled())
+#else
+#define	IRQ_TEST()	do{}while(0)
+#endif
+
+static inline void isp116x_write_addr(struct isp116x *isp116x, unsigned reg)
+{
+	IRQ_TEST();
+	writew(reg & 0xff, isp116x->addr_reg);
+	isp116x_delay(isp116x, 300);
+}
+
+static inline void isp116x_write_data16(struct isp116x *isp116x, u16 val)
+{
+	writew(val, isp116x->data_reg);
+	isp116x_delay(isp116x, 150);
+}
+
+static inline void isp116x_raw_write_data16(struct isp116x *isp116x, u16 val)
+{
+	__raw_writew(val, isp116x->data_reg);
+	isp116x_delay(isp116x, 150);
+}
+
+static inline u16 isp116x_read_data16(struct isp116x *isp116x)
+{
+	u16 val;
+
+	val = readw(isp116x->data_reg);
+	isp116x_delay(isp116x, 150);
+	return val;
+}
+
+static inline u16 isp116x_raw_read_data16(struct isp116x *isp116x)
+{
+	u16 val;
+
+	val = __raw_readw(isp116x->data_reg);
+	isp116x_delay(isp116x, 150);
+	return val;
+}
+
+static inline void isp116x_write_data32(struct isp116x *isp116x, u32 val)
+{
+	writew(val & 0xffff, isp116x->data_reg);
+	isp116x_delay(isp116x, 150);
+	writew(val >> 16, isp116x->data_reg);
+	isp116x_delay(isp116x, 150);
+}
+
+static inline u32 isp116x_read_data32(struct isp116x *isp116x)
+{
+	u32 val;
+
+	val = (u32) readw(isp116x->data_reg);
+	isp116x_delay(isp116x, 150);
+	val |= ((u32) readw(isp116x->data_reg)) << 16;
+	isp116x_delay(isp116x, 150);
+	return val;
+}
+
+/* Let's keep register access functions out of line. Hint:
+   we wait at least 150 ns at every access.
+*/
+static u16 isp116x_read_reg16(struct isp116x *isp116x, unsigned reg)
+{
+	isp116x_write_addr(isp116x, reg);
+	return isp116x_read_data16(isp116x);
+}
+
+static u32 isp116x_read_reg32(struct isp116x *isp116x, unsigned reg)
+{
+	isp116x_write_addr(isp116x, reg);
+	return isp116x_read_data32(isp116x);
+}
+
+static void isp116x_write_reg16(struct isp116x *isp116x, unsigned reg,
+				unsigned val)
+{
+	isp116x_write_addr(isp116x, reg | ISP116x_WRITE_OFFSET);
+	isp116x_write_data16(isp116x, (u16) (val & 0xffff));
+}
+
+static void isp116x_write_reg32(struct isp116x *isp116x, unsigned reg,
+				unsigned val)
+{
+	isp116x_write_addr(isp116x, reg | ISP116x_WRITE_OFFSET);
+	isp116x_write_data32(isp116x, (u32) val);
+}
+
+#define isp116x_show_reg(d,r) {					\
+	if ((r) < 0x20) {			                \
+		DBG("%-12s[%02x]: %08x\n", #r,			\
+			r, isp116x_read_reg32(d, r));		\
+	} else {						\
+		DBG("%-12s[%02x]:     %04x\n", #r,		\
+			r, isp116x_read_reg16(d, r));	    	\
+	}							\
+}
+
+static inline void isp116x_show_regs(struct isp116x *isp116x)
+{
+	isp116x_show_reg(isp116x, HCREVISION);
+	isp116x_show_reg(isp116x, HCCONTROL);
+	isp116x_show_reg(isp116x, HCCMDSTAT);
+	isp116x_show_reg(isp116x, HCINTSTAT);
+	isp116x_show_reg(isp116x, HCINTENB);
+	isp116x_show_reg(isp116x, HCFMINTVL);
+	isp116x_show_reg(isp116x, HCFMREM);
+	isp116x_show_reg(isp116x, HCFMNUM);
+	isp116x_show_reg(isp116x, HCLSTHRESH);
+	isp116x_show_reg(isp116x, HCRHDESCA);
+	isp116x_show_reg(isp116x, HCRHDESCB);
+	isp116x_show_reg(isp116x, HCRHSTATUS);
+	isp116x_show_reg(isp116x, HCRHPORT1);
+	isp116x_show_reg(isp116x, HCRHPORT2);
+	isp116x_show_reg(isp116x, HCHWCFG);
+	isp116x_show_reg(isp116x, HCDMACFG);
+	isp116x_show_reg(isp116x, HCXFERCTR);
+	isp116x_show_reg(isp116x, HCuPINT);
+	isp116x_show_reg(isp116x, HCuPINTENB);
+	isp116x_show_reg(isp116x, HCCHIPID);
+	isp116x_show_reg(isp116x, HCSCRATCH);
+	isp116x_show_reg(isp116x, HCITLBUFLEN);
+	isp116x_show_reg(isp116x, HCATLBUFLEN);
+	isp116x_show_reg(isp116x, HCBUFSTAT);
+	isp116x_show_reg(isp116x, HCRDITL0LEN);
+	isp116x_show_reg(isp116x, HCRDITL1LEN);
+}
+
+#if defined(URB_TRACE)
+
+#define PIPETYPE(pipe)  ({ char *__s;			\
+	if (usb_pipecontrol(pipe))	__s = "ctrl";	\
+	else if (usb_pipeint(pipe))	__s = "int";	\
+	else if (usb_pipebulk(pipe))	__s = "bulk";	\
+	else				__s = "iso";	\
+	__s;})
+#define PIPEDIR(pipe)   ({ usb_pipein(pipe) ? "in" : "out"; })
+#define URB_NOTSHORT(urb) ({ (urb)->transfer_flags & URB_SHORT_NOT_OK ? \
+	"short_not_ok" : ""; })
+
+/* print debug info about the URB */
+static void urb_dbg(struct urb *urb, char *msg)
+{
+	unsigned int pipe;
+
+	if (!urb) {
+		DBG("%s: zero urb\n", msg);
+		return;
+	}
+	pipe = urb->pipe;
+	DBG("%s: FA %d ep%d%s %s: len %d/%d %s\n", msg,
+	    usb_pipedevice(pipe), usb_pipeendpoint(pipe),
+	    PIPEDIR(pipe), PIPETYPE(pipe),
+	    urb->transfer_buffer_length, urb->actual_length, URB_NOTSHORT(urb));
+}
+
+#else
+
+#define  urb_dbg(urb,msg)   do{}while(0)
+
+#endif				/* ! defined(URB_TRACE) */
+
+#if defined(PTD_TRACE)
+
+#define PTD_DIR_STR(ptd)  ({char __c;		\
+	switch(PTD_GET_DIR(ptd)){		\
+	case 0:  __c = 's'; break;		\
+	case 1:  __c = 'o'; break;		\
+	default: __c = 'i'; break;		\
+	}; __c;})
+
+/*
+  Dump PTD info. The code documents the format
+  perfectly, right :)
+*/
+static inline void dump_ptd(struct ptd *ptd)
+{
+	printk("td: %x %d%c%d %d,%d,%d  %x %x%x%x\n",
+	       PTD_GET_CC(ptd), PTD_GET_FA(ptd),
+	       PTD_DIR_STR(ptd), PTD_GET_EP(ptd),
+	       PTD_GET_COUNT(ptd), PTD_GET_LEN(ptd), PTD_GET_MPS(ptd),
+	       PTD_GET_TOGGLE(ptd), PTD_GET_ACTIVE(ptd),
+	       PTD_GET_SPD(ptd), PTD_GET_LAST(ptd));
+}
+
+static inline void dump_ptd_out_data(struct ptd *ptd, u8 * buf)
+{
+	int k;
+
+	if (PTD_GET_DIR(ptd) != PTD_DIR_IN && PTD_GET_LEN(ptd)) {
+		printk("-> ");
+		for (k = 0; k < PTD_GET_LEN(ptd); ++k)
+			printk("%02x ", ((u8 *) buf)[k]);
+		printk("\n");
+	}
+}
+
+static inline void dump_ptd_in_data(struct ptd *ptd, u8 * buf)
+{
+	int k;
+
+	if (PTD_GET_DIR(ptd) == PTD_DIR_IN && PTD_GET_COUNT(ptd)) {
+		printk("<- ");
+		for (k = 0; k < PTD_GET_COUNT(ptd); ++k)
+			printk("%02x ", ((u8 *) buf)[k]);
+		printk("\n");
+	}
+	if (PTD_GET_LAST(ptd))
+		printk("-\n");
+}
+
+#else
+
+#define dump_ptd(ptd)               do{}while(0)
+#define dump_ptd_in_data(ptd,buf)   do{}while(0)
+#define dump_ptd_out_data(ptd,buf)  do{}while(0)
+
+#endif				/* ! defined(PTD_TRACE) */
diff --git a/include/linux/usb_isp116x.h b/include/linux/usb_isp116x.h
new file mode 100644
index 00000000000..5f5a9d9bd6c
--- /dev/null
+++ b/include/linux/usb_isp116x.h
@@ -0,0 +1,47 @@
+
+/*
+ * Board initialization code should put one of these into dev->platform_data
+ * and place the isp116x onto platform_bus.
+ */
+
+struct isp116x_platform_data {
+	/* Enable internal resistors on downstream ports */
+	unsigned sel15Kres:1;
+	/* Chip's internal clock won't be stopped in suspended state.
+	   Setting/unsetting this bit takes effect only if
+	   'remote_wakeup_enable' below is not set. */
+	unsigned clknotstop:1;
+	/* On-chip overcurrent protection */
+	unsigned oc_enable:1;
+	/* INT output polarity */
+	unsigned int_act_high:1;
+	/* INT edge or level triggered */
+	unsigned int_edge_triggered:1;
+	/* WAKEUP pin connected - NOT SUPPORTED  */
+	/* unsigned remote_wakeup_connected:1; */
+	/* Wakeup by devices on usb bus enabled */
+	unsigned remote_wakeup_enable:1;
+	/* Switch or not to switch (keep always powered) */
+	unsigned no_power_switching:1;
+	/* Ganged port power switching (0) or individual port
+	   power switching (1) */
+	unsigned power_switching_mode:1;
+	/* Given port_power, msec/2 after power on till power good */
+	u8 potpg;
+	/* Hardware reset set/clear. If implemented, this function must:
+	   if set == 0,   deassert chip's HW reset pin
+	   otherwise,     assert chip's HW reset pin       */
+	void (*reset) (struct device * dev, int set);
+	/* Hardware clock start/stop. If implemented, this function must:
+	   if start == 0,    stop the external clock
+	   otherwise,        start the external clock
+	 */
+	void (*clock) (struct device * dev, int start);
+	/* Inter-io delay (ns). The chip is picky about access timings; it
+	   expects at least:
+	   150ns delay between consecutive accesses to DATA_REG,
+	   300ns delay between access to ADDR_REG and DATA_REG
+	   OE, WE MUST NOT be changed during these intervals
+	 */
+	void (*delay) (struct device * dev, int delay);
+};
-- 
cgit v1.2.3-70-g09d2


From 1bbc169621cbe502b9143a27eb12802a0f1d43a0 Mon Sep 17 00:00:00 2001
From: David Brownell <david-b@pacbell.net>
Date: Sat, 7 May 2005 13:05:13 -0700
Subject: [PATCH] USB: gadget driver updates (SETUP api change)

This updates most of the gadget framework to expect SETUP packets use
USB byteorder (matching the annotation in <linux/usb_ch9.h> and usage
in the host side stack):

  - definition in <linux/usb_gadget.h>
  - gadget drivers:  Ethernet/RNDIS, serial/ACM, file_storage, gadgetfs.
  - dummy_hcd

It also includes some other similar changes as suggested by "sparse",
which was used to detect byteorder bugs.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/gadget/dummy_hcd.c    |  3 ---
 drivers/usb/gadget/ether.c        |  6 +++---
 drivers/usb/gadget/file_storage.c | 19 +++++++++----------
 drivers/usb/gadget/inode.c        | 12 ++++++------
 drivers/usb/gadget/serial.c       | 36 +++++++++++++++++++-----------------
 drivers/usb/gadget/zero.c         |  6 +++---
 include/linux/usb_gadget.h        |  2 +-
 7 files changed, 41 insertions(+), 43 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/gadget/dummy_hcd.c b/drivers/usb/gadget/dummy_hcd.c
index 73d2f24050a..f9540adf2a4 100644
--- a/drivers/usb/gadget/dummy_hcd.c
+++ b/drivers/usb/gadget/dummy_hcd.c
@@ -1267,9 +1267,6 @@ restart:
 			struct dummy_ep			*ep2;
 
 			setup = *(struct usb_ctrlrequest*) urb->setup_packet;
-			le16_to_cpus (&setup.wIndex);
-			le16_to_cpus (&setup.wValue);
-			le16_to_cpus (&setup.wLength);
 			if (setup.wLength != urb->transfer_buffer_length) {
 				maybe_set_status (urb, -EOVERFLOW);
 				goto return_urb;
diff --git a/drivers/usb/gadget/ether.c b/drivers/usb/gadget/ether.c
index 3830a0a0fd5..9f8413e3c10 100644
--- a/drivers/usb/gadget/ether.c
+++ b/drivers/usb/gadget/ether.c
@@ -1277,9 +1277,9 @@ eth_setup (struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl)
 	struct eth_dev		*dev = get_gadget_data (gadget);
 	struct usb_request	*req = dev->req;
 	int			value = -EOPNOTSUPP;
-	u16			wIndex = (__force u16) ctrl->wIndex;
-	u16			wValue = (__force u16) ctrl->wValue;
-	u16			wLength = (__force u16) ctrl->wLength;
+	u16			wIndex = le16_to_cpu(ctrl->wIndex);
+	u16			wValue = le16_to_cpu(ctrl->wValue);
+	u16			wLength = le16_to_cpu(ctrl->wLength);
 
 	/* descriptors just go into the pre-allocated ep0 buffer,
 	 * while config change events may enable network traffic.
diff --git a/drivers/usb/gadget/file_storage.c b/drivers/usb/gadget/file_storage.c
index f5ce45c4b2a..4f57085619b 100644
--- a/drivers/usb/gadget/file_storage.c
+++ b/drivers/usb/gadget/file_storage.c
@@ -819,7 +819,7 @@ static void inline put_be32(u8 *buf, u32 val)
 	buf[0] = val >> 24;
 	buf[1] = val >> 16;
 	buf[2] = val >> 8;
-	buf[3] = val;
+	buf[3] = val & 0xff;
 }
 
 
@@ -1277,8 +1277,8 @@ static int class_setup_req(struct fsg_dev *fsg,
 {
 	struct usb_request	*req = fsg->ep0req;
 	int			value = -EOPNOTSUPP;
-	u16			w_index = ctrl->wIndex;
-	u16			w_length = ctrl->wLength;
+	u16			w_index = le16_to_cpu(ctrl->wIndex);
+	u16			w_length = le16_to_cpu(ctrl->wLength);
 
 	if (!fsg->config)
 		return value;
@@ -1345,7 +1345,7 @@ static int class_setup_req(struct fsg_dev *fsg,
 			"unknown class-specific control req "
 			"%02x.%02x v%04x i%04x l%u\n",
 			ctrl->bRequestType, ctrl->bRequest,
-			ctrl->wValue, w_index, w_length);
+			le16_to_cpu(ctrl->wValue), w_index, w_length);
 	return value;
 }
 
@@ -1359,8 +1359,8 @@ static int standard_setup_req(struct fsg_dev *fsg,
 {
 	struct usb_request	*req = fsg->ep0req;
 	int			value = -EOPNOTSUPP;
-	u16			w_index = ctrl->wIndex;
-	u16			w_value = ctrl->wValue;
+	u16			w_index = le16_to_cpu(ctrl->wIndex);
+	u16			w_value = le16_to_cpu(ctrl->wValue);
 
 	/* Usually this just stores reply data in the pre-allocated ep0 buffer,
 	 * but config change events will also reconfigure hardware. */
@@ -1469,7 +1469,7 @@ static int standard_setup_req(struct fsg_dev *fsg,
 		VDBG(fsg,
 			"unknown control req %02x.%02x v%04x i%04x l%u\n",
 			ctrl->bRequestType, ctrl->bRequest,
-			w_value, w_index, ctrl->wLength);
+			w_value, w_index, le16_to_cpu(ctrl->wLength));
 	}
 
 	return value;
@@ -1481,7 +1481,7 @@ static int fsg_setup(struct usb_gadget *gadget,
 {
 	struct fsg_dev		*fsg = get_gadget_data(gadget);
 	int			rc;
-	int			w_length = ctrl->wLength;
+	int			w_length = le16_to_cpu(ctrl->wLength);
 
 	++fsg->ep0_req_tag;		// Record arrival of a new request
 	fsg->ep0req->context = NULL;
@@ -1497,8 +1497,7 @@ static int fsg_setup(struct usb_gadget *gadget,
 	if (rc >= 0 && rc != DELAYED_STATUS) {
 		rc = min(rc, w_length);
 		fsg->ep0req->length = rc;
-		fsg->ep0req->zero = (rc < w_length &&
-				(rc % gadget->ep0->maxpacket) == 0);
+		fsg->ep0req->zero = rc < w_length;
 		fsg->ep0req_name = (ctrl->bRequestType & USB_DIR_IN ?
 				"ep0-in" : "ep0-out");
 		rc = ep0_queue(fsg);
diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c
index 1e5e6ddef78..020815397a4 100644
--- a/drivers/usb/gadget/inode.c
+++ b/drivers/usb/gadget/inode.c
@@ -417,8 +417,8 @@ ep_read (struct file *fd, char __user *buf, size_t len, loff_t *ptr)
 		goto free1;
 
 	value = ep_io (data, kbuf, len);
-	VDEBUG (data->dev, "%s read %d OUT, status %d\n",
-		data->name, len, value);
+	VDEBUG (data->dev, "%s read %zu OUT, status %d\n",
+		data->name, len, (int) value);
 	if (value >= 0 && copy_to_user (buf, kbuf, value))
 		value = -EFAULT;
 
@@ -465,8 +465,8 @@ ep_write (struct file *fd, const char __user *buf, size_t len, loff_t *ptr)
 	}
 
 	value = ep_io (data, kbuf, len);
-	VDEBUG (data->dev, "%s write %d IN, status %d\n",
-		data->name, len, value);
+	VDEBUG (data->dev, "%s write %zu IN, status %d\n",
+		data->name, len, (int) value);
 free1:
 	up (&data->lock);
 	kfree (kbuf);
@@ -1318,8 +1318,8 @@ gadgetfs_setup (struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl)
 	struct usb_request		*req = dev->req;
 	int				value = -EOPNOTSUPP;
 	struct usb_gadgetfs_event	*event;
-	u16				w_value = ctrl->wValue;
-	u16				w_length = ctrl->wLength;
+	u16				w_value = le16_to_cpu(ctrl->wValue);
+	u16				w_length = le16_to_cpu(ctrl->wLength);
 
 	spin_lock (&dev->lock);
 	dev->setup_abort = 0;
diff --git a/drivers/usb/gadget/serial.c b/drivers/usb/gadget/serial.c
index 4d591c764e3..9e4f1c6935a 100644
--- a/drivers/usb/gadget/serial.c
+++ b/drivers/usb/gadget/serial.c
@@ -300,18 +300,18 @@ static int gs_build_config_buf(u8 *buf, enum usb_device_speed speed,
 		u8 type, unsigned int index, int is_otg);
 
 static struct usb_request *gs_alloc_req(struct usb_ep *ep, unsigned int len,
-	int kmalloc_flags);
+	unsigned kmalloc_flags);
 static void gs_free_req(struct usb_ep *ep, struct usb_request *req);
 
 static struct gs_req_entry *gs_alloc_req_entry(struct usb_ep *ep, unsigned len,
-	int kmalloc_flags);
+	unsigned kmalloc_flags);
 static void gs_free_req_entry(struct usb_ep *ep, struct gs_req_entry *req);
 
-static int gs_alloc_ports(struct gs_dev *dev, int kmalloc_flags);
+static int gs_alloc_ports(struct gs_dev *dev, unsigned kmalloc_flags);
 static void gs_free_ports(struct gs_dev *dev);
 
 /* circular buffer */
-static struct gs_buf *gs_buf_alloc(unsigned int size, int kmalloc_flags);
+static struct gs_buf *gs_buf_alloc(unsigned int size, unsigned kmalloc_flags);
 static void gs_buf_free(struct gs_buf *gb);
 static void gs_buf_clear(struct gs_buf *gb);
 static unsigned int gs_buf_data_avail(struct gs_buf *gb);
@@ -1607,9 +1607,9 @@ static int gs_setup(struct usb_gadget *gadget,
 	int ret = -EOPNOTSUPP;
 	struct gs_dev *dev = get_gadget_data(gadget);
 	struct usb_request *req = dev->dev_ctrl_req;
-	u16 wIndex = ctrl->wIndex;
-	u16 wValue = ctrl->wValue;
-	u16 wLength = ctrl->wLength;
+	u16 wIndex = le16_to_cpu(ctrl->wIndex);
+	u16 wValue = le16_to_cpu(ctrl->wValue);
+	u16 wLength = le16_to_cpu(ctrl->wLength);
 
 	switch (ctrl->bRequestType & USB_TYPE_MASK) {
 	case USB_TYPE_STANDARD:
@@ -1651,9 +1651,9 @@ static int gs_setup_standard(struct usb_gadget *gadget,
 	int ret = -EOPNOTSUPP;
 	struct gs_dev *dev = get_gadget_data(gadget);
 	struct usb_request *req = dev->dev_ctrl_req;
-	u16 wIndex = ctrl->wIndex;
-	u16 wValue = ctrl->wValue;
-	u16 wLength = ctrl->wLength;
+	u16 wIndex = le16_to_cpu(ctrl->wIndex);
+	u16 wValue = le16_to_cpu(ctrl->wValue);
+	u16 wLength = le16_to_cpu(ctrl->wLength);
 
 	switch (ctrl->bRequest) {
 	case USB_REQ_GET_DESCRIPTOR:
@@ -1782,9 +1782,9 @@ static int gs_setup_class(struct usb_gadget *gadget,
 	struct gs_dev *dev = get_gadget_data(gadget);
 	struct gs_port *port = dev->dev_port[0];	/* ACM only has one port */
 	struct usb_request *req = dev->dev_ctrl_req;
-	u16 wIndex = ctrl->wIndex;
-	u16 wValue = ctrl->wValue;
-	u16 wLength = ctrl->wLength;
+	u16 wIndex = le16_to_cpu(ctrl->wIndex);
+	u16 wValue = le16_to_cpu(ctrl->wValue);
+	u16 wLength = le16_to_cpu(ctrl->wLength);
 
 	switch (ctrl->bRequest) {
 	case USB_CDC_REQ_SET_LINE_CODING:
@@ -2119,7 +2119,8 @@ static int gs_build_config_buf(u8 *buf, enum usb_device_speed speed,
  * Allocate a usb_request and its buffer.  Returns a pointer to the
  * usb_request or NULL if there is an error.
  */
-static struct usb_request *gs_alloc_req(struct usb_ep *ep, unsigned int len, int kmalloc_flags)
+static struct usb_request *
+gs_alloc_req(struct usb_ep *ep, unsigned int len, unsigned kmalloc_flags)
 {
 	struct usb_request *req;
 
@@ -2159,7 +2160,8 @@ static void gs_free_req(struct usb_ep *ep, struct usb_request *req)
  * Allocates a request and its buffer, using the given
  * endpoint, buffer len, and kmalloc flags.
  */
-static struct gs_req_entry *gs_alloc_req_entry(struct usb_ep *ep, unsigned len, int kmalloc_flags)
+static struct gs_req_entry *
+gs_alloc_req_entry(struct usb_ep *ep, unsigned len, unsigned kmalloc_flags)
 {
 	struct gs_req_entry	*req;
 
@@ -2200,7 +2202,7 @@ static void gs_free_req_entry(struct usb_ep *ep, struct gs_req_entry *req)
  *
  * The device lock is normally held when calling this function.
  */
-static int gs_alloc_ports(struct gs_dev *dev, int kmalloc_flags)
+static int gs_alloc_ports(struct gs_dev *dev, unsigned kmalloc_flags)
 {
 	int i;
 	struct gs_port *port;
@@ -2282,7 +2284,7 @@ static void gs_free_ports(struct gs_dev *dev)
  *
  * Allocate a circular buffer and all associated memory.
  */
-static struct gs_buf *gs_buf_alloc(unsigned int size, int kmalloc_flags)
+static struct gs_buf *gs_buf_alloc(unsigned int size, unsigned kmalloc_flags)
 {
 	struct gs_buf *gb;
 
diff --git a/drivers/usb/gadget/zero.c b/drivers/usb/gadget/zero.c
index 6e49432071a..a6e035e2447 100644
--- a/drivers/usb/gadget/zero.c
+++ b/drivers/usb/gadget/zero.c
@@ -919,9 +919,9 @@ zero_setup (struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl)
 	struct zero_dev		*dev = get_gadget_data (gadget);
 	struct usb_request	*req = dev->req;
 	int			value = -EOPNOTSUPP;
-	u16			w_index = ctrl->wIndex;
-	u16			w_value = ctrl->wValue;
-	u16			w_length = ctrl->wLength;
+	u16			w_index = le16_to_cpu(ctrl->wIndex);
+	u16			w_value = le16_to_cpu(ctrl->wValue);
+	u16			w_length = le16_to_cpu(ctrl->wLength);
 
 	/* usually this stores reply data in the pre-allocated ep0 buffer,
 	 * but config change events will reconfigure hardware.
diff --git a/include/linux/usb_gadget.h b/include/linux/usb_gadget.h
index 9bba9997947..b00f127cb44 100644
--- a/include/linux/usb_gadget.h
+++ b/include/linux/usb_gadget.h
@@ -711,7 +711,7 @@ usb_gadget_disconnect (struct usb_gadget *gadget)
  * 	the hardware level driver. Most calls must be handled by
  * 	the gadget driver, including descriptor and configuration
  * 	management.  The 16 bit members of the setup data are in
- * 	cpu order. Called in_interrupt; this may not sleep.  Driver
+ * 	USB byte order. Called in_interrupt; this may not sleep.  Driver
  *	queues a response to ep0, or returns negative to stall.
  * @disconnect: Invoked after all transfers have been stopped,
  * 	when the host is disconnected.  May be called in_interrupt; this
-- 
cgit v1.2.3-70-g09d2


From 5da0106f0b9b13afa4a902c01d4c98b002df55ff Mon Sep 17 00:00:00 2001
From: David Brownell <david-b@pacbell.net>
Date: Tue, 31 May 2005 10:21:11 -0700
Subject: [PATCH] USB: wireless usb <linux/usb_ch9.h> declarations

This provides declarations for new requests, descriptors, and bitfields as
defined in the Wireless USB 1.0 spec.  Device support will involve a new
"Wire Adapter" device class, connecting a USB Host to a cluster of wireless
USB devices.  There will be two adapter types:

  * Host Wireless Adapter (HWA):  the downstream link is wireless, which
    connects a wireless USB host to wireless USB devices (not unlike like
    a hub) including to the second type of adapter.

  * Device Wireless Adapter (DWA): the upstream link is wireless, for
    connecting existing USB devices through wired links into the cluser.

All wireless USB devices will need persistent (and secure!) key storage, and
it's probable that Linux -- or device firmware -- will need to be involved
with that to bootstrap the initial secure key exchange.

Some user interface is required in that initial key exchange, and since the
most "hands-off" one is a wired USB link, I suspect wireless operation will
usually not be the only mode for wireless USB devices.  (Plus, devices can
recharge batteries using wired USB...)  All other key exchange protocols need
error prone user interactions, like copying and/or verifying keys.

It'll likely be a while before we have commercial Wireless USB hardware,
much less Linux implementations that know how to use it.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb_ch9.h | 183 ++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 176 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb_ch9.h b/include/linux/usb_ch9.h
index f5fe94e09a0..39e7ff4ffd2 100644
--- a/include/linux/usb_ch9.h
+++ b/include/linux/usb_ch9.h
@@ -6,11 +6,14 @@
  *
  * - the master/host side Linux-USB kernel driver API;
  * - the "usbfs" user space API; and
- * - (eventually) a Linux "gadget" slave/device side driver API.
+ * - the Linux "gadget" slave/device/peripheral side driver API.
  *
  * USB 2.0 adds an additional "On The Go" (OTG) mode, which lets systems
  * act either as a USB master/host or as a USB slave/device.  That means
- * the master and slave side APIs will benefit from working well together.
+ * the master and slave side APIs benefit from working well together.
+ *
+ * There's also "Wireless USB", using low power short range radios for
+ * peripheral interconnection but otherwise building on the USB framework.
  */
 
 #ifndef __LINUX_USB_CH9_H
@@ -68,6 +71,18 @@
 #define USB_REQ_SET_INTERFACE		0x0B
 #define USB_REQ_SYNCH_FRAME		0x0C
 
+#define USB_REQ_SET_ENCRYPTION		0x0D	/* Wireless USB */
+#define USB_REQ_GET_ENCRYPTION		0x0E
+#define USB_REQ_SET_HANDSHAKE		0x0F
+#define USB_REQ_GET_HANDSHAKE		0x10
+#define USB_REQ_SET_CONNECTION		0x11
+#define USB_REQ_SET_SECURITY_DATA	0x12
+#define USB_REQ_GET_SECURITY_DATA	0x13
+#define USB_REQ_SET_WUSB_DATA		0x14
+#define USB_REQ_LOOPBACK_DATA_WRITE	0x15
+#define USB_REQ_LOOPBACK_DATA_READ	0x16
+#define USB_REQ_SET_INTERFACE_DS	0x17
+
 /*
  * USB feature flags are written using USB_REQ_{CLEAR,SET}_FEATURE, and
  * are read as a bit array returned by USB_REQ_GET_STATUS.  (So there
@@ -75,10 +90,12 @@
  */
 #define USB_DEVICE_SELF_POWERED		0	/* (read only) */
 #define USB_DEVICE_REMOTE_WAKEUP	1	/* dev may initiate wakeup */
-#define USB_DEVICE_TEST_MODE		2	/* (high speed only) */
-#define USB_DEVICE_B_HNP_ENABLE		3	/* dev may initiate HNP */
-#define USB_DEVICE_A_HNP_SUPPORT	4	/* RH port supports HNP */
-#define USB_DEVICE_A_ALT_HNP_SUPPORT	5	/* other RH port does */
+#define USB_DEVICE_TEST_MODE		2	/* (wired high speed only) */
+#define USB_DEVICE_BATTERY		2	/* (wireless) */
+#define USB_DEVICE_B_HNP_ENABLE		3	/* (otg) dev may initiate HNP */
+#define USB_DEVICE_WUSB_DEVICE		3	/* (wireless)*/
+#define USB_DEVICE_A_HNP_SUPPORT	4	/* (otg) RH port supports HNP */
+#define USB_DEVICE_A_ALT_HNP_SUPPORT	5	/* (otg) other RH port does */
 #define USB_DEVICE_DEBUG_MODE		6	/* (special devices only) */
 
 #define USB_ENDPOINT_HALT		0	/* IN/OUT will STALL */
@@ -135,6 +152,13 @@ struct usb_ctrlrequest {
 #define USB_DT_OTG			0x09
 #define USB_DT_DEBUG			0x0a
 #define USB_DT_INTERFACE_ASSOCIATION	0x0b
+/* these are from the Wireless USB spec */
+#define USB_DT_SECURITY			0x0c
+#define USB_DT_KEY			0x0d
+#define USB_DT_ENCRYPTION_TYPE		0x0e
+#define USB_DT_BOS			0x0f
+#define USB_DT_DEVICE_CAPABILITY	0x10
+#define USB_DT_WIRELESS_ENDPOINT_COMP	0x11
 
 /* conventional codes for class-specific descriptors */
 #define USB_DT_CS_DEVICE		0x21
@@ -192,6 +216,7 @@ struct usb_device_descriptor {
 #define USB_CLASS_CSCID			0x0b	/* chip+ smart card */
 #define USB_CLASS_CONTENT_SEC		0x0d	/* content security */
 #define USB_CLASS_VIDEO			0x0e
+#define USB_CLASS_WIRELESS_CONTROLLER	0xe0
 #define USB_CLASS_APP_SPEC		0xfe
 #define USB_CLASS_VENDOR_SPEC		0xff
 
@@ -223,6 +248,7 @@ struct usb_config_descriptor {
 #define USB_CONFIG_ATT_ONE		(1 << 7)	/* must be set */
 #define USB_CONFIG_ATT_SELFPOWER	(1 << 6)	/* self powered */
 #define USB_CONFIG_ATT_WAKEUP		(1 << 5)	/* can wakeup */
+#define USB_CONFIG_ATT_BATTERY		(1 << 4)	/* battery powered */
 
 /*-------------------------------------------------------------------------*/
 
@@ -289,6 +315,7 @@ struct usb_endpoint_descriptor {
 #define USB_ENDPOINT_XFER_ISOC		1
 #define USB_ENDPOINT_XFER_BULK		2
 #define USB_ENDPOINT_XFER_INT		3
+#define USB_ENDPOINT_MAX_ADJUSTABLE	0x80
 
 
 /*-------------------------------------------------------------------------*/
@@ -350,6 +377,147 @@ struct usb_interface_assoc_descriptor {
 } __attribute__ ((packed));
 
 
+/*-------------------------------------------------------------------------*/
+
+/* USB_DT_SECURITY:  group of wireless security descriptors, including
+ * encryption types available for setting up a CC/association.
+ */
+struct usb_security_descriptor {
+	__u8  bLength;
+	__u8  bDescriptorType;
+
+	__le16 wTotalLength;
+	__u8  bNumEncryptionTypes;
+};
+
+/*-------------------------------------------------------------------------*/
+
+/* USB_DT_KEY:  used with {GET,SET}_SECURITY_DATA; only public keys
+ * may be retrieved.
+ */
+struct usb_key_descriptor {
+	__u8  bLength;
+	__u8  bDescriptorType;
+
+	__u8  tTKID[3];
+	__u8  bReserved;
+	__u8  bKeyData[0];
+};
+
+/*-------------------------------------------------------------------------*/
+
+/* USB_DT_ENCRYPTION_TYPE:  bundled in DT_SECURITY groups */
+struct usb_encryption_descriptor {
+	__u8  bLength;
+	__u8  bDescriptorType;
+
+	__u8  bEncryptionType;
+#define	USB_ENC_TYPE_UNSECURE		0
+#define	USB_ENC_TYPE_WIRED		1	/* non-wireless mode */
+#define	USB_ENC_TYPE_CCM_1		2	/* aes128/cbc session */
+#define	USB_ENC_TYPE_RSA_1		3	/* rsa3072/sha1 auth */
+	__u8  bEncryptionValue;		/* use in SET_ENCRYPTION */
+	__u8  bAuthKeyIndex;
+};
+
+
+/*-------------------------------------------------------------------------*/
+
+/* USB_DT_BOS:  group of wireless capabilities */
+struct usb_bos_descriptor {
+	__u8  bLength;
+	__u8  bDescriptorType;
+
+	__le16 wTotalLength;
+	__u8  bNumDeviceCaps;
+};
+
+/*-------------------------------------------------------------------------*/
+
+/* USB_DT_DEVICE_CAPABILITY:  grouped with BOS */
+struct usb_dev_cap_header {
+	__u8  bLength;
+	__u8  bDescriptorType;
+	__u8  bDevCapabilityType;
+};
+
+#define	USB_CAP_TYPE_WIRELESS_USB	1
+
+struct usb_wireless_cap_descriptor {	/* Ultra Wide Band */
+	__u8  bLength;
+	__u8  bDescriptorType;
+	__u8  bDevCapabilityType;
+
+	__u8  bmAttributes;
+#define	USB_WIRELESS_P2P_DRD		(1 << 1)
+#define	USB_WIRELESS_BEACON_MASK	(3 << 2)
+#define	USB_WIRELESS_BEACON_SELF	(1 << 2)
+#define	USB_WIRELESS_BEACON_DIRECTED	(2 << 2)
+#define	USB_WIRELESS_BEACON_NONE	(3 << 2)
+	__le16 wPHYRates;	/* bit rates, Mbps */
+#define	USB_WIRELESS_PHY_53		(1 << 0)	/* always set */
+#define	USB_WIRELESS_PHY_80		(1 << 1)
+#define	USB_WIRELESS_PHY_107		(1 << 2)	/* always set */
+#define	USB_WIRELESS_PHY_160		(1 << 3)
+#define	USB_WIRELESS_PHY_200		(1 << 4)	/* always set */
+#define	USB_WIRELESS_PHY_320		(1 << 5)
+#define	USB_WIRELESS_PHY_400		(1 << 6)
+#define	USB_WIRELESS_PHY_480		(1 << 7)
+	__u8  bmTFITXPowerInfo;	/* TFI power levels */
+	__u8  bmFFITXPowerInfo;	/* FFI power levels */
+	__le16 bmBandGroup;
+	__u8  bReserved;
+};
+
+/*-------------------------------------------------------------------------*/
+
+/* USB_DT_WIRELESS_ENDPOINT_COMP:  companion descriptor associated with
+ * each endpoint descriptor for a wireless device
+ */
+struct usb_wireless_ep_comp_descriptor {
+	__u8  bLength;
+	__u8  bDescriptorType;
+
+	__u8  bMaxBurst;
+	__u8  bMaxSequence;
+	__le16 wMaxStreamDelay;
+	__le16 wOverTheAirPacketSize;
+	__u8  bOverTheAirInterval;
+	__u8  bmCompAttributes;
+#define USB_ENDPOINT_SWITCH_MASK	0x03	/* in bmCompAttributes */
+#define USB_ENDPOINT_SWITCH_NO		0
+#define USB_ENDPOINT_SWITCH_SWITCH	1
+#define USB_ENDPOINT_SWITCH_SCALE	2
+};
+
+/*-------------------------------------------------------------------------*/
+
+/* USB_REQ_SET_HANDSHAKE is a four-way handshake used between a wireless
+ * host and a device for connection set up, mutual authentication, and
+ * exchanging short lived session keys.  The handshake depends on a CC.
+ */
+struct usb_handshake {
+	__u8 bMessageNumber;
+	__u8 bStatus;
+	__u8 tTKID[3];
+	__u8 bReserved;
+	__u8 CDID[16];
+	__u8 nonce[16];
+	__u8 MIC[8];
+};
+
+/*-------------------------------------------------------------------------*/
+
+/* USB_REQ_SET_CONNECTION modifies or revokes a connection context (CC).
+ * A CC may also be set up using non-wireless secure channels (including
+ * wired USB!), and some devices may support CCs with multiple hosts.
+ */
+struct usb_connection_context {
+	__u8 CHID[16];		/* persistent host id */
+	__u8 CDID[16];		/* device id (unique w/in host context) */
+	__u8 CK[16];		/* connection key */
+};
+
 /*-------------------------------------------------------------------------*/
 
 /* USB 2.0 defines three speeds, here's how Linux identifies them */
@@ -357,7 +525,8 @@ struct usb_interface_assoc_descriptor {
 enum usb_device_speed {
 	USB_SPEED_UNKNOWN = 0,			/* enumerating */
 	USB_SPEED_LOW, USB_SPEED_FULL,		/* usb 1.1 */
-	USB_SPEED_HIGH				/* usb 2.0 */
+	USB_SPEED_HIGH,				/* usb 2.0 */
+	USB_SPEED_VARIABLE,			/* wireless (usb 2.5) */
 };
 
 enum usb_device_state {
-- 
cgit v1.2.3-70-g09d2


From 8c8709334cec803368a432a33e0f2e116d48fe07 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Mon, 27 Jun 2005 14:36:34 -0700
Subject: [PATCH] ppc32: Remove CONFIG_PMAC_PBOOK

This patch removes CONFIG_PMAC_PBOOK (PowerBook support).  This is now
split into CONFIG_PMAC_MEDIABAY for the actual hotswap bay that some
powerbooks have, CONFIG_PM for power management related code, and just left
out of any CONFIG_* option for some generally useful stuff that can be used
on non-laptops as well.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/ppc/platforms/pmac_sleep.S     |   4 +-
 arch/ppc/platforms/pmac_time.c      |   8 +-
 drivers/block/swim3.c               |  10 +-
 drivers/char/misc.c                 |   3 -
 drivers/ide/ppc/pmac.c              |   8 +-
 drivers/ieee1394/ohci1394.c         |  10 +-
 drivers/macintosh/Kconfig           |  35 ++-----
 drivers/macintosh/Makefile          |   2 +-
 drivers/macintosh/adb.c             |  10 +-
 drivers/macintosh/via-pmu.c         |  70 ++++++--------
 drivers/usb/host/ohci-pci.c         |  13 +--
 drivers/video/aty/aty128fb.c        |  14 ---
 drivers/video/chipsfb.c             | 176 +++++++++++++++++++-----------------
 include/linux/pmu.h                 |   6 +-
 sound/oss/dmasound/dmasound_awacs.c |  14 +--
 sound/ppc/awacs.c                   |   8 +-
 sound/ppc/daca.c                    |   6 +-
 sound/ppc/pmac.c                    |  11 ++-
 sound/ppc/pmac.h                    |   2 +-
 sound/ppc/tumbler.c                 |   4 +-
 20 files changed, 193 insertions(+), 221 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ppc/platforms/pmac_sleep.S b/arch/ppc/platforms/pmac_sleep.S
index f459ade1bd6..016a7464915 100644
--- a/arch/ppc/platforms/pmac_sleep.S
+++ b/arch/ppc/platforms/pmac_sleep.S
@@ -46,7 +46,7 @@
 	.section .text
 	.align	5
 
-#if defined(CONFIG_PMAC_PBOOK) || defined(CONFIG_CPU_FREQ_PMAC)
+#if defined(CONFIG_PM) || defined(CONFIG_CPU_FREQ_PMAC)
 
 /* This gets called by via-pmu.c late during the sleep process.
  * The PMU was already send the sleep command and will shut us down
@@ -382,7 +382,7 @@ turn_on_mmu:
 	isync
 	rfi
 
-#endif /* defined(CONFIG_PMAC_PBOOK) || defined(CONFIG_CPU_FREQ) */
+#endif /* defined(CONFIG_PM) || defined(CONFIG_CPU_FREQ) */
 
 	.section .data
 	.balign	L1_CACHE_LINE_SIZE
diff --git a/arch/ppc/platforms/pmac_time.c b/arch/ppc/platforms/pmac_time.c
index de60ccc7db9..778ce4fec36 100644
--- a/arch/ppc/platforms/pmac_time.c
+++ b/arch/ppc/platforms/pmac_time.c
@@ -206,7 +206,7 @@ via_calibrate_decr(void)
 	return 1;
 }
 
-#ifdef CONFIG_PMAC_PBOOK
+#ifdef CONFIG_PM
 /*
  * Reset the time after a sleep.
  */
@@ -238,7 +238,7 @@ time_sleep_notify(struct pmu_sleep_notifier *self, int when)
 static struct pmu_sleep_notifier time_sleep_notifier __pmacdata = {
 	time_sleep_notify, SLEEP_LEVEL_MISC,
 };
-#endif /* CONFIG_PMAC_PBOOK */
+#endif /* CONFIG_PM */
 
 /*
  * Query the OF and get the decr frequency.
@@ -251,9 +251,9 @@ pmac_calibrate_decr(void)
 	struct device_node *cpu;
 	unsigned int freq, *fp;
 
-#ifdef CONFIG_PMAC_PBOOK
+#ifdef CONFIG_PM
 	pmu_register_sleep_notifier(&time_sleep_notifier);
-#endif /* CONFIG_PMAC_PBOOK */
+#endif /* CONFIG_PM */
 
 	/* We assume MacRISC2 machines have correct device-tree
 	 * calibration. That's better since the VIA itself seems
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index 5b09cf154ac..e5f7494c00e 100644
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -253,7 +253,7 @@ static int floppy_revalidate(struct gendisk *disk);
 static int swim3_add_device(struct device_node *swims);
 int swim3_init(void);
 
-#ifndef CONFIG_PMAC_PBOOK
+#ifndef CONFIG_PMAC_MEDIABAY
 #define check_media_bay(which, what)	1
 #endif
 
@@ -297,9 +297,11 @@ static void do_fd_request(request_queue_t * q)
 	int i;
 	for(i=0;i<floppy_count;i++)
 	{
+#ifdef CONFIG_PMAC_MEDIABAY
 		if (floppy_states[i].media_bay &&
 			check_media_bay(floppy_states[i].media_bay, MB_FD))
 			continue;
+#endif /* CONFIG_PMAC_MEDIABAY */
 		start_request(&floppy_states[i]);
 	}
 	sti();
@@ -856,8 +858,10 @@ static int floppy_ioctl(struct inode *inode, struct file *filp,
 	if ((cmd & 0x80) && !capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
+#ifdef CONFIG_PMAC_MEDIABAY
 	if (fs->media_bay && check_media_bay(fs->media_bay, MB_FD))
 		return -ENXIO;
+#endif
 
 	switch (cmd) {
 	case FDEJECT:
@@ -881,8 +885,10 @@ static int floppy_open(struct inode *inode, struct file *filp)
 	int n, err = 0;
 
 	if (fs->ref_count == 0) {
+#ifdef CONFIG_PMAC_MEDIABAY
 		if (fs->media_bay && check_media_bay(fs->media_bay, MB_FD))
 			return -ENXIO;
+#endif
 		out_8(&sw->setup, S_IBM_DRIVE | S_FCLK_DIV2);
 		out_8(&sw->control_bic, 0xff);
 		out_8(&sw->mode, 0x95);
@@ -967,8 +973,10 @@ static int floppy_revalidate(struct gendisk *disk)
 	struct swim3 __iomem *sw;
 	int ret, n;
 
+#ifdef CONFIG_PMAC_MEDIABAY
 	if (fs->media_bay && check_media_bay(fs->media_bay, MB_FD))
 		return -ENXIO;
+#endif
 
 	sw = fs->swim3;
 	grab_drive(fs, revalidating, 0);
diff --git a/drivers/char/misc.c b/drivers/char/misc.c
index 31cf84d6902..931efd58f87 100644
--- a/drivers/char/misc.c
+++ b/drivers/char/misc.c
@@ -308,9 +308,6 @@ static int __init misc_init(void)
 #endif
 #ifdef CONFIG_BVME6000
 	rtc_DP8570A_init();
-#endif
-#ifdef CONFIG_PMAC_PBOOK
-	pmu_device_init();
 #endif
 	if (register_chrdev(MISC_MAJOR,"misc",&misc_fops)) {
 		printk("unable to get major %d for misc devices\n",
diff --git a/drivers/ide/ppc/pmac.c b/drivers/ide/ppc/pmac.c
index 569f1676744..818380b5fd2 100644
--- a/drivers/ide/ppc/pmac.c
+++ b/drivers/ide/ppc/pmac.c
@@ -1324,9 +1324,9 @@ pmac_ide_setup_device(pmac_ide_hwif_t *pmif, ide_hwif_t *hwif)
 	/* XXX FIXME: Media bay stuff need re-organizing */
 	if (np->parent && np->parent->name
 	    && strcasecmp(np->parent->name, "media-bay") == 0) {
-#ifdef CONFIG_PMAC_PBOOK
+#ifdef CONFIG_PMAC_MEDIABAY
 		media_bay_set_ide_infos(np->parent, pmif->regbase, pmif->irq, hwif->index);
-#endif /* CONFIG_PMAC_PBOOK */
+#endif /* CONFIG_PMAC_MEDIABAY */
 		pmif->mediabay = 1;
 		if (!bidp)
 			pmif->aapl_bus_id = 1;
@@ -1382,10 +1382,10 @@ pmac_ide_setup_device(pmac_ide_hwif_t *pmif, ide_hwif_t *hwif)
 	       hwif->index, model_name[pmif->kind], pmif->aapl_bus_id,
 	       pmif->mediabay ? " (mediabay)" : "", hwif->irq);
 			
-#ifdef CONFIG_PMAC_PBOOK
+#ifdef CONFIG_PMAC_MEDIABAY
 	if (pmif->mediabay && check_media_bay_by_base(pmif->regbase, MB_CD) == 0)
 		hwif->noprobe = 0;
-#endif /* CONFIG_PMAC_PBOOK */
+#endif /* CONFIG_PMAC_MEDIABAY */
 
 	hwif->sg_max_nents = MAX_DCMDS;
 
diff --git a/drivers/ieee1394/ohci1394.c b/drivers/ieee1394/ohci1394.c
index 36e25ac823d..b3d3d22fde6 100644
--- a/drivers/ieee1394/ohci1394.c
+++ b/drivers/ieee1394/ohci1394.c
@@ -3538,8 +3538,8 @@ static void ohci1394_pci_remove(struct pci_dev *pdev)
 
 static int ohci1394_pci_resume (struct pci_dev *pdev)
 {
-#ifdef CONFIG_PMAC_PBOOK
-	{
+#ifdef CONFIG_PPC_PMAC
+	if (_machine == _MACH_Pmac) {
 		struct device_node *of_node;
 
 		/* Re-enable 1394 */
@@ -3547,7 +3547,7 @@ static int ohci1394_pci_resume (struct pci_dev *pdev)
 		if (of_node)
 			pmac_call_feature (PMAC_FTR_1394_ENABLE, of_node, 0, 1);
 	}
-#endif
+#endif /* CONFIG_PPC_PMAC */
 
 	pci_enable_device(pdev);
 
@@ -3557,8 +3557,8 @@ static int ohci1394_pci_resume (struct pci_dev *pdev)
 
 static int ohci1394_pci_suspend (struct pci_dev *pdev, pm_message_t state)
 {
-#ifdef CONFIG_PMAC_PBOOK
-	{
+#ifdef CONFIG_PPC_PMAC
+	if (_machine == _MACH_Pmac) {
 		struct device_node *of_node;
 
 		/* Disable 1394 */
diff --git a/drivers/macintosh/Kconfig b/drivers/macintosh/Kconfig
index b0ace5bc950..91691a6c004 100644
--- a/drivers/macintosh/Kconfig
+++ b/drivers/macintosh/Kconfig
@@ -86,33 +86,18 @@ config PMAC_SMU
 	  on the "SMU" system control chip which replaces the old PMU.
 	  If you don't know, say Y.
 
-config PMAC_PBOOK
-	bool "Power management support for PowerBooks"
-	depends on ADB_PMU
-	---help---
-	  This provides support for putting a PowerBook to sleep; it also
-	  enables media bay support.  Power management works on the
-	  PB2400/3400/3500, Wallstreet, Lombard, and Bronze PowerBook G3 and
-	  the Titanium Powerbook G4, as well as the iBooks.  You should get
-	  the power management daemon, pmud, to make it work and you must have
-	  the /dev/pmu device (see the pmud README).
-
-	  Get pmud from <ftp://ftp.samba.org/pub/ppclinux/pmud/>.
-
-	  If you have a PowerBook, you should say Y here.
-
-	  You may also want to compile the dma sound driver as a module and
-	  have it autoloaded. The act of removing the module shuts down the
-	  sound hardware for more power savings.
-
-config PM
-	bool
-	depends on PPC_PMAC && ADB_PMU && PMAC_PBOOK
-	default y
-
 config PMAC_APM_EMU
 	tristate "APM emulation"
-	depends on PMAC_PBOOK
+	depends on PPC_PMAC && PPC32 && PM
+
+config PMAC_MEDIABAY
+	bool "Support PowerBook hotswap media bay"
+	depends on PPC_PMAC && PPC32
+	help
+	  This option adds support for older PowerBook's hotswap media bay
+	  that can contains batteries, floppy drives, or IDE devices. PCI
+	  devices are not fully supported in the bay as I never had one to
+	  try with
 
 # made a separate option since backlight may end up beeing used
 # on non-powerbook machines (but only on PMU based ones AFAIK)
diff --git a/drivers/macintosh/Makefile b/drivers/macintosh/Makefile
index b3f88a4fcef..f5ae171dbfe 100644
--- a/drivers/macintosh/Makefile
+++ b/drivers/macintosh/Makefile
@@ -6,7 +6,7 @@
 
 obj-$(CONFIG_PPC_PMAC)		+= macio_asic.o
 
-obj-$(CONFIG_PMAC_PBOOK)	+= mediabay.o
+obj-$(CONFIG_PMAC_MEDIABAY)	+= mediabay.o
 obj-$(CONFIG_MAC_EMUMOUSEBTN)	+= mac_hid.o
 obj-$(CONFIG_INPUT_ADBHID)	+= adbhid.o
 obj-$(CONFIG_ANSLCD)		+= ans-lcd.o
diff --git a/drivers/macintosh/adb.c b/drivers/macintosh/adb.c
index 493e2afa191..c0dc1e3fa58 100644
--- a/drivers/macintosh/adb.c
+++ b/drivers/macintosh/adb.c
@@ -90,7 +90,7 @@ static int sleepy_trackpad;
 static int autopoll_devs;
 int __adb_probe_sync;
 
-#ifdef CONFIG_PMAC_PBOOK
+#ifdef CONFIG_PM
 static int adb_notify_sleep(struct pmu_sleep_notifier *self, int when);
 static struct pmu_sleep_notifier adb_sleep_notifier = {
 	adb_notify_sleep,
@@ -320,9 +320,9 @@ int __init adb_init(void)
 		printk(KERN_WARNING "Warning: no ADB interface detected\n");
 		adb_controller = NULL;
 	} else {
-#ifdef CONFIG_PMAC_PBOOK
+#ifdef CONFIG_PM
 		pmu_register_sleep_notifier(&adb_sleep_notifier);
-#endif /* CONFIG_PMAC_PBOOK */
+#endif /* CONFIG_PM */
 #ifdef CONFIG_PPC
 		if (machine_is_compatible("AAPL,PowerBook1998") ||
 			machine_is_compatible("PowerBook1,1"))
@@ -337,7 +337,7 @@ int __init adb_init(void)
 
 __initcall(adb_init);
 
-#ifdef CONFIG_PMAC_PBOOK
+#ifdef CONFIG_PM
 /*
  * notify clients before sleep and reset bus afterwards
  */
@@ -378,7 +378,7 @@ adb_notify_sleep(struct pmu_sleep_notifier *self, int when)
 	}
 	return PBOOK_SLEEP_OK;
 }
-#endif /* CONFIG_PMAC_PBOOK */
+#endif /* CONFIG_PM */
 
 static int
 do_adb_reset_bus(void)
diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c
index 5375df03c6f..4a0a0ad2d03 100644
--- a/drivers/macintosh/via-pmu.c
+++ b/drivers/macintosh/via-pmu.c
@@ -155,10 +155,10 @@ static spinlock_t pmu_lock;
 static u8 pmu_intr_mask;
 static int pmu_version;
 static int drop_interrupts;
-#ifdef CONFIG_PMAC_PBOOK
+#ifdef CONFIG_PM
 static int option_lid_wakeup = 1;
 static int sleep_in_progress;
-#endif /* CONFIG_PMAC_PBOOK */
+#endif /* CONFIG_PM */
 static unsigned long async_req_locks;
 static unsigned int pmu_irq_stats[11];
 
@@ -168,7 +168,6 @@ static struct proc_dir_entry *proc_pmu_irqstats;
 static struct proc_dir_entry *proc_pmu_options;
 static int option_server_mode;
 
-#ifdef CONFIG_PMAC_PBOOK
 int pmu_battery_count;
 int pmu_cur_battery;
 unsigned int pmu_power_flags;
@@ -176,7 +175,6 @@ struct pmu_battery_info pmu_batteries[PMU_MAX_BATTERIES];
 static int query_batt_timer = BATTERY_POLLING_COUNT;
 static struct adb_request batt_req;
 static struct proc_dir_entry *proc_pmu_batt[PMU_MAX_BATTERIES];
-#endif /* CONFIG_PMAC_PBOOK */
 
 #if defined(CONFIG_INPUT_ADBHID) && defined(CONFIG_PMAC_BACKLIGHT)
 extern int disable_kernel_backlight;
@@ -210,11 +208,9 @@ static int proc_get_irqstats(char *page, char **start, off_t off,
 static int pmu_set_backlight_level(int level, void* data);
 static int pmu_set_backlight_enable(int on, int level, void* data);
 #endif /* CONFIG_PMAC_BACKLIGHT */
-#ifdef CONFIG_PMAC_PBOOK
 static void pmu_pass_intr(unsigned char *data, int len);
 static int proc_get_batt(char *page, char **start, off_t off,
 			int count, int *eof, void *data);
-#endif /* CONFIG_PMAC_PBOOK */
 static int proc_read_options(char *page, char **start, off_t off,
 			int count, int *eof, void *data);
 static int proc_write_options(struct file *file, const char __user *buffer,
@@ -407,9 +403,7 @@ static int __init via_pmu_start(void)
 
 	bright_req_1.complete = 1;
 	bright_req_2.complete = 1;
-#ifdef CONFIG_PMAC_PBOOK
 	batt_req.complete = 1;
-#endif
 
 #ifdef CONFIG_PPC32
 	if (pmu_kind == PMU_KEYLARGO_BASED)
@@ -468,7 +462,7 @@ static int __init via_pmu_dev_init(void)
 	register_backlight_controller(&pmu_backlight_controller, NULL, "pmu");
 #endif /* CONFIG_PMAC_BACKLIGHT */
 
-#ifdef CONFIG_PMAC_PBOOK
+#ifdef CONFIG_PPC32
   	if (machine_is_compatible("AAPL,3400/2400") ||
   		machine_is_compatible("AAPL,3500")) {
 		int mb = pmac_call_feature(PMAC_FTR_GET_MB_INFO,
@@ -496,20 +490,19 @@ static int __init via_pmu_dev_init(void)
 				pmu_batteries[1].flags |= PMU_BATT_TYPE_SMART;
 		}
 	}
-#endif /* CONFIG_PMAC_PBOOK */
+#endif /* CONFIG_PPC32 */
+
 	/* Create /proc/pmu */
 	proc_pmu_root = proc_mkdir("pmu", NULL);
 	if (proc_pmu_root) {
-#ifdef CONFIG_PMAC_PBOOK
-		int i;
+		long i;
 
 		for (i=0; i<pmu_battery_count; i++) {
 			char title[16];
-			sprintf(title, "battery_%d", i);
+			sprintf(title, "battery_%ld", i);
 			proc_pmu_batt[i] = create_proc_read_entry(title, 0, proc_pmu_root,
 						proc_get_batt, (void *)i);
 		}
-#endif /* CONFIG_PMAC_PBOOK */
 
 		proc_pmu_info = create_proc_read_entry("info", 0, proc_pmu_root,
 					proc_get_info, NULL);
@@ -629,8 +622,6 @@ static void pmu_set_server_mode(int server_mode)
 	pmu_wait_complete(&req);
 }
 
-#ifdef CONFIG_PMAC_PBOOK
-
 /* This new version of the code for 2400/3400/3500 powerbooks
  * is inspired from the implementation in gkrellm-pmu
  */
@@ -813,8 +804,6 @@ query_battery_state(void)
 			2, PMU_SMART_BATTERY_STATE, pmu_cur_battery+1);
 }
 
-#endif /* CONFIG_PMAC_PBOOK */
-
 static int __pmac
 proc_get_info(char *page, char **start, off_t off,
 		int count, int *eof, void *data)
@@ -823,11 +812,9 @@ proc_get_info(char *page, char **start, off_t off,
 
 	p += sprintf(p, "PMU driver version     : %d\n", PMU_DRIVER_VERSION);
 	p += sprintf(p, "PMU firmware version   : %02x\n", pmu_version);
-#ifdef CONFIG_PMAC_PBOOK
 	p += sprintf(p, "AC Power               : %d\n",
 		((pmu_power_flags & PMU_PWR_AC_PRESENT) != 0));
 	p += sprintf(p, "Battery count          : %d\n", pmu_battery_count);
-#endif /* CONFIG_PMAC_PBOOK */
 
 	return p - page;
 }
@@ -859,12 +846,11 @@ proc_get_irqstats(char *page, char **start, off_t off,
 	return p - page;
 }
 
-#ifdef CONFIG_PMAC_PBOOK
 static int __pmac
 proc_get_batt(char *page, char **start, off_t off,
 		int count, int *eof, void *data)
 {
-	int batnum = (int)data;
+	long batnum = (long)data;
 	char *p = page;
 	
 	p += sprintf(p, "\n");
@@ -883,7 +869,6 @@ proc_get_batt(char *page, char **start, off_t off,
 
 	return p - page;
 }
-#endif /* CONFIG_PMAC_PBOOK */
 
 static int __pmac
 proc_read_options(char *page, char **start, off_t off,
@@ -891,11 +876,11 @@ proc_read_options(char *page, char **start, off_t off,
 {
 	char *p = page;
 
-#ifdef CONFIG_PMAC_PBOOK
+#ifdef CONFIG_PM
 	if (pmu_kind == PMU_KEYLARGO_BASED &&
 	    pmac_call_feature(PMAC_FTR_SLEEP_STATE,NULL,0,-1) >= 0)
 		p += sprintf(p, "lid_wakeup=%d\n", option_lid_wakeup);
-#endif /* CONFIG_PMAC_PBOOK */
+#endif
 	if (pmu_kind == PMU_KEYLARGO_BASED)
 		p += sprintf(p, "server_mode=%d\n", option_server_mode);
 
@@ -932,12 +917,12 @@ proc_write_options(struct file *file, const char __user *buffer,
 	*(val++) = 0;
 	while(*val == ' ')
 		val++;
-#ifdef CONFIG_PMAC_PBOOK
+#ifdef CONFIG_PM
 	if (pmu_kind == PMU_KEYLARGO_BASED &&
 	    pmac_call_feature(PMAC_FTR_SLEEP_STATE,NULL,0,-1) >= 0)
 		if (!strcmp(label, "lid_wakeup"))
 			option_lid_wakeup = ((*val) == '1');
-#endif /* CONFIG_PMAC_PBOOK */
+#endif
 	if (pmu_kind == PMU_KEYLARGO_BASED && !strcmp(label, "server_mode")) {
 		int new_value;
 		new_value = ((*val) == '1');
@@ -1432,7 +1417,6 @@ next:
 	}
 	/* Tick interrupt */
 	else if ((1 << pirq) & PMU_INT_TICK) {
-#ifdef CONFIG_PMAC_PBOOK
 		/* Environement or tick interrupt, query batteries */
 		if (pmu_battery_count) {
 			if ((--query_batt_timer) == 0) {
@@ -1447,7 +1431,6 @@ next:
 		pmu_pass_intr(data, len);
 	} else {
 	       pmu_pass_intr(data, len);
-#endif /* CONFIG_PMAC_PBOOK */
 	}
 	goto next;
 }
@@ -2062,7 +2045,7 @@ pmu_i2c_simple_write(int bus, int addr,  u8* data, int len)
 	return -1;
 }
 
-#ifdef CONFIG_PMAC_PBOOK
+#ifdef CONFIG_PM
 
 static LIST_HEAD(sleep_notifiers);
 
@@ -2715,6 +2698,8 @@ powerbook_sleep_3400(void)
 	return 0;
 }
 
+#endif /* CONFIG_PM */
+
 /*
  * Support for /dev/pmu device
  */
@@ -2894,11 +2879,11 @@ static int __pmac
 pmu_ioctl(struct inode * inode, struct file *filp,
 		     u_int cmd, u_long arg)
 {
-	struct pmu_private *pp = filp->private_data;
 	__u32 __user *argp = (__u32 __user *)arg;
-	int error;
+	int error = -EINVAL;
 
 	switch (cmd) {
+#ifdef CONFIG_PM
 	case PMU_IOC_SLEEP:
 		if (!capable(CAP_SYS_ADMIN))
 			return -EACCES;
@@ -2920,12 +2905,13 @@ pmu_ioctl(struct inode * inode, struct file *filp,
 			error = -ENOSYS;
 		}
 		sleep_in_progress = 0;
-		return error;
+		break;
 	case PMU_IOC_CAN_SLEEP:
 		if (pmac_call_feature(PMAC_FTR_SLEEP_STATE,NULL,0,-1) < 0)
 			return put_user(0, argp);
 		else
 			return put_user(1, argp);
+#endif /* CONFIG_PM */
 
 #ifdef CONFIG_PMAC_BACKLIGHT
 	/* Backlight should have its own device or go via
@@ -2946,11 +2932,13 @@ pmu_ioctl(struct inode * inode, struct file *filp,
 		error = get_user(value, argp);
 		if (!error)
 			error = set_backlight_level(value);
-		return error;
+		break;
 	}
 #ifdef CONFIG_INPUT_ADBHID
 	case PMU_IOC_GRAB_BACKLIGHT: {
+		struct pmu_private *pp = filp->private_data;
 		unsigned long flags;
+
 		if (pp->backlight_locker)
 			return 0;
 		pp->backlight_locker = 1;
@@ -2966,7 +2954,7 @@ pmu_ioctl(struct inode * inode, struct file *filp,
 	case PMU_IOC_HAS_ADB:
 		return put_user(pmu_has_adb, argp);
 	}
-	return -EINVAL;
+	return error;
 }
 
 static struct file_operations pmu_device_fops __pmacdata = {
@@ -2982,14 +2970,16 @@ static struct miscdevice pmu_device __pmacdata = {
 	PMU_MINOR, "pmu", &pmu_device_fops
 };
 
-void pmu_device_init(void)
+static int pmu_device_init(void)
 {
 	if (!via)
-		return;
+		return 0;
 	if (misc_register(&pmu_device) < 0)
 		printk(KERN_ERR "via-pmu: cannot register misc device.\n");
+	return 0;
 }
-#endif /* CONFIG_PMAC_PBOOK */
+device_initcall(pmu_device_init);
+
 
 #ifdef DEBUG_SLEEP
 static inline void  __pmac
@@ -3157,12 +3147,12 @@ EXPORT_SYMBOL(pmu_i2c_combined_read);
 EXPORT_SYMBOL(pmu_i2c_stdsub_write);
 EXPORT_SYMBOL(pmu_i2c_simple_read);
 EXPORT_SYMBOL(pmu_i2c_simple_write);
-#ifdef CONFIG_PMAC_PBOOK
+#ifdef CONFIG_PM
 EXPORT_SYMBOL(pmu_register_sleep_notifier);
 EXPORT_SYMBOL(pmu_unregister_sleep_notifier);
 EXPORT_SYMBOL(pmu_enable_irled);
 EXPORT_SYMBOL(pmu_battery_count);
 EXPORT_SYMBOL(pmu_batteries);
 EXPORT_SYMBOL(pmu_power_flags);
-#endif /* CONFIG_PMAC_PBOOK */
+#endif /* CONFIG_PM */
 
diff --git a/drivers/usb/host/ohci-pci.c b/drivers/usb/host/ohci-pci.c
index 57fd07d0054..eede6be098d 100644
--- a/drivers/usb/host/ohci-pci.c
+++ b/drivers/usb/host/ohci-pci.c
@@ -14,14 +14,11 @@
  * This file is licenced under the GPL.
  */
  
-#ifdef CONFIG_PMAC_PBOOK
+#ifdef CONFIG_PPC_PMAC
 #include <asm/machdep.h>
 #include <asm/pmac_feature.h>
 #include <asm/pci-bridge.h>
 #include <asm/prom.h>
-#ifndef CONFIG_PM
-#	define CONFIG_PM
-#endif
 #endif
 
 #ifndef CONFIG_PCI
@@ -132,7 +129,7 @@ static int ohci_pci_suspend (struct usb_hcd *hcd, pm_message_t message)
 	/* let things settle down a bit */
 	msleep (100);
 	
-#ifdef CONFIG_PMAC_PBOOK
+#ifdef CONFIG_PPC_PMAC
 	if (_machine == _MACH_Pmac) {
 	   	struct device_node	*of_node;
  
@@ -141,7 +138,7 @@ static int ohci_pci_suspend (struct usb_hcd *hcd, pm_message_t message)
 		if (of_node)
 			pmac_call_feature(PMAC_FTR_USB_ENABLE, of_node, 0, 0);
 	}
-#endif /* CONFIG_PMAC_PBOOK */
+#endif /* CONFIG_PPC_PMAC */
 	return 0;
 }
 
@@ -151,7 +148,7 @@ static int ohci_pci_resume (struct usb_hcd *hcd)
 	struct ohci_hcd		*ohci = hcd_to_ohci (hcd);
 	int			retval = 0;
 
-#ifdef CONFIG_PMAC_PBOOK
+#ifdef CONFIG_PPC_PMAC
 	if (_machine == _MACH_Pmac) {
 		struct device_node *of_node;
 
@@ -160,7 +157,7 @@ static int ohci_pci_resume (struct usb_hcd *hcd)
 		if (of_node)
 			pmac_call_feature (PMAC_FTR_USB_ENABLE, of_node, 0, 1);
 	}
-#endif /* CONFIG_PMAC_PBOOK */
+#endif /* CONFIG_PPC_PMAC */
 
 	/* resume root hub */
 	if (time_before (jiffies, ohci->next_statechange))
diff --git a/drivers/video/aty/aty128fb.c b/drivers/video/aty/aty128fb.c
index 9789115980a..7bc1d44d881 100644
--- a/drivers/video/aty/aty128fb.c
+++ b/drivers/video/aty/aty128fb.c
@@ -350,10 +350,8 @@ static int default_vmode __initdata = VMODE_1024_768_60;
 static int default_cmode __initdata = CMODE_8;
 #endif
 
-#ifdef CONFIG_PMAC_PBOOK
 static int default_crt_on __initdata = 0;
 static int default_lcd_on __initdata = 1;
-#endif
 
 #ifdef CONFIG_MTRR
 static int mtrr = 1;
@@ -1249,7 +1247,6 @@ static int aty128_crtc_to_var(const struct aty128_crtc *crtc,
 	return 0;
 }
 
-#ifdef CONFIG_PMAC_PBOOK
 static void aty128_set_crt_enable(struct aty128fb_par *par, int on)
 {
 	if (on) {
@@ -1284,7 +1281,6 @@ static void aty128_set_lcd_enable(struct aty128fb_par *par, int on)
 		aty_st_le32(LVDS_GEN_CNTL, reg);
 	}
 }
-#endif /* CONFIG_PMAC_PBOOK */
 
 static void aty128_set_pll(struct aty128_pll *pll, const struct aty128fb_par *par)
 {
@@ -1491,12 +1487,10 @@ static int aty128fb_set_par(struct fb_info *info)
 	info->fix.visual = par->crtc.bpp == 8 ? FB_VISUAL_PSEUDOCOLOR
 		: FB_VISUAL_DIRECTCOLOR;
 
-#ifdef CONFIG_PMAC_PBOOK
 	if (par->chip_gen == rage_M3) {
 		aty128_set_crt_enable(par, par->crt_on);
 		aty128_set_lcd_enable(par, par->lcd_on);
 	}
-#endif
 	if (par->accel_flags & FB_ACCELF_TEXT)
 		aty128_init_engine(par);
 
@@ -1652,7 +1646,6 @@ static int __init aty128fb_setup(char *options)
 		return 0;
 
 	while ((this_opt = strsep(&options, ",")) != NULL) {
-#ifdef CONFIG_PMAC_PBOOK
 		if (!strncmp(this_opt, "lcd:", 4)) {
 			default_lcd_on = simple_strtoul(this_opt+4, NULL, 0);
 			continue;
@@ -1660,7 +1653,6 @@ static int __init aty128fb_setup(char *options)
 			default_crt_on = simple_strtoul(this_opt+4, NULL, 0);
 			continue;
 		}
-#endif
 #ifdef CONFIG_MTRR
 		if(!strncmp(this_opt, "nomtrr", 6)) {
 			mtrr = 0;
@@ -1752,10 +1744,8 @@ static int __init aty128_init(struct pci_dev *pdev, const struct pci_device_id *
 	info->fbops = &aty128fb_ops;
 	info->flags = FBINFO_FLAG_DEFAULT;
 
-#ifdef CONFIG_PMAC_PBOOK
 	par->lcd_on = default_lcd_on;
 	par->crt_on = default_crt_on;
-#endif
 
 	var = default_var;
 #ifdef CONFIG_PPC_PMAC
@@ -2035,12 +2025,10 @@ static int aty128fb_blank(int blank, struct fb_info *fb)
 
 	aty_st_8(CRTC_EXT_CNTL+1, state);
 
-#ifdef CONFIG_PMAC_PBOOK
 	if (par->chip_gen == rage_M3) {
 		aty128_set_crt_enable(par, par->crt_on && !blank);
 		aty128_set_lcd_enable(par, par->lcd_on && !blank);
 	}
-#endif	
 #ifdef CONFIG_PMAC_BACKLIGHT
 	if ((_machine == _MACH_Pmac) && !blank)
 		set_backlight_enable(1);
@@ -2124,7 +2112,6 @@ static int aty128fb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
 static int aty128fb_ioctl(struct inode *inode, struct file *file, u_int cmd,
 			  u_long arg, struct fb_info *info)
 {
-#ifdef CONFIG_PMAC_PBOOK
 	struct aty128fb_par *par = info->par;
 	u32 value;
 	int rc;
@@ -2149,7 +2136,6 @@ static int aty128fb_ioctl(struct inode *inode, struct file *file, u_int cmd,
 		value = (par->crt_on << 1) | par->lcd_on;
 		return put_user(value, (__u32 __user *)arg);
 	}
-#endif
 	return -EINVAL;
 }
 
diff --git a/drivers/video/chipsfb.c b/drivers/video/chipsfb.c
index 95e72550d43..e75a965ec76 100644
--- a/drivers/video/chipsfb.c
+++ b/drivers/video/chipsfb.c
@@ -28,22 +28,17 @@
 #include <linux/fb.h>
 #include <linux/init.h>
 #include <linux/pci.h>
+#include <linux/console.h>
 #include <asm/io.h>
 
 #ifdef CONFIG_PMAC_BACKLIGHT
 #include <asm/backlight.h>
 #endif
-#ifdef CONFIG_PMAC_PBOOK
-#include <linux/adb.h>
-#include <linux/pmu.h>
-#endif
 
 /*
  * Since we access the display with inb/outb to fixed port numbers,
  * we can only handle one 6555x chip.  -- paulus
  */
-static struct fb_info chipsfb_info;
-
 #define write_ind(num, val, ap, dp)	do { \
 	outb((num), (ap)); outb((val), (dp)); \
 } while (0)
@@ -74,14 +69,6 @@ static struct fb_info chipsfb_info;
 	inb(0x3da); read_ind(num, var, 0x3c0, 0x3c1); \
 } while (0)
 
-#ifdef CONFIG_PMAC_PBOOK
-static unsigned char *save_framebuffer;
-int chips_sleep_notify(struct pmu_sleep_notifier *self, int when);
-static struct pmu_sleep_notifier chips_sleep_notifier = {
-	chips_sleep_notify, SLEEP_LEVEL_VIDEO,
-};
-#endif
-
 /*
  * Exported functions
  */
@@ -356,6 +343,8 @@ static struct fb_var_screeninfo chipsfb_var __initdata = {
 
 static void __init init_chips(struct fb_info *p, unsigned long addr)
 {
+	memset(p->screen_base, 0, 0x100000);
+
 	p->fix = chipsfb_fix;
 	p->fix.smem_start = addr;
 
@@ -366,34 +355,41 @@ static void __init init_chips(struct fb_info *p, unsigned long addr)
 
 	fb_alloc_cmap(&p->cmap, 256, 0);
 
-	if (register_framebuffer(p) < 0) {
-		printk(KERN_ERR "C&T 65550 framebuffer failed to register\n");
-		return;
-	}
-
-	printk(KERN_INFO "fb%d: Chips 65550 frame buffer (%dK RAM detected)\n",
-		p->node, p->fix.smem_len / 1024);
-
 	chips_hw_init();
 }
 
 static int __devinit
 chipsfb_pci_init(struct pci_dev *dp, const struct pci_device_id *ent)
 {
-	struct fb_info *p = &chipsfb_info;
+	struct fb_info *p;
 	unsigned long addr, size;
 	unsigned short cmd;
+	int rc = -ENODEV;
+
+	if (pci_enable_device(dp) < 0) {
+		dev_err(&dp->dev, "Cannot enable PCI device\n");
+		goto err_out;
+	}
 
 	if ((dp->resource[0].flags & IORESOURCE_MEM) == 0)
-		return -ENODEV;
+		goto err_disable;
 	addr = pci_resource_start(dp, 0);
 	size = pci_resource_len(dp, 0);
 	if (addr == 0)
-		return -ENODEV;
-	if (p->screen_base != 0)
-		return -EBUSY;
-	if (!request_mem_region(addr, size, "chipsfb"))
-		return -EBUSY;
+		goto err_disable;
+
+	p = framebuffer_alloc(0, &dp->dev);
+	if (p == NULL) {
+		dev_err(&dp->dev, "Cannot allocate framebuffer structure\n");
+		rc = -ENOMEM;
+		goto err_disable;
+	}
+
+	if (pci_request_region(dp, 0, "chipsfb") != 0) {
+		dev_err(&dp->dev, "Cannot request framebuffer\n");
+		rc = -EBUSY;
+		goto err_release_fb;
+	}
 
 #ifdef __BIG_ENDIAN
 	addr += 0x800000;	// Use big-endian aperture
@@ -411,37 +407,89 @@ chipsfb_pci_init(struct pci_dev *dp, const struct pci_device_id *ent)
 	set_backlight_enable(1);
 #endif /* CONFIG_PMAC_BACKLIGHT */
 
+#ifdef CONFIG_PPC
 	p->screen_base = __ioremap(addr, 0x200000, _PAGE_NO_CACHE);
+#else
+	p->screen_base = ioremap(addr, 0x200000);
+#endif
 	if (p->screen_base == NULL) {
-		release_mem_region(addr, size);
-		return -ENOMEM;
+		dev_err(&dp->dev, "Cannot map framebuffer\n");
+		rc = -ENOMEM;
+		goto err_release_pci;
 	}
+
+	pci_set_drvdata(dp, p);
 	p->device = &dp->dev;
+
 	init_chips(p, addr);
 
-#ifdef CONFIG_PMAC_PBOOK
-	pmu_register_sleep_notifier(&chips_sleep_notifier);
-#endif /* CONFIG_PMAC_PBOOK */
+	if (register_framebuffer(p) < 0) {
+		dev_err(&dp->dev,"C&T 65550 framebuffer failed to register\n");
+		goto err_unmap;
+	}
+
+	dev_info(&dp->dev,"fb%d: Chips 65550 frame buffer"
+		 " (%dK RAM detected)\n",
+		 p->node, p->fix.smem_len / 1024);
 
-	pci_set_drvdata(dp, p);
 	return 0;
+
+ err_unmap:
+	iounmap(p->screen_base);
+ err_release_pci:
+	pci_release_region(dp, 0);
+ err_release_fb:
+	framebuffer_release(p);
+ err_disable:
+ err_out:
+	return rc;
 }
 
 static void __devexit chipsfb_remove(struct pci_dev *dp)
 {
 	struct fb_info *p = pci_get_drvdata(dp);
 
-	if (p != &chipsfb_info || p->screen_base == NULL)
+	if (p->screen_base == NULL)
 		return;
 	unregister_framebuffer(p);
 	iounmap(p->screen_base);
 	p->screen_base = NULL;
-	release_mem_region(pci_resource_start(dp, 0), pci_resource_len(dp, 0));
+	pci_release_region(dp, 0);
+}
+
+#ifdef CONFIG_PM
+static int chipsfb_pci_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+        struct fb_info *p = pci_get_drvdata(pdev);
+
+	if (state == pdev->dev.power.power_state)
+		return 0;
+	if (state != PM_SUSPEND_MEM)
+		goto done;
+
+	acquire_console_sem();
+	chipsfb_blank(1, p);
+	fb_set_suspend(p, 1);
+	release_console_sem();
+ done:
+	pdev->dev.power.power_state = state;
+	return 0;
+}
+
+static int chipsfb_pci_resume(struct pci_dev *pdev)
+{
+        struct fb_info *p = pci_get_drvdata(pdev);
 
-#ifdef CONFIG_PMAC_PBOOK
-	pmu_unregister_sleep_notifier(&chips_sleep_notifier);
-#endif /* CONFIG_PMAC_PBOOK */
+	acquire_console_sem();
+	fb_set_suspend(p, 0);
+	chipsfb_blank(0, p);
+	release_console_sem();
+
+	pdev->dev.power.power_state = PMSG_ON;
+	return 0;
 }
+#endif /* CONFIG_PM */
+
 
 static struct pci_device_id chipsfb_pci_tbl[] = {
 	{ PCI_VENDOR_ID_CT, PCI_DEVICE_ID_CT_65550, PCI_ANY_ID, PCI_ANY_ID },
@@ -455,6 +503,10 @@ static struct pci_driver chipsfb_driver = {
 	.id_table =	chipsfb_pci_tbl,
 	.probe =	chipsfb_pci_init,
 	.remove =	__devexit_p(chipsfb_remove),
+#ifdef CONFIG_PM
+	.suspend =	chipsfb_pci_suspend,
+	.resume =	chipsfb_pci_resume,
+#endif
 };
 
 int __init chips_init(void)
@@ -472,48 +524,4 @@ static void __exit chipsfb_exit(void)
 	pci_unregister_driver(&chipsfb_driver);
 }
 
-#ifdef CONFIG_PMAC_PBOOK
-/*
- * Save the contents of the frame buffer when we go to sleep,
- * and restore it when we wake up again.
- */
-int
-chips_sleep_notify(struct pmu_sleep_notifier *self, int when)
-{
-	struct fb_info *p = &chipsfb_info;
-	int nb = p->var.yres * p->fix.line_length;
-
-	if (p->screen_base == NULL)
-		return PBOOK_SLEEP_OK;
-
-	switch (when) {
-	case PBOOK_SLEEP_REQUEST:
-		save_framebuffer = vmalloc(nb);
-		if (save_framebuffer == NULL)
-			return PBOOK_SLEEP_REFUSE;
-		break;
-	case PBOOK_SLEEP_REJECT:
-		if (save_framebuffer) {
-			vfree(save_framebuffer);
-			save_framebuffer = NULL;
-		}
-		break;
-	case PBOOK_SLEEP_NOW:
-		chipsfb_blank(1, p);
-		if (save_framebuffer)
-			memcpy(save_framebuffer, p->screen_base, nb);
-		break;
-	case PBOOK_WAKE:
-		if (save_framebuffer) {
-			memcpy(p->screen_base, save_framebuffer, nb);
-			vfree(save_framebuffer);
-			save_framebuffer = NULL;
-		}
-		chipsfb_blank(0, p);
-		break;
-	}
-	return PBOOK_SLEEP_OK;
-}
-#endif /* CONFIG_PMAC_PBOOK */
-
 MODULE_LICENSE("GPL");
diff --git a/include/linux/pmu.h b/include/linux/pmu.h
index 6d73eada277..373bd3b9b33 100644
--- a/include/linux/pmu.h
+++ b/include/linux/pmu.h
@@ -166,7 +166,7 @@ extern int pmu_i2c_simple_read(int bus, int addr,  u8* data, int len);
 extern int pmu_i2c_simple_write(int bus, int addr,  u8* data, int len);
 
 
-#ifdef CONFIG_PMAC_PBOOK
+#ifdef CONFIG_PM
 /*
  * Stuff for putting the powerbook to sleep and waking it again.
  *
@@ -208,6 +208,8 @@ struct pmu_sleep_notifier
 int pmu_register_sleep_notifier(struct pmu_sleep_notifier* notifier);
 int pmu_unregister_sleep_notifier(struct pmu_sleep_notifier* notifier);
 
+#endif /* CONFIG_PM */
+
 #define PMU_MAX_BATTERIES	2
 
 /* values for pmu_power_flags */
@@ -235,6 +237,4 @@ extern int pmu_battery_count;
 extern struct pmu_battery_info pmu_batteries[PMU_MAX_BATTERIES];
 extern unsigned int pmu_power_flags;
 
-#endif /* CONFIG_PMAC_PBOOK */
-
 #endif	/* __KERNEL__ */
diff --git a/sound/oss/dmasound/dmasound_awacs.c b/sound/oss/dmasound/dmasound_awacs.c
index 33108661e67..2704e1598ad 100644
--- a/sound/oss/dmasound/dmasound_awacs.c
+++ b/sound/oss/dmasound/dmasound_awacs.c
@@ -255,7 +255,7 @@ static int awacs_burgundy_read_mvolume(unsigned address);
 
 static volatile struct dbdma_cmd *emergency_dbdma_cmd;
 
-#ifdef CONFIG_PMAC_PBOOK
+#ifdef CONFIG_PM
 /*
  * Stuff for restoring after a sleep.
  */
@@ -263,7 +263,7 @@ static int awacs_sleep_notify(struct pmu_sleep_notifier *self, int when);
 struct pmu_sleep_notifier awacs_sleep_notifier = {
 	awacs_sleep_notify, SLEEP_LEVEL_SOUND,
 };
-#endif /* CONFIG_PMAC_PBOOK */
+#endif /* CONFIG_PM */
 
 /* for (soft) sample rate translations */
 int expand_bal;		/* Balance factor for expanding (not volume!) */
@@ -675,7 +675,7 @@ static void PMacIrqCleanup(void)
 	kfree(awacs_rx_cmd_space);
 	kfree(beep_dbdma_cmd_space);
 	kfree(beep_buf);
-#ifdef CONFIG_PMAC_PBOOK
+#ifdef CONFIG_PM
 	pmu_unregister_sleep_notifier(&awacs_sleep_notifier);
 #endif
 }
@@ -1415,7 +1415,7 @@ load_awacs(void)
 	}
 }
 
-#ifdef CONFIG_PMAC_PBOOK
+#ifdef CONFIG_PM
 /*
  * Save state when going to sleep, restore it afterwards.
  */
@@ -1551,7 +1551,7 @@ static int awacs_sleep_notify(struct pmu_sleep_notifier *self, int when)
 	}
 	return PBOOK_SLEEP_OK;
 }
-#endif /* CONFIG_PMAC_PBOOK */
+#endif /* CONFIG_PM */
 
 
 /* All the burgundy functions: */
@@ -3053,9 +3053,9 @@ printk("dmasound_pmac: Awacs/Screamer Codec Mfct: %d Rev %d\n", mfg, rev);
 	if ((res=setup_beep()))
 		return res ;
 
-#ifdef CONFIG_PMAC_PBOOK
+#ifdef CONFIG_PM
 	pmu_register_sleep_notifier(&awacs_sleep_notifier);
-#endif /* CONFIG_PMAC_PBOOK */
+#endif /* CONFIG_PM */
 
 	/* Powerbooks have odd ways of enabling inputs such as
 	   an expansion-bay CD or sound from an internal modem
diff --git a/sound/ppc/awacs.c b/sound/ppc/awacs.c
index e052bd071e5..061e52d3d77 100644
--- a/sound/ppc/awacs.c
+++ b/sound/ppc/awacs.c
@@ -90,7 +90,7 @@ snd_pmac_awacs_write_noreg(pmac_t *chip, int reg, int val)
 	snd_pmac_awacs_write(chip, val | (reg << 12));
 }
 
-#ifdef CONFIG_PMAC_PBOOK
+#ifdef CONFIG_PM
 /* Recalibrate chip */
 static void screamer_recalibrate(pmac_t *chip)
 {
@@ -642,7 +642,7 @@ static void awacs_restore_all_regs(pmac_t *chip)
 	}
 }
 
-#ifdef CONFIG_PMAC_PBOOK
+#ifdef CONFIG_PM
 static void snd_pmac_awacs_suspend(pmac_t *chip)
 {
 	snd_pmac_awacs_write_noreg(chip, 1, (chip->awacs_reg[1]
@@ -676,7 +676,7 @@ static void snd_pmac_awacs_resume(pmac_t *chip)
 	}
 #endif
 }
-#endif /* CONFIG_PMAC_PBOOK */
+#endif /* CONFIG_PM */
 
 #ifdef PMAC_SUPPORT_AUTOMUTE
 /*
@@ -883,7 +883,7 @@ snd_pmac_awacs_init(pmac_t *chip)
 	 * set lowlevel callbacks
 	 */
 	chip->set_format = snd_pmac_awacs_set_format;
-#ifdef CONFIG_PMAC_PBOOK
+#ifdef CONFIG_PM
 	chip->suspend = snd_pmac_awacs_suspend;
 	chip->resume = snd_pmac_awacs_resume;
 #endif
diff --git a/sound/ppc/daca.c b/sound/ppc/daca.c
index f24a9169361..a737f298e77 100644
--- a/sound/ppc/daca.c
+++ b/sound/ppc/daca.c
@@ -218,7 +218,7 @@ static snd_kcontrol_new_t daca_mixers[] = {
 };
 
 
-#ifdef CONFIG_PMAC_PBOOK
+#ifdef CONFIG_PM
 static void daca_resume(pmac_t *chip)
 {
 	pmac_daca_t *mix = chip->mixer_data;
@@ -227,7 +227,7 @@ static void daca_resume(pmac_t *chip)
 				  mix->amp_on ? 0x05 : 0x04);
 	daca_set_volume(mix);
 }
-#endif /* CONFIG_PMAC_PBOOK */
+#endif /* CONFIG_PM */
 
 
 static void daca_cleanup(pmac_t *chip)
@@ -275,7 +275,7 @@ int __init snd_pmac_daca_init(pmac_t *chip)
 			return err;
 	}
 
-#ifdef CONFIG_PMAC_PBOOK
+#ifdef CONFIG_PM
 	chip->resume = daca_resume;
 #endif
 
diff --git a/sound/ppc/pmac.c b/sound/ppc/pmac.c
index 080ef392846..75b8b742303 100644
--- a/sound/ppc/pmac.c
+++ b/sound/ppc/pmac.c
@@ -36,7 +36,7 @@
 #include <asm/pci-bridge.h>
 
 
-#if defined(CONFIG_PM) && defined(CONFIG_PMAC_PBOOK)
+#ifdef CONFIG_PM
 static int snd_pmac_register_sleep_notifier(pmac_t *chip);
 static int snd_pmac_unregister_sleep_notifier(pmac_t *chip);
 static int snd_pmac_suspend(snd_card_t *card, pm_message_t state);
@@ -782,7 +782,7 @@ static int snd_pmac_free(pmac_t *chip)
 	}
 
 	snd_pmac_sound_feature(chip, 0);
-#if defined(CONFIG_PM) && defined(CONFIG_PMAC_PBOOK)
+#ifdef CONFIG_PM
 	snd_pmac_unregister_sleep_notifier(chip);
 #endif
 
@@ -1292,7 +1292,7 @@ int __init snd_pmac_new(snd_card_t *card, pmac_t **chip_return)
 	/* Reset dbdma channels */
 	snd_pmac_dbdma_reset(chip);
 
-#if defined(CONFIG_PM) && defined(CONFIG_PMAC_PBOOK)
+#ifdef CONFIG_PM
 	/* add sleep notifier */
 	if (! snd_pmac_register_sleep_notifier(chip))
 		snd_card_set_pm_callback(chip->card, snd_pmac_suspend, snd_pmac_resume, chip);
@@ -1316,7 +1316,7 @@ int __init snd_pmac_new(snd_card_t *card, pmac_t **chip_return)
  * sleep notify for powerbook
  */
 
-#if defined(CONFIG_PM) && defined(CONFIG_PMAC_PBOOK)
+#ifdef CONFIG_PM
 
 /*
  * Save state when going to sleep, restore it afterwards.
@@ -1414,4 +1414,5 @@ static int snd_pmac_unregister_sleep_notifier(pmac_t *chip)
 	return 0;
 }
 
-#endif /* CONFIG_PM && CONFIG_PMAC_PBOOK */
+#endif /* CONFIG_PM */
+
diff --git a/sound/ppc/pmac.h b/sound/ppc/pmac.h
index 0a84c05f714..582db522011 100644
--- a/sound/ppc/pmac.h
+++ b/sound/ppc/pmac.h
@@ -167,7 +167,7 @@ struct snd_pmac {
 	void (*set_format)(pmac_t *chip);
 	void (*update_automute)(pmac_t *chip, int do_notify);
 	int (*detect_headphone)(pmac_t *chip);
-#ifdef CONFIG_PMAC_PBOOK
+#ifdef CONFIG_PM
 	void (*suspend)(pmac_t *chip);
 	void (*resume)(pmac_t *chip);
 #endif
diff --git a/sound/ppc/tumbler.c b/sound/ppc/tumbler.c
index 9332237cb6a..36c5d5d45bb 100644
--- a/sound/ppc/tumbler.c
+++ b/sound/ppc/tumbler.c
@@ -1128,7 +1128,7 @@ static void tumbler_reset_audio(pmac_t *chip)
 	}
 }
 
-#ifdef CONFIG_PMAC_PBOOK
+#ifdef CONFIG_PM
 /* suspend mixer */
 static void tumbler_suspend(pmac_t *chip)
 {
@@ -1370,7 +1370,7 @@ int __init snd_pmac_tumbler_init(pmac_t *chip)
 	if ((err = snd_ctl_add(chip->card, chip->drc_sw_ctl)) < 0)
 		return err;
 
-#ifdef CONFIG_PMAC_PBOOK
+#ifdef CONFIG_PM
 	chip->suspend = tumbler_suspend;
 	chip->resume = tumbler_resume;
 #endif
-- 
cgit v1.2.3-70-g09d2


From ffaa8bd6c904d1ab79b677905067349a5ff51d84 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <andrea@suse.de>
Date: Mon, 27 Jun 2005 14:36:36 -0700
Subject: [PATCH] seccomp: tsc disable

I believe at least for seccomp it's worth to turn off the tsc, not just for
HT but for the L2 cache too.  So it's up to you, either you turn it off
completely (which isn't very nice IMHO) or I recommend to apply this below
patch.

This has been tested successfully on x86-64 against current cogito
repository (i686 compiles so I didn't bother testing ;).  People selling
the cpu through cpushare may appreciate this bit for a peace of mind.

There's no way to get any timing info anymore with this applied
(gettimeofday is forbidden of course).  The seccomp environment is
completely deterministic so it can't be allowed to get timing info, it has
to be deterministic so in the future I can enable a computing mode that
does a parallel computing for each task with server side transparent
checkpointing and verification that the output is the same from all the 2/3
seller computers for each task, without the buyer even noticing (for now
the verification is left to the buyer client side and there's no
checkpointing, since that would require more kernel changes to track the
dirty bits but it'll be easy to extend once the basic mode is finished).

Eliminating a cold-cache read of the cr4 global variable will save one
cacheline during the tlb flush while making the code per-cpu-safe at the
same time.  Thanks to Mikael Pettersson for noticing the tlb flush wasn't
per-cpu-safe.

The global tlb flush can run from irq (IPI calling do_flush_tlb_all) but
it'll be transparent to the switch_to code since the IPI won't make any
change to the cr4 contents from the point of view of the interrupted code
and since it's now all per-cpu stuff, it will not race.  So no need to
disable irqs in switch_to slow path.

Signed-off-by: Andrea Arcangeli <andrea@cpushare.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/i386/kernel/process.c    | 29 +++++++++++++++++++++++++++++
 arch/x86_64/kernel/process.c  | 29 +++++++++++++++++++++++++++++
 include/asm-i386/tlbflush.h   | 12 +++++++-----
 include/asm-x86_64/tlbflush.h | 12 +++++++-----
 include/linux/seccomp.h       | 10 ++++++++++
 5 files changed, 82 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index 5f8cfa6b794..ba243a4cc11 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -616,6 +616,33 @@ handle_io_bitmap(struct thread_struct *next, struct tss_struct *tss)
 	tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY;
 }
 
+/*
+ * This function selects if the context switch from prev to next
+ * has to tweak the TSC disable bit in the cr4.
+ */
+static inline void disable_tsc(struct task_struct *prev_p,
+			       struct task_struct *next_p)
+{
+	struct thread_info *prev, *next;
+
+	/*
+	 * gcc should eliminate the ->thread_info dereference if
+	 * has_secure_computing returns 0 at compile time (SECCOMP=n).
+	 */
+	prev = prev_p->thread_info;
+	next = next_p->thread_info;
+
+	if (has_secure_computing(prev) || has_secure_computing(next)) {
+		/* slow path here */
+		if (has_secure_computing(prev) &&
+		    !has_secure_computing(next)) {
+			write_cr4(read_cr4() & ~X86_CR4_TSD);
+		} else if (!has_secure_computing(prev) &&
+			   has_secure_computing(next))
+			write_cr4(read_cr4() | X86_CR4_TSD);
+	}
+}
+
 /*
  *	switch_to(x,yn) should switch tasks from x to y.
  *
@@ -695,6 +722,8 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
 	if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr))
 		handle_io_bitmap(next, tss);
 
+	disable_tsc(prev_p, next_p);
+
 	return prev_p;
 }
 
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index 1d91271796e..7577f9d7a75 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -481,6 +481,33 @@ out:
 	return err;
 }
 
+/*
+ * This function selects if the context switch from prev to next
+ * has to tweak the TSC disable bit in the cr4.
+ */
+static inline void disable_tsc(struct task_struct *prev_p,
+			       struct task_struct *next_p)
+{
+	struct thread_info *prev, *next;
+
+	/*
+	 * gcc should eliminate the ->thread_info dereference if
+	 * has_secure_computing returns 0 at compile time (SECCOMP=n).
+	 */
+	prev = prev_p->thread_info;
+	next = next_p->thread_info;
+
+	if (has_secure_computing(prev) || has_secure_computing(next)) {
+		/* slow path here */
+		if (has_secure_computing(prev) &&
+		    !has_secure_computing(next)) {
+			write_cr4(read_cr4() & ~X86_CR4_TSD);
+		} else if (!has_secure_computing(prev) &&
+			   has_secure_computing(next))
+			write_cr4(read_cr4() | X86_CR4_TSD);
+	}
+}
+
 /*
  * This special macro can be used to load a debugging register
  */
@@ -599,6 +626,8 @@ struct task_struct *__switch_to(struct task_struct *prev_p, struct task_struct *
 		}
 	}
 
+	disable_tsc(prev_p, next_p);
+
 	return prev_p;
 }
 
diff --git a/include/asm-i386/tlbflush.h b/include/asm-i386/tlbflush.h
index f22fab0cea2..ab216e1370e 100644
--- a/include/asm-i386/tlbflush.h
+++ b/include/asm-i386/tlbflush.h
@@ -22,16 +22,18 @@
  */
 #define __flush_tlb_global()						\
 	do {								\
-		unsigned int tmpreg;					\
+		unsigned int tmpreg, cr4, cr4_orig;			\
 									\
 		__asm__ __volatile__(					\
-			"movl %1, %%cr4;  # turn off PGE     \n"	\
+			"movl %%cr4, %2;  # turn off PGE     \n"	\
+			"movl %2, %1;                        \n"	\
+			"andl %3, %1;                        \n"	\
+			"movl %1, %%cr4;                     \n"	\
 			"movl %%cr3, %0;                     \n"	\
 			"movl %0, %%cr3;  # flush TLB        \n"	\
 			"movl %2, %%cr4;  # turn PGE back on \n"	\
-			: "=&r" (tmpreg)				\
-			: "r" (mmu_cr4_features & ~X86_CR4_PGE),	\
-			  "r" (mmu_cr4_features)			\
+			: "=&r" (tmpreg), "=&r" (cr4), "=&r" (cr4_orig)	\
+			: "i" (~X86_CR4_PGE)				\
 			: "memory");					\
 	} while (0)
 
diff --git a/include/asm-x86_64/tlbflush.h b/include/asm-x86_64/tlbflush.h
index 2e811ac262a..06174238252 100644
--- a/include/asm-x86_64/tlbflush.h
+++ b/include/asm-x86_64/tlbflush.h
@@ -22,16 +22,18 @@
  */
 #define __flush_tlb_global()						\
 	do {								\
-		unsigned long tmpreg;					\
+		unsigned long tmpreg, cr4, cr4_orig;			\
 									\
 		__asm__ __volatile__(					\
-			"movq %1, %%cr4;  # turn off PGE     \n"	\
+			"movq %%cr4, %2;  # turn off PGE     \n"	\
+			"movq %2, %1;                        \n"	\
+			"andq %3, %1;                        \n"	\
+			"movq %1, %%cr4;                     \n"	\
 			"movq %%cr3, %0;  # flush TLB        \n"	\
 			"movq %0, %%cr3;                     \n"	\
 			"movq %2, %%cr4;  # turn PGE back on \n"	\
-			: "=&r" (tmpreg)				\
-			: "r" (mmu_cr4_features & ~X86_CR4_PGE),	\
-			  "r" (mmu_cr4_features)			\
+			: "=&r" (tmpreg), "=&r" (cr4), "=&r" (cr4_orig)	\
+			: "i" (~X86_CR4_PGE)				\
 			: "memory");					\
 	} while (0)
 
diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index 3a2702bbb1d..dc89116bb1c 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -19,6 +19,11 @@ static inline void secure_computing(int this_syscall)
 		__secure_computing(this_syscall);
 }
 
+static inline int has_secure_computing(struct thread_info *ti)
+{
+	return unlikely(test_ti_thread_flag(ti, TIF_SECCOMP));
+}
+
 #else /* CONFIG_SECCOMP */
 
 #if (__GNUC__ > 2)
@@ -28,6 +33,11 @@ static inline void secure_computing(int this_syscall)
 #endif
 
 #define secure_computing(x) do { } while (0)
+/* static inline to preserve typechecking */
+static inline int has_secure_computing(struct thread_info *ti)
+{
+	return 0;
+}
 
 #endif /* CONFIG_SECCOMP */
 
-- 
cgit v1.2.3-70-g09d2


From 3de0a70bd926ff974adb27a38d4fd1049f05e54e Mon Sep 17 00:00:00 2001
From: Mike Miller <mike.miller@hp.com>
Date: Mon, 27 Jun 2005 14:36:48 -0700
Subject: [PATCH] cciss: pci id fix

This patch fixes a PCI ID I got wrong before.  It also adds support for
another new SAS controller due out this summer.  I didn't have a marketing
name prior to my last submission.  Also modifies the copyright date range.

Signed-off-by: Mike Miller <mike.miller@hp.com>
Acked-by: Jeff Garzik <jgarzik@pobox.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 Documentation/cciss.txt | 1 +
 drivers/block/cciss.c   | 9 ++++++---
 include/linux/pci_ids.h | 3 ++-
 3 files changed, 9 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/cciss.txt b/Documentation/cciss.txt
index d599beb9df8..c8f9a73111d 100644
--- a/Documentation/cciss.txt
+++ b/Documentation/cciss.txt
@@ -17,6 +17,7 @@ This driver is known to work with the following cards:
 	* SA P600
 	* SA P800
 	* SA E400
+	* SA E300
 
 If nodes are not already created in the /dev/cciss directory, run as root:
 
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index abde27027c0..0cd606ce222 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -1,6 +1,6 @@
 /*
  *    Disk Array driver for HP SA 5xxx and 6xxx Controllers
- *    Copyright 2000, 2002 Hewlett-Packard Development Company, L.P.
+ *    Copyright 2000, 2005 Hewlett-Packard Development Company, L.P.
  *
  *    This program is free software; you can redistribute it and/or modify
  *    it under the terms of the GNU General Public License as published by
@@ -54,7 +54,7 @@
 MODULE_AUTHOR("Hewlett-Packard Company");
 MODULE_DESCRIPTION("Driver for HP Controller SA5xxx SA6xxx version 2.6.6");
 MODULE_SUPPORTED_DEVICE("HP SA5i SA5i+ SA532 SA5300 SA5312 SA641 SA642 SA6400"
-			" SA6i P600 P800 E400");
+			" SA6i P600 P800 E400 E300");
 MODULE_LICENSE("GPL");
 
 #include "cciss_cmd.h"
@@ -85,8 +85,10 @@ static const struct pci_device_id cciss_pci_device_id[] = {
 		0x103C, 0x3225, 0, 0, 0},
 	{ PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSB,
 		0x103c, 0x3223, 0, 0, 0},
-	{ PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSB,
+	{ PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSC,
 		0x103c, 0x3231, 0, 0, 0},
+	{ PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSC,
+		0x103c, 0x3233, 0, 0, 0},
 	{0,}
 };
 MODULE_DEVICE_TABLE(pci, cciss_pci_device_id);
@@ -110,6 +112,7 @@ static struct board_type products[] = {
 	{ 0x3225103C, "Smart Array P600", &SA5_access},
 	{ 0x3223103C, "Smart Array P800", &SA5_access},
 	{ 0x3231103C, "Smart Array E400", &SA5_access},
+	{ 0x3233103C, "Smart Array E300", &SA5_access},
 };
 
 /* How long to wait (in millesconds) for board to go into simple mode */
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 3af7450278b..a3961e1d518 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -712,8 +712,9 @@
 #define PCI_DEVICE_ID_HP_DIVA_AUX	0x1290
 #define PCI_DEVICE_ID_HP_DIVA_RMP3	0x1301
 #define PCI_DEVICE_ID_HP_CISSA		0x3220
-#define PCI_DEVICE_ID_HP_CISSB		0x3230
+#define PCI_DEVICE_ID_HP_CISSB		0x3222
 #define PCI_DEVICE_ID_HP_ZX2_IOC	0x4031
+#define PCI_DEVICE_ID_HP_CISSC		0x3230
 
 #define PCI_VENDOR_ID_PCTECH		0x1042
 #define PCI_DEVICE_ID_PCTECH_RZ1000	0x1000
-- 
cgit v1.2.3-70-g09d2


From cd6fb584cf7f18ec6b221192b57d712ecc8c1859 Mon Sep 17 00:00:00 2001
From: Mike Miller <mike.miller@hp.com>
Date: Mon, 27 Jun 2005 14:36:49 -0700
Subject: [PATCH] cciss: pci domain info pass 2

This is pass 2 of my patch to add pci domain info to an existing ioctl.  This
time I insert the domain between dev_fn and board_id as Willy suggested and
change the var to unsigned short to ease Christoph's concerns.  Although I
thought unsigned int was the correct var type for this.  I also thought it
didn't matter where I inserted it in the structure.

Signed-off-by: Mike Miller <mike.miller@hp.com>
Acked-by: Jeff Garzik <jgarzik@pobox.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/block/cciss.c       | 1 +
 include/linux/cciss_ioctl.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 0cd606ce222..d5d0fa538f1 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -638,6 +638,7 @@ static int cciss_ioctl(struct inode *inode, struct file *filep,
 		cciss_pci_info_struct pciinfo;
 
 		if (!arg) return -EINVAL;
+		pciinfo.domain = pci_domain_nr(host->pdev->bus);
 		pciinfo.bus = host->pdev->bus->number;
 		pciinfo.dev_fn = host->pdev->devfn;
 		pciinfo.board_id = host->board_id;
diff --git a/include/linux/cciss_ioctl.h b/include/linux/cciss_ioctl.h
index ee0c6e8995d..424d5e622b4 100644
--- a/include/linux/cciss_ioctl.h
+++ b/include/linux/cciss_ioctl.h
@@ -10,6 +10,7 @@
 typedef struct _cciss_pci_info_struct
 {
 	unsigned char 	bus;
+	unsigned short	domain;
 	unsigned char 	dev_fn;
 	__u32 		board_id;
 } cciss_pci_info_struct; 
-- 
cgit v1.2.3-70-g09d2


From 9ec4b1f356b3bad928ae8e2aa9caebfa737d52df Mon Sep 17 00:00:00 2001
From: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Date: Mon, 27 Jun 2005 15:17:01 -0700
Subject: [PATCH] kprobes: fix single-step out of line - take2

Now that PPC64 has no-execute support, here is a second try to fix the
single step out of line during kprobe execution.  Kprobes on x86_64 already
solved this problem by allocating an executable page and using it as the
scratch area for stepping out of line.  Reuse that.

Signed-off-by: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/ppc64/kernel/kprobes.c  |  26 ++++++++--
 arch/x86_64/kernel/kprobes.c | 113 +------------------------------------------
 include/asm-ia64/kprobes.h   |   1 +
 include/asm-ppc64/kprobes.h  |   2 +-
 include/linux/kprobes.h      |   2 +
 kernel/kprobes.c             | 101 ++++++++++++++++++++++++++++++++++++++
 6 files changed, 128 insertions(+), 117 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ppc64/kernel/kprobes.c b/arch/ppc64/kernel/kprobes.c
index 782ce3efa2c..86cc5496db9 100644
--- a/arch/ppc64/kernel/kprobes.c
+++ b/arch/ppc64/kernel/kprobes.c
@@ -36,6 +36,8 @@
 #include <asm/kdebug.h>
 #include <asm/sstep.h>
 
+static DECLARE_MUTEX(kprobe_mutex);
+
 static struct kprobe *current_kprobe;
 static unsigned long kprobe_status, kprobe_saved_msr;
 static struct kprobe *kprobe_prev;
@@ -54,6 +56,15 @@ int arch_prepare_kprobe(struct kprobe *p)
 		printk("Cannot register a kprobe on rfid or mtmsrd\n");
 		ret = -EINVAL;
 	}
+
+	/* insn must be on a special executable page on ppc64 */
+	if (!ret) {
+		up(&kprobe_mutex);
+		p->ainsn.insn = get_insn_slot();
+		down(&kprobe_mutex);
+		if (!p->ainsn.insn)
+			ret = -ENOMEM;
+	}
 	return ret;
 }
 
@@ -79,16 +90,22 @@ void arch_disarm_kprobe(struct kprobe *p)
 
 void arch_remove_kprobe(struct kprobe *p)
 {
+	up(&kprobe_mutex);
+	free_insn_slot(p->ainsn.insn);
+	down(&kprobe_mutex);
 }
 
 static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
 {
+	kprobe_opcode_t insn = *p->ainsn.insn;
+
 	regs->msr |= MSR_SE;
-	/*single step inline if it a breakpoint instruction*/
-	if (p->opcode == BREAKPOINT_INSTRUCTION)
+
+	/* single step inline if it is a trap variant */
+	if (IS_TW(insn) || IS_TD(insn) || IS_TWI(insn) || IS_TDI(insn))
 		regs->nip = (unsigned long)p->addr;
 	else
-		regs->nip = (unsigned long)&p->ainsn.insn;
+		regs->nip = (unsigned long)p->ainsn.insn;
 }
 
 static inline void save_previous_kprobe(void)
@@ -205,9 +222,10 @@ no_kprobe:
 static void resume_execution(struct kprobe *p, struct pt_regs *regs)
 {
 	int ret;
+	unsigned int insn = *p->ainsn.insn;
 
 	regs->nip = (unsigned long)p->addr;
-	ret = emulate_step(regs, p->ainsn.insn[0]);
+	ret = emulate_step(regs, insn);
 	if (ret == 0)
 		regs->nip = (unsigned long)p->addr + 4;
 }
diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c
index 4e680f87a75..6a1c88376be 100644
--- a/arch/x86_64/kernel/kprobes.c
+++ b/arch/x86_64/kernel/kprobes.c
@@ -38,7 +38,7 @@
 #include <linux/string.h>
 #include <linux/slab.h>
 #include <linux/preempt.h>
-#include <linux/moduleloader.h>
+
 #include <asm/cacheflush.h>
 #include <asm/pgtable.h>
 #include <asm/kdebug.h>
@@ -51,8 +51,6 @@ static struct kprobe *kprobe_prev;
 static unsigned long kprobe_status_prev, kprobe_old_rflags_prev, kprobe_saved_rflags_prev;
 static struct pt_regs jprobe_saved_regs;
 static long *jprobe_saved_rsp;
-static kprobe_opcode_t *get_insn_slot(void);
-static void free_insn_slot(kprobe_opcode_t *slot);
 void jprobe_return_end(void);
 
 /* copy of the kernel stack at the probe fire time */
@@ -681,112 +679,3 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 	}
 	return 0;
 }
-
-/*
- * kprobe->ainsn.insn points to the copy of the instruction to be single-stepped.
- * By default on x86_64, pages we get from kmalloc or vmalloc are not
- * executable.  Single-stepping an instruction on such a page yields an
- * oops.  So instead of storing the instruction copies in their respective
- * kprobe objects, we allocate a page, map it executable, and store all the
- * instruction copies there.  (We can allocate additional pages if somebody
- * inserts a huge number of probes.)  Each page can hold up to INSNS_PER_PAGE
- * instruction slots, each of which is MAX_INSN_SIZE*sizeof(kprobe_opcode_t)
- * bytes.
- */
-#define INSNS_PER_PAGE (PAGE_SIZE/(MAX_INSN_SIZE*sizeof(kprobe_opcode_t)))
-struct kprobe_insn_page {
-	struct hlist_node hlist;
-	kprobe_opcode_t *insns;		/* page of instruction slots */
-	char slot_used[INSNS_PER_PAGE];
-	int nused;
-};
-
-static struct hlist_head kprobe_insn_pages;
-
-/**
- * get_insn_slot() - Find a slot on an executable page for an instruction.
- * We allocate an executable page if there's no room on existing ones.
- */
-static kprobe_opcode_t *get_insn_slot(void)
-{
-	struct kprobe_insn_page *kip;
-	struct hlist_node *pos;
-
-	hlist_for_each(pos, &kprobe_insn_pages) {
-		kip = hlist_entry(pos, struct kprobe_insn_page, hlist);
-		if (kip->nused < INSNS_PER_PAGE) {
-			int i;
-			for (i = 0; i < INSNS_PER_PAGE; i++) {
-				if (!kip->slot_used[i]) {
-					kip->slot_used[i] = 1;
-					kip->nused++;
-					return kip->insns + (i*MAX_INSN_SIZE);
-				}
-			}
-			/* Surprise!  No unused slots.  Fix kip->nused. */
-			kip->nused = INSNS_PER_PAGE;
-		}
-	}
-
-	/* All out of space.  Need to allocate a new page. Use slot 0.*/
-	kip = kmalloc(sizeof(struct kprobe_insn_page), GFP_KERNEL);
-	if (!kip) {
-		return NULL;
-	}
-
-	/*
-	 * For the %rip-relative displacement fixups to be doable, we
-	 * need our instruction copy to be within +/- 2GB of any data it
-	 * might access via %rip.  That is, within 2GB of where the
-	 * kernel image and loaded module images reside.  So we allocate
-	 * a page in the module loading area.
-	 */
-	kip->insns = module_alloc(PAGE_SIZE);
-	if (!kip->insns) {
-		kfree(kip);
-		return NULL;
-	}
-	INIT_HLIST_NODE(&kip->hlist);
-	hlist_add_head(&kip->hlist, &kprobe_insn_pages);
-	memset(kip->slot_used, 0, INSNS_PER_PAGE);
-	kip->slot_used[0] = 1;
-	kip->nused = 1;
-	return kip->insns;
-}
-
-/**
- * free_insn_slot() - Free instruction slot obtained from get_insn_slot().
- */
-static void free_insn_slot(kprobe_opcode_t *slot)
-{
-	struct kprobe_insn_page *kip;
-	struct hlist_node *pos;
-
-	hlist_for_each(pos, &kprobe_insn_pages) {
-		kip = hlist_entry(pos, struct kprobe_insn_page, hlist);
-		if (kip->insns <= slot
-		    && slot < kip->insns+(INSNS_PER_PAGE*MAX_INSN_SIZE)) {
-			int i = (slot - kip->insns) / MAX_INSN_SIZE;
-			kip->slot_used[i] = 0;
-			kip->nused--;
-			if (kip->nused == 0) {
-				/*
-				 * Page is no longer in use.  Free it unless
-				 * it's the last one.  We keep the last one
-				 * so as not to have to set it up again the
-				 * next time somebody inserts a probe.
-				 */
-				hlist_del(&kip->hlist);
-				if (hlist_empty(&kprobe_insn_pages)) {
-					INIT_HLIST_NODE(&kip->hlist);
-					hlist_add_head(&kip->hlist,
-						&kprobe_insn_pages);
-				} else {
-					module_free(NULL, kip->insns);
-					kfree(kip);
-				}
-			}
-			return;
-		}
-	}
-}
diff --git a/include/asm-ia64/kprobes.h b/include/asm-ia64/kprobes.h
index 7b700035e36..25d8b1edfcb 100644
--- a/include/asm-ia64/kprobes.h
+++ b/include/asm-ia64/kprobes.h
@@ -28,6 +28,7 @@
 #include <linux/ptrace.h>
 #include <asm/break.h>
 
+#define MAX_INSN_SIZE   16
 #define BREAK_INST	(long)(__IA64_BREAK_KPROBE << 6)
 
 typedef union cmp_inst {
diff --git a/include/asm-ppc64/kprobes.h b/include/asm-ppc64/kprobes.h
index 19b468bed05..790cf7c5277 100644
--- a/include/asm-ppc64/kprobes.h
+++ b/include/asm-ppc64/kprobes.h
@@ -45,7 +45,7 @@ typedef unsigned int kprobe_opcode_t;
 /* Architecture specific copy of original instruction */
 struct arch_specific_insn {
 	/* copy of original instruction */
-	kprobe_opcode_t insn[MAX_INSN_SIZE];
+	kprobe_opcode_t *insn;
 };
 
 #ifdef CONFIG_KPROBES
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 5e1a7b0d7b3..d304d457985 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -177,6 +177,8 @@ extern void arch_arm_kprobe(struct kprobe *p);
 extern void arch_disarm_kprobe(struct kprobe *p);
 extern void arch_remove_kprobe(struct kprobe *p);
 extern void show_registers(struct pt_regs *regs);
+extern kprobe_opcode_t *get_insn_slot(void);
+extern void free_insn_slot(kprobe_opcode_t *slot);
 
 /* Get the kprobe at this addr (if any).  Must have called lock_kprobes */
 struct kprobe *get_kprobe(void *addr);
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 334f37472c5..65242529a75 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -36,6 +36,7 @@
 #include <linux/hash.h>
 #include <linux/init.h>
 #include <linux/module.h>
+#include <linux/moduleloader.h>
 #include <asm/cacheflush.h>
 #include <asm/errno.h>
 #include <asm/kdebug.h>
@@ -50,6 +51,106 @@ unsigned int kprobe_cpu = NR_CPUS;
 static DEFINE_SPINLOCK(kprobe_lock);
 static struct kprobe *curr_kprobe;
 
+/*
+ * kprobe->ainsn.insn points to the copy of the instruction to be
+ * single-stepped. x86_64, POWER4 and above have no-exec support and
+ * stepping on the instruction on a vmalloced/kmalloced/data page
+ * is a recipe for disaster
+ */
+#define INSNS_PER_PAGE	(PAGE_SIZE/(MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
+
+struct kprobe_insn_page {
+	struct hlist_node hlist;
+	kprobe_opcode_t *insns;		/* Page of instruction slots */
+	char slot_used[INSNS_PER_PAGE];
+	int nused;
+};
+
+static struct hlist_head kprobe_insn_pages;
+
+/**
+ * get_insn_slot() - Find a slot on an executable page for an instruction.
+ * We allocate an executable page if there's no room on existing ones.
+ */
+kprobe_opcode_t *get_insn_slot(void)
+{
+	struct kprobe_insn_page *kip;
+	struct hlist_node *pos;
+
+	hlist_for_each(pos, &kprobe_insn_pages) {
+		kip = hlist_entry(pos, struct kprobe_insn_page, hlist);
+		if (kip->nused < INSNS_PER_PAGE) {
+			int i;
+			for (i = 0; i < INSNS_PER_PAGE; i++) {
+				if (!kip->slot_used[i]) {
+					kip->slot_used[i] = 1;
+					kip->nused++;
+					return kip->insns + (i * MAX_INSN_SIZE);
+				}
+			}
+			/* Surprise!  No unused slots.  Fix kip->nused. */
+			kip->nused = INSNS_PER_PAGE;
+		}
+	}
+
+	/* All out of space.  Need to allocate a new page. Use slot 0.*/
+	kip = kmalloc(sizeof(struct kprobe_insn_page), GFP_KERNEL);
+	if (!kip) {
+		return NULL;
+	}
+
+	/*
+	 * Use module_alloc so this page is within +/- 2GB of where the
+	 * kernel image and loaded module images reside. This is required
+	 * so x86_64 can correctly handle the %rip-relative fixups.
+	 */
+	kip->insns = module_alloc(PAGE_SIZE);
+	if (!kip->insns) {
+		kfree(kip);
+		return NULL;
+	}
+	INIT_HLIST_NODE(&kip->hlist);
+	hlist_add_head(&kip->hlist, &kprobe_insn_pages);
+	memset(kip->slot_used, 0, INSNS_PER_PAGE);
+	kip->slot_used[0] = 1;
+	kip->nused = 1;
+	return kip->insns;
+}
+
+void free_insn_slot(kprobe_opcode_t *slot)
+{
+	struct kprobe_insn_page *kip;
+	struct hlist_node *pos;
+
+	hlist_for_each(pos, &kprobe_insn_pages) {
+		kip = hlist_entry(pos, struct kprobe_insn_page, hlist);
+		if (kip->insns <= slot &&
+		    slot < kip->insns + (INSNS_PER_PAGE * MAX_INSN_SIZE)) {
+			int i = (slot - kip->insns) / MAX_INSN_SIZE;
+			kip->slot_used[i] = 0;
+			kip->nused--;
+			if (kip->nused == 0) {
+				/*
+				 * Page is no longer in use.  Free it unless
+				 * it's the last one.  We keep the last one
+				 * so as not to have to set it up again the
+				 * next time somebody inserts a probe.
+				 */
+				hlist_del(&kip->hlist);
+				if (hlist_empty(&kprobe_insn_pages)) {
+					INIT_HLIST_NODE(&kip->hlist);
+					hlist_add_head(&kip->hlist,
+						&kprobe_insn_pages);
+				} else {
+					module_free(NULL, kip->insns);
+					kfree(kip);
+				}
+			}
+			return;
+		}
+	}
+}
+
 /* Locks kprobe: irqs must be disabled */
 void lock_kprobes(void)
 {
-- 
cgit v1.2.3-70-g09d2


From 802eae7c800fb7f583e6c06afa363585af2bef00 Mon Sep 17 00:00:00 2001
From: Rusty Lynch <rusty.lynch@intel.com>
Date: Mon, 27 Jun 2005 15:17:08 -0700
Subject: [PATCH] Return probe redesign: architecture independent changes

The following is the second version of the function return probe patches
I sent out earlier this week.  Changes since my last submission include:

* Fix in ppc64 code removing an unneeded call to re-enable preemption
* Fix a build problem in ia64 when kprobes was turned off
* Added another BUG_ON check to each of the architecture trampoline
  handlers

My initial patch description ==>

 From my experiences with adding return probes to x86_64 and ia64, and the
feedback on LKML to those patches, I think we can simplify the design
for return probes.

The following patch tweaks the original design such that:

* Instead of storing the stack address in the return probe instance, the
  task pointer is stored.  This gives us all we need in order to:
    - find the correct return probe instance when we enter the trampoline
      (even if we are recursing)
    - find all left-over return probe instances when the task is going away

  This has the side effect of simplifying the implementation since more
  work can be done in kernel/kprobes.c since architecture specific knowledge
  of the stack layout is no longer required.  Specifically, we no longer have:
	- arch_get_kprobe_task()
	- arch_kprobe_flush_task()
	- get_rp_inst_tsk()
	- get_rp_inst()
	- trampoline_post_handler() <see next bullet>

* Instead of splitting the return probe handling and cleanup logic across
  the pre and post trampoline handlers, all the work is pushed into the
  pre function (trampoline_probe_handler), and then we skip single stepping
  the original function.  In this case the original instruction to be single
  stepped was just a NOP, and we can do without the extra interruption.

The new flow of events to having a return probe handler execute when a target
function exits is:

* At system initialization time, a kprobe is inserted at the beginning of
  kretprobe_trampoline.  kernel/kprobes.c use to handle this on it's own,
  but ia64 needed to do this a little differently (i.e. a function pointer
  is really a pointer to a structure containing the instruction pointer and
  a global pointer), so I added the notion of arch_init(), so that
  kernel/kprobes.c:init_kprobes() now allows architecture specific
  initialization by calling arch_init() before exiting.  Each architecture
  now registers a kprobe on it's own trampoline function.

* register_kretprobe() will insert a kprobe at the beginning of the targeted
  function with the kprobe pre_handler set to arch_prepare_kretprobe
  (still no change)

* When the target function is entered, the kprobe is fired, calling
  arch_prepare_kretprobe (still no change)

* In arch_prepare_kretprobe() we try to get a free instance and if one is
  available then we fill out the instance with a pointer to the return probe,
  the original return address, and a pointer to the task structure (instead
  of the stack address.)  Just like before we change the return address
  to the trampoline function and mark the instance as used.

  If multiple return probes are registered for a given target function,
  then arch_prepare_kretprobe() will get called multiple times for the same
  task (since our kprobe implementation is able to handle multiple kprobes
  at the same address.)  Past the first call to arch_prepare_kretprobe,
  we end up with the original address stored in the return probe instance
  pointing to our trampoline function. (This is a significant difference
  from the original arch_prepare_kretprobe design.)

* Target function executes like normal and then returns to kretprobe_trampoline.

* kprobe inserted on the first instruction of kretprobe_trampoline is fired
  and calls trampoline_probe_handler() (no change here)

* trampoline_probe_handler() consumes each of the instances associated with
  the current task by calling the registered handler function and marking
  the instance as unused until an instance is found that has a return address
  different then the trampoline function.

  (change similar to my previous ia64 RFC)

* If the task is killed with some left-over return probe instances (meaning
  that a target function was entered, but never returned), then we just
  free any instances associated with the task.  (Not much different other
  then we can handle this without calling architecture specific functions.)

  There is a known problem that this patch does not yet solve where
  registering a return probe flush_old_exec or flush_thread will put us
  in a bad state.  Most likely the best way to handle this is to not allow
  registering return probes on these two functions.

  (Significant change)

This patch series applies to the 2.6.12-rc6-mm1 kernel, and provides:
  * kernel/kprobes.c changes
  * i386 patch of existing return probes implementation
  * x86_64 patch of existing return probe implementation
  * ia64 implementation
  * ppc64 implementation (provided by Ananth)

This patch implements the architecture independant changes for a reworking
of the kprobes based function return probes design. Changes include:

  * Removing functions for querying a return probe instance off a stack address
  * Removing the stack_addr field from the kretprobe_instance definition,
    and adding a task pointer
  * Adding architecture specific initialization via arch_init()
  * Removing extern definitions for the architecture trampoline functions
    (this isn't needed anymore since the architecture handles the
     initialization of the kprobe in the return probe trampoline function.)

Signed-off-by: Rusty Lynch <rusty.lynch@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/kprobes.h | 28 +++-----------------
 kernel/kprobes.c        | 69 ++++++++++++++-----------------------------------
 2 files changed, 22 insertions(+), 75 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index d304d457985..b7a194c4362 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -104,33 +104,12 @@ struct jprobe {
 };
 
 #ifdef ARCH_SUPPORTS_KRETPROBES
-extern int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs);
-extern void trampoline_post_handler(struct kprobe *p, struct pt_regs *regs,
-							unsigned long flags);
-extern struct task_struct *arch_get_kprobe_task(void *ptr);
 extern void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs);
-extern void arch_kprobe_flush_task(struct task_struct *tk);
 #else /* ARCH_SUPPORTS_KRETPROBES */
-static inline void kretprobe_trampoline(void)
-{
-}
-static inline int trampoline_probe_handler(struct kprobe *p,
-						struct pt_regs *regs)
-{
-	return 0;
-}
-static inline void trampoline_post_handler(struct kprobe *p,
-				struct pt_regs *regs, unsigned long flags)
-{
-}
 static inline void arch_prepare_kretprobe(struct kretprobe *rp,
 					struct pt_regs *regs)
 {
 }
-static inline void arch_kprobe_flush_task(struct task_struct *tk)
-{
-}
-#define arch_get_kprobe_task(ptr) ((struct task_struct *)NULL)
 #endif /* ARCH_SUPPORTS_KRETPROBES */
 /*
  * Function-return probe -
@@ -155,8 +134,8 @@ struct kretprobe_instance {
 	struct hlist_node uflist; /* either on free list or used list */
 	struct hlist_node hlist;
 	struct kretprobe *rp;
-	void *ret_addr;
-	void *stack_addr;
+	kprobe_opcode_t *ret_addr;
+	struct task_struct *task;
 };
 
 #ifdef CONFIG_KPROBES
@@ -176,6 +155,7 @@ extern void arch_copy_kprobe(struct kprobe *p);
 extern void arch_arm_kprobe(struct kprobe *p);
 extern void arch_disarm_kprobe(struct kprobe *p);
 extern void arch_remove_kprobe(struct kprobe *p);
+extern int arch_init(void);
 extern void show_registers(struct pt_regs *regs);
 extern kprobe_opcode_t *get_insn_slot(void);
 extern void free_insn_slot(kprobe_opcode_t *slot);
@@ -196,8 +176,6 @@ int register_kretprobe(struct kretprobe *rp);
 void unregister_kretprobe(struct kretprobe *rp);
 
 struct kretprobe_instance *get_free_rp_inst(struct kretprobe *rp);
-struct kretprobe_instance *get_rp_inst(void *sara);
-struct kretprobe_instance *get_rp_inst_tsk(struct task_struct *tk);
 void add_rp_inst(struct kretprobe_instance *ri);
 void kprobe_flush_task(struct task_struct *tk);
 void recycle_rp_inst(struct kretprobe_instance *ri);
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 65242529a75..90c0e82b650 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -240,12 +240,6 @@ static int aggr_break_handler(struct kprobe *p, struct pt_regs *regs)
 	return 0;
 }
 
-struct kprobe trampoline_p = {
-		.addr = (kprobe_opcode_t *) &kretprobe_trampoline,
-		.pre_handler = trampoline_probe_handler,
-		.post_handler = trampoline_post_handler
-};
-
 struct kretprobe_instance *get_free_rp_inst(struct kretprobe *rp)
 {
 	struct hlist_node *node;
@@ -264,35 +258,18 @@ static struct kretprobe_instance *get_used_rp_inst(struct kretprobe *rp)
 	return NULL;
 }
 
-struct kretprobe_instance *get_rp_inst(void *sara)
-{
-	struct hlist_head *head;
-	struct hlist_node *node;
-	struct task_struct *tsk;
-	struct kretprobe_instance *ri;
-
-	tsk = arch_get_kprobe_task(sara);
-	head = &kretprobe_inst_table[hash_ptr(tsk, KPROBE_HASH_BITS)];
-	hlist_for_each_entry(ri, node, head, hlist) {
-		if (ri->stack_addr == sara)
-			return ri;
-	}
-	return NULL;
-}
-
 void add_rp_inst(struct kretprobe_instance *ri)
 {
-	struct task_struct *tsk;
 	/*
 	 * Remove rp inst off the free list -
 	 * Add it back when probed function returns
 	 */
 	hlist_del(&ri->uflist);
-	tsk = arch_get_kprobe_task(ri->stack_addr);
+
 	/* Add rp inst onto table */
 	INIT_HLIST_NODE(&ri->hlist);
 	hlist_add_head(&ri->hlist,
-			&kretprobe_inst_table[hash_ptr(tsk, KPROBE_HASH_BITS)]);
+			&kretprobe_inst_table[hash_ptr(ri->task, KPROBE_HASH_BITS)]);
 
 	/* Also add this rp inst to the used list. */
 	INIT_HLIST_NODE(&ri->uflist);
@@ -319,34 +296,25 @@ struct hlist_head * kretprobe_inst_table_head(struct task_struct *tsk)
 	return &kretprobe_inst_table[hash_ptr(tsk, KPROBE_HASH_BITS)];
 }
 
-struct kretprobe_instance *get_rp_inst_tsk(struct task_struct *tk)
-{
-	struct task_struct *tsk;
-	struct hlist_head *head;
-	struct hlist_node *node;
-	struct kretprobe_instance *ri;
-
-	head = &kretprobe_inst_table[hash_ptr(tk, KPROBE_HASH_BITS)];
-
-	hlist_for_each_entry(ri, node, head, hlist) {
-		tsk = arch_get_kprobe_task(ri->stack_addr);
-		if (tsk == tk)
-			return ri;
-	}
-	return NULL;
-}
-
 /*
- * This function is called from do_exit or do_execv when task tk's stack is
- * about to be recycled. Recycle any function-return probe instances
- * associated with this task. These represent probed functions that have
- * been called but may never return.
+ * This function is called from exit_thread or flush_thread when task tk's
+ * stack is being recycled so that we can recycle any function-return probe
+ * instances associated with this task. These left over instances represent
+ * probed functions that have been called but will never return.
  */
 void kprobe_flush_task(struct task_struct *tk)
 {
+        struct kretprobe_instance *ri;
+        struct hlist_head *head;
+	struct hlist_node *node, *tmp;
 	unsigned long flags = 0;
+
 	spin_lock_irqsave(&kprobe_lock, flags);
-	arch_kprobe_flush_task(tk);
+        head = kretprobe_inst_table_head(current);
+        hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+                if (ri->task == tk)
+                        recycle_rp_inst(ri);
+        }
 	spin_unlock_irqrestore(&kprobe_lock, flags);
 }
 
@@ -606,9 +574,10 @@ static int __init init_kprobes(void)
 		INIT_HLIST_HEAD(&kretprobe_inst_table[i]);
 	}
 
-	err = register_die_notifier(&kprobe_exceptions_nb);
-	/* Register the trampoline probe for return probe */
-	register_kprobe(&trampoline_p);
+	err = arch_init();
+	if (!err)
+		err = register_die_notifier(&kprobe_exceptions_nb);
+
 	return err;
 }
 
-- 
cgit v1.2.3-70-g09d2


From da9091ee3b5f9808c64abb925cefe7b100018614 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Mon, 27 Jun 2005 15:24:30 -0700
Subject: [PATCH] ide: it8212 backport for Bartlomiej IDE

This lets you throw out the iteraid stuff that has ended up back in due
to stupid goings on in the IDE world. Its the same heavily tested code
shipped in Fedora/Red Hat products but without the other dependancies on
the Bartlomiej IDE layer.

Pre-requisite: the ide-disk patch I sent to handle pure LBA devices.

Obviously you lose things like hot unplug with the Bartlomiej IDE layer
at the moment but that won't matter to most users.

The patch does the following
- Add IT8211/12 to pci_ids.h
- Add Makefile/Kconfig entry
- Add it8212 driver

No core IDE code is touched by this diff

Embedded system testing and the ability to force raid mode off by David
Howells

Made possible by the ite reference code, documentation and also several
clarifications and pieces of assistance provided by ITE themselves

Signed-off-by: Alan Cox <alan@redhat.com>
Acked-by: Bartlomiej Zolnierkiewicz <B.Zolnierkiewicz@elka.pw.edu.pl>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/ide/Kconfig      |   6 +
 drivers/ide/pci/Makefile |   1 +
 drivers/ide/pci/it821x.c | 812 +++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/pci_ids.h  |   2 +
 4 files changed, 821 insertions(+)
 create mode 100644 drivers/ide/pci/it821x.c

(limited to 'include/linux')

diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig
index 0273f124a4f..5f33df47aa7 100644
--- a/drivers/ide/Kconfig
+++ b/drivers/ide/Kconfig
@@ -606,6 +606,12 @@ config BLK_DEV_IT8172
 	  <http://www.ite.com.tw/ia/brief_it8172bsp.htm>; picture of the
 	  board at <http://www.mvista.com/partners/semiconductor/ite.html>.
 
+config BLK_DEV_IT821X
+	tristate "IT821X IDE support"
+	help
+	  This driver adds support for the ITE 8211 IDE controller and the
+	  IT 8212 IDE RAID controller in both RAID and pass-through mode.
+
 config BLK_DEV_NS87415
 	tristate "NS87415 chipset support"
 	help
diff --git a/drivers/ide/pci/Makefile b/drivers/ide/pci/Makefile
index 55e6e553e49..af46226c179 100644
--- a/drivers/ide/pci/Makefile
+++ b/drivers/ide/pci/Makefile
@@ -12,6 +12,7 @@ obj-$(CONFIG_BLK_DEV_HPT34X)		+= hpt34x.o
 obj-$(CONFIG_BLK_DEV_HPT366)		+= hpt366.o
 #obj-$(CONFIG_BLK_DEV_HPT37X)		+= hpt37x.o
 obj-$(CONFIG_BLK_DEV_IT8172)		+= it8172.o
+obj-$(CONFIG_BLK_DEV_IT821X)		+= it821x.o
 obj-$(CONFIG_BLK_DEV_NS87415)		+= ns87415.o
 obj-$(CONFIG_BLK_DEV_OPTI621)		+= opti621.o
 obj-$(CONFIG_BLK_DEV_PDC202XX_OLD)	+= pdc202xx_old.o
diff --git a/drivers/ide/pci/it821x.c b/drivers/ide/pci/it821x.c
new file mode 100644
index 00000000000..e440036e651
--- /dev/null
+++ b/drivers/ide/pci/it821x.c
@@ -0,0 +1,812 @@
+
+/*
+ * linux/drivers/ide/pci/it821x.c		Version 0.09	December 2004
+ *
+ * Copyright (C) 2004		Red Hat <alan@redhat.com>
+ *
+ *  May be copied or modified under the terms of the GNU General Public License
+ *  Based in part on the ITE vendor provided SCSI driver.
+ *
+ *  Documentation available from
+ * 	http://www.ite.com.tw/pc/IT8212F_V04.pdf
+ *  Some other documents are NDA.
+ *
+ *  The ITE8212 isn't exactly a standard IDE controller. It has two
+ *  modes. In pass through mode then it is an IDE controller. In its smart
+ *  mode its actually quite a capable hardware raid controller disguised
+ *  as an IDE controller. Smart mode only understands DMA read/write and
+ *  identify, none of the fancier commands apply. The IT8211 is identical
+ *  in other respects but lacks the raid mode.
+ *
+ *  Errata:
+ *  o	Rev 0x10 also requires master/slave hold the same DMA timings and
+ *	cannot do ATAPI MWDMA.
+ *  o	The identify data for raid volumes lacks CHS info (technically ok)
+ *	but also fails to set the LBA28 and other bits. We fix these in
+ *	the IDE probe quirk code.
+ *  o	If you write LBA48 sized I/O's (ie > 256 sector) in smart mode
+ *	raid then the controller firmware dies
+ *  o	Smart mode without RAID doesn't clear all the necessary identify
+ *	bits to reduce the command set to the one used
+ *
+ *  This has a few impacts on the driver
+ *  - In pass through mode we do all the work you would expect
+ *  - In smart mode the clocking set up is done by the controller generally
+ *    but we must watch the other limits and filter.
+ *  - There are a few extra vendor commands that actually talk to the
+ *    controller but only work PIO with no IRQ.
+ *
+ *  Vendor areas of the identify block in smart mode are used for the
+ *  timing and policy set up. Each HDD in raid mode also has a serial
+ *  block on the disk. The hardware extra commands are get/set chip status,
+ *  rebuild, get rebuild status.
+ *
+ *  In Linux the driver supports pass through mode as if the device was
+ *  just another IDE controller. If the smart mode is running then
+ *  volumes are managed by the controller firmware and each IDE "disk"
+ *  is a raid volume. Even more cute - the controller can do automated
+ *  hotplug and rebuild.
+ *
+ *  The pass through controller itself is a little demented. It has a
+ *  flaw that it has a single set of PIO/MWDMA timings per channel so
+ *  non UDMA devices restrict each others performance. It also has a
+ *  single clock source per channel so mixed UDMA100/133 performance
+ *  isn't perfect and we have to pick a clock. Thankfully none of this
+ *  matters in smart mode. ATAPI DMA is not currently supported.
+ *
+ *  It seems the smart mode is a win for RAID1/RAID10 but otherwise not.
+ *
+ *  TODO
+ *	-	ATAPI UDMA is ok but not MWDMA it seems
+ *	-	RAID configuration ioctls
+ *	-	Move to libata once it grows up
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/hdreg.h>
+#include <linux/ide.h>
+#include <linux/init.h>
+
+#include <asm/io.h>
+
+struct it821x_dev
+{
+	unsigned int smart:1,		/* Are we in smart raid mode */
+		timing10:1;		/* Rev 0x10 */
+	u8	clock_mode;		/* 0, ATA_50 or ATA_66 */
+	u8	want[2][2];		/* Mode/Pri log for master slave */
+	/* We need these for switching the clock when DMA goes on/off
+	   The high byte is the 66Mhz timing */
+	u16	pio[2];			/* Cached PIO values */
+	u16	mwdma[2];		/* Cached MWDMA values */
+	u16	udma[2];		/* Cached UDMA values (per drive) */
+};
+
+#define ATA_66		0
+#define ATA_50		1
+#define ATA_ANY		2
+
+#define UDMA_OFF	0
+#define MWDMA_OFF	0
+
+/*
+ *	We allow users to force the card into non raid mode without
+ *	flashing the alternative BIOS. This is also neccessary right now
+ *	for embedded platforms that cannot run a PC BIOS but are using this
+ *	device.
+ */
+
+static int it8212_noraid;
+
+/**
+ *	it821x_program	-	program the PIO/MWDMA registers
+ *	@drive: drive to tune
+ *
+ *	Program the PIO/MWDMA timing for this channel according to the
+ *	current clock.
+ */
+
+static void it821x_program(ide_drive_t *drive, u16 timing)
+{
+	ide_hwif_t *hwif	= drive->hwif;
+	struct it821x_dev *itdev = ide_get_hwifdata(hwif);
+	int channel = hwif->channel;
+	u8 conf;
+
+	/* Program PIO/MWDMA timing bits */
+	if(itdev->clock_mode == ATA_66)
+		conf = timing >> 8;
+	else
+		conf = timing & 0xFF;
+	pci_write_config_byte(hwif->pci_dev, 0x54 + 4 * channel, conf);
+}
+
+/**
+ *	it821x_program_udma	-	program the UDMA registers
+ *	@drive: drive to tune
+ *
+ *	Program the UDMA timing for this drive according to the
+ *	current clock.
+ */
+
+static void it821x_program_udma(ide_drive_t *drive, u16 timing)
+{
+	ide_hwif_t *hwif	= drive->hwif;
+	struct it821x_dev *itdev = ide_get_hwifdata(hwif);
+	int channel = hwif->channel;
+	int unit = drive->select.b.unit;
+	u8 conf;
+
+	/* Program UDMA timing bits */
+	if(itdev->clock_mode == ATA_66)
+		conf = timing >> 8;
+	else
+		conf = timing & 0xFF;
+	if(itdev->timing10 == 0)
+		pci_write_config_byte(hwif->pci_dev, 0x56 + 4 * channel + unit, conf);
+	else {
+		pci_write_config_byte(hwif->pci_dev, 0x56 + 4 * channel, conf);
+		pci_write_config_byte(hwif->pci_dev, 0x56 + 4 * channel + 1, conf);
+	}
+}
+
+
+/**
+ *	it821x_clock_strategy
+ *	@hwif: hardware interface
+ *
+ *	Select between the 50 and 66Mhz base clocks to get the best
+ *	results for this interface.
+ */
+
+static void it821x_clock_strategy(ide_drive_t *drive)
+{
+	ide_hwif_t *hwif = drive->hwif;
+	struct it821x_dev *itdev = ide_get_hwifdata(hwif);
+
+	u8 unit = drive->select.b.unit;
+	ide_drive_t *pair = &hwif->drives[1-unit];
+
+	int clock, altclock;
+	u8 v;
+	int sel = 0;
+
+	if(itdev->want[0][0] > itdev->want[1][0]) {
+		clock = itdev->want[0][1];
+		altclock = itdev->want[1][1];
+	} else {
+		clock = itdev->want[1][1];
+		altclock = itdev->want[0][1];
+	}
+
+	/* Master doesn't care does the slave ? */
+	if(clock == ATA_ANY)
+		clock = altclock;
+
+	/* Nobody cares - keep the same clock */
+	if(clock == ATA_ANY)
+		return;
+	/* No change */
+	if(clock == itdev->clock_mode)
+		return;
+
+	/* Load this into the controller ? */
+	if(clock == ATA_66)
+		itdev->clock_mode = ATA_66;
+	else {
+		itdev->clock_mode = ATA_50;
+		sel = 1;
+	}
+	pci_read_config_byte(hwif->pci_dev, 0x50, &v);
+	v &= ~(1 << (1 + hwif->channel));
+	v |= sel << (1 + hwif->channel);
+	pci_write_config_byte(hwif->pci_dev, 0x50, v);
+
+	/*
+	 *	Reprogram the UDMA/PIO of the pair drive for the switch
+	 *	MWDMA will be dealt with by the dma switcher
+	 */
+	if(pair && itdev->udma[1-unit] != UDMA_OFF) {
+		it821x_program_udma(pair, itdev->udma[1-unit]);
+		it821x_program(pair, itdev->pio[1-unit]);
+	}
+	/*
+	 *	Reprogram the UDMA/PIO of our drive for the switch.
+	 *	MWDMA will be dealt with by the dma switcher
+	 */
+	if(itdev->udma[unit] != UDMA_OFF) {
+		it821x_program_udma(drive, itdev->udma[unit]);
+		it821x_program(drive, itdev->pio[unit]);
+	}
+}
+
+/**
+ *	it821x_ratemask	-	Compute available modes
+ *	@drive: IDE drive
+ *
+ *	Compute the available speeds for the devices on the interface. This
+ *	is all modes to ATA133 clipped by drive cable setup.
+ */
+
+static u8 it821x_ratemask (ide_drive_t *drive)
+{
+	u8 mode	= 4;
+	if (!eighty_ninty_three(drive))
+		mode = min(mode, (u8)1);
+	return mode;
+}
+
+/**
+ *	it821x_tuneproc	-	tune a drive
+ *	@drive: drive to tune
+ *	@mode_wanted: the target operating mode
+ *
+ *	Load the timing settings for this device mode into the
+ *	controller. By the time we are called the mode has been
+ *	modified as neccessary to handle the absence of seperate
+ *	master/slave timers for MWDMA/PIO.
+ *
+ *	This code is only used in pass through mode.
+ */
+
+static void it821x_tuneproc (ide_drive_t *drive, byte mode_wanted)
+{
+	ide_hwif_t *hwif	= drive->hwif;
+	struct it821x_dev *itdev = ide_get_hwifdata(hwif);
+	int unit = drive->select.b.unit;
+
+	/* Spec says 89 ref driver uses 88 */
+	static u16 pio[]	= { 0xAA88, 0xA382, 0xA181, 0x3332, 0x3121 };
+	static u8 pio_want[]    = { ATA_66, ATA_66, ATA_66, ATA_66, ATA_ANY };
+
+	if(itdev->smart)
+		return;
+
+	/* We prefer 66Mhz clock for PIO 0-3, don't care for PIO4 */
+	itdev->want[unit][1] = pio_want[mode_wanted];
+	itdev->want[unit][0] = 1;	/* PIO is lowest priority */
+	itdev->pio[unit] = pio[mode_wanted];
+	it821x_clock_strategy(drive);
+	it821x_program(drive, itdev->pio[unit]);
+}
+
+/**
+ *	it821x_tune_mwdma	-	tune a channel for MWDMA
+ *	@drive: drive to set up
+ *	@mode_wanted: the target operating mode
+ *
+ *	Load the timing settings for this device mode into the
+ *	controller when doing MWDMA in pass through mode. The caller
+ *	must manage the whole lack of per device MWDMA/PIO timings and
+ *	the shared MWDMA/PIO timing register.
+ */
+
+static void it821x_tune_mwdma (ide_drive_t *drive, byte mode_wanted)
+{
+	ide_hwif_t *hwif	= drive->hwif;
+	struct it821x_dev *itdev = (void *)ide_get_hwifdata(hwif);
+	int unit = drive->select.b.unit;
+	int channel = hwif->channel;
+	u8 conf;
+
+	static u16 dma[]	= { 0x8866, 0x3222, 0x3121 };
+	static u8 mwdma_want[]	= { ATA_ANY, ATA_66, ATA_ANY };
+
+	itdev->want[unit][1] = mwdma_want[mode_wanted];
+	itdev->want[unit][0] = 2;	/* MWDMA is low priority */
+	itdev->mwdma[unit] = dma[mode_wanted];
+	itdev->udma[unit] = UDMA_OFF;
+
+	/* UDMA bits off - Revision 0x10 do them in pairs */
+	pci_read_config_byte(hwif->pci_dev, 0x50, &conf);
+	if(itdev->timing10)
+		conf |= channel ? 0x60: 0x18;
+	else
+		conf |= 1 << (3 + 2 * channel + unit);
+	pci_write_config_byte(hwif->pci_dev, 0x50, conf);
+
+	it821x_clock_strategy(drive);
+	/* FIXME: do we need to program this ? */
+	/* it821x_program(drive, itdev->mwdma[unit]); */
+}
+
+/**
+ *	it821x_tune_udma	-	tune a channel for UDMA
+ *	@drive: drive to set up
+ *	@mode_wanted: the target operating mode
+ *
+ *	Load the timing settings for this device mode into the
+ *	controller when doing UDMA modes in pass through.
+ */
+
+static void it821x_tune_udma (ide_drive_t *drive, byte mode_wanted)
+{
+	ide_hwif_t *hwif	= drive->hwif;
+	struct it821x_dev *itdev = ide_get_hwifdata(hwif);
+	int unit = drive->select.b.unit;
+	int channel = hwif->channel;
+	u8 conf;
+
+	static u16 udma[]	= { 0x4433, 0x4231, 0x3121, 0x2121, 0x1111, 0x2211, 0x1111 };
+	static u8 udma_want[]	= { ATA_ANY, ATA_50, ATA_ANY, ATA_66, ATA_66, ATA_50, ATA_66 };
+
+	itdev->want[unit][1] = udma_want[mode_wanted];
+	itdev->want[unit][0] = 3;	/* UDMA is high priority */
+	itdev->mwdma[unit] = MWDMA_OFF;
+	itdev->udma[unit] = udma[mode_wanted];
+	if(mode_wanted >= 5)
+		itdev->udma[unit] |= 0x8080;	/* UDMA 5/6 select on */
+
+	/* UDMA on. Again revision 0x10 must do the pair */
+	pci_read_config_byte(hwif->pci_dev, 0x50, &conf);
+	if(itdev->timing10)
+		conf &= channel ? 0x9F: 0xE7;
+	else
+		conf &= ~ (1 << (3 + 2 * channel + unit));
+	pci_write_config_byte(hwif->pci_dev, 0x50, conf);
+
+	it821x_clock_strategy(drive);
+	it821x_program_udma(drive, itdev->udma[unit]);
+
+}
+
+/**
+ *	config_it821x_chipset_for_pio	-	set drive timings
+ *	@drive: drive to tune
+ *	@speed we want
+ *
+ *	Compute the best pio mode we can for a given device. We must
+ *	pick a speed that does not cause problems with the other device
+ *	on the cable.
+ */
+
+static void config_it821x_chipset_for_pio (ide_drive_t *drive, byte set_speed)
+{
+	u8 unit = drive->select.b.unit;
+	ide_hwif_t *hwif = drive->hwif;
+	ide_drive_t *pair = &hwif->drives[1-unit];
+	u8 speed = 0, set_pio	= ide_get_best_pio_mode(drive, 255, 5, NULL);
+	u8 pair_pio;
+
+	/* We have to deal with this mess in pairs */
+	if(pair != NULL) {
+		pair_pio = ide_get_best_pio_mode(pair, 255, 5, NULL);
+		/* Trim PIO to the slowest of the master/slave */
+		if(pair_pio < set_pio)
+			set_pio = pair_pio;
+	}
+	it821x_tuneproc(drive, set_pio);
+	speed = XFER_PIO_0 + set_pio;
+	/* XXX - We trim to the lowest of the pair so the other drive
+	   will always be fine at this point until we do hotplug passthru */
+
+	if (set_speed)
+		(void) ide_config_drive_speed(drive, speed);
+}
+
+/**
+ *	it821x_dma_read	-	DMA hook
+ *	@drive: drive for DMA
+ *
+ *	The IT821x has a single timing register for MWDMA and for PIO
+ *	operations. As we flip back and forth we have to reload the
+ *	clock. In addition the rev 0x10 device only works if the same
+ *	timing value is loaded into the master and slave UDMA clock
+ * 	so we must also reload that.
+ *
+ *	FIXME: we could figure out in advance if we need to do reloads
+ */
+
+static void it821x_dma_start(ide_drive_t *drive)
+{
+	ide_hwif_t *hwif = drive->hwif;
+	struct it821x_dev *itdev = ide_get_hwifdata(hwif);
+	int unit = drive->select.b.unit;
+	if(itdev->mwdma[unit] != MWDMA_OFF)
+		it821x_program(drive, itdev->mwdma[unit]);
+	else if(itdev->udma[unit] != UDMA_OFF && itdev->timing10)
+		it821x_program_udma(drive, itdev->udma[unit]);
+	ide_dma_start(drive);
+}
+
+/**
+ *	it821x_dma_write	-	DMA hook
+ *	@drive: drive for DMA stop
+ *
+ *	The IT821x has a single timing register for MWDMA and for PIO
+ *	operations. As we flip back and forth we have to reload the
+ *	clock.
+ */
+
+static int it821x_dma_end(ide_drive_t *drive)
+{
+	ide_hwif_t *hwif = drive->hwif;
+	int unit = drive->select.b.unit;
+	struct it821x_dev *itdev = ide_get_hwifdata(hwif);
+	int ret = __ide_dma_end(drive);
+	if(itdev->mwdma[unit] != MWDMA_OFF)
+		it821x_program(drive, itdev->pio[unit]);
+	return ret;
+}
+
+
+/**
+ *	it821x_tune_chipset	-	set controller timings
+ *	@drive: Drive to set up
+ *	@xferspeed: speed we want to achieve
+ *
+ *	Tune the ITE chipset for the desired mode. If we can't achieve
+ *	the desired mode then tune for a lower one, but ultimately
+ *	make the thing work.
+ */
+
+static int it821x_tune_chipset (ide_drive_t *drive, byte xferspeed)
+{
+
+	ide_hwif_t *hwif	= drive->hwif;
+	struct it821x_dev *itdev = ide_get_hwifdata(hwif);
+	u8 speed		= ide_rate_filter(it821x_ratemask(drive), xferspeed);
+
+	if(!itdev->smart) {
+		switch(speed) {
+			case XFER_PIO_4:
+			case XFER_PIO_3:
+			case XFER_PIO_2:
+			case XFER_PIO_1:
+			case XFER_PIO_0:
+				it821x_tuneproc(drive, (speed - XFER_PIO_0));
+				break;
+			/* MWDMA tuning is really hard because our MWDMA and PIO
+			   timings are kept in the same place. We can switch in the
+			   host dma on/off callbacks */
+			case XFER_MW_DMA_2:
+			case XFER_MW_DMA_1:
+			case XFER_MW_DMA_0:
+				it821x_tune_mwdma(drive, (speed - XFER_MW_DMA_0));
+				break;
+			case XFER_UDMA_6:
+			case XFER_UDMA_5:
+			case XFER_UDMA_4:
+			case XFER_UDMA_3:
+			case XFER_UDMA_2:
+			case XFER_UDMA_1:
+			case XFER_UDMA_0:
+				it821x_tune_udma(drive, (speed - XFER_UDMA_0));
+				break;
+			default:
+				return 1;
+		}
+	}
+	/*
+	 *	In smart mode the clocking is done by the host controller
+	 * 	snooping the mode we picked. The rest of it is not our problem
+	 */
+	return ide_config_drive_speed(drive, speed);
+}
+
+/**
+ *	config_chipset_for_dma	-	configure for DMA
+ *	@drive: drive to configure
+ *
+ *	Called by the IDE layer when it wants the timings set up.
+ */
+
+static int config_chipset_for_dma (ide_drive_t *drive)
+{
+	u8 speed	= ide_dma_speed(drive, it821x_ratemask(drive));
+
+	config_it821x_chipset_for_pio(drive, !speed);
+	it821x_tune_chipset(drive, speed);
+	return ide_dma_enable(drive);
+}
+
+/**
+ *	it821x_configure_drive_for_dma	-	set up for DMA transfers
+ *	@drive: drive we are going to set up
+ *
+ *	Set up the drive for DMA, tune the controller and drive as
+ *	required. If the drive isn't suitable for DMA or we hit
+ *	other problems then we will drop down to PIO and set up
+ *	PIO appropriately
+ */
+
+static int it821x_config_drive_for_dma (ide_drive_t *drive)
+{
+	ide_hwif_t *hwif	= drive->hwif;
+
+	if (ide_use_dma(drive)) {
+		if (config_chipset_for_dma(drive))
+			return hwif->ide_dma_on(drive);
+	}
+	config_it821x_chipset_for_pio(drive, 1);
+	return hwif->ide_dma_off_quietly(drive);
+}
+
+/**
+ *	ata66_it821x	-	check for 80 pin cable
+ *	@hwif: interface to check
+ *
+ *	Check for the presence of an ATA66 capable cable on the
+ *	interface. Problematic as it seems some cards don't have
+ *	the needed logic onboard.
+ */
+
+static unsigned int __devinit ata66_it821x(ide_hwif_t *hwif)
+{
+	/* The reference driver also only does disk side */
+	return 1;
+}
+
+/**
+ *	it821x_fixup	-	post init callback
+ *	@hwif: interface
+ *
+ *	This callback is run after the drives have been probed but
+ *	before anything gets attached. It allows drivers to do any
+ *	final tuning that is needed, or fixups to work around bugs.
+ */
+
+static void __devinit it821x_fixups(ide_hwif_t *hwif)
+{
+	struct it821x_dev *itdev = ide_get_hwifdata(hwif);
+	int i;
+
+	if(!itdev->smart) {
+		/*
+		 *	If we are in pass through mode then not much
+		 *	needs to be done, but we do bother to clear the
+		 *	IRQ mask as we may well be in PIO (eg rev 0x10)
+		 *	for now and we know unmasking is safe on this chipset.
+		 */
+		for (i = 0; i < 2; i++) {
+			ide_drive_t *drive = &hwif->drives[i];
+			if(drive->present)
+				drive->unmask = 1;
+		}
+		return;
+	}
+	/*
+	 *	Perform fixups on smart mode. We need to "lose" some
+	 *	capabilities the firmware lacks but does not filter, and
+	 *	also patch up some capability bits that it forgets to set
+	 *	in RAID mode.
+	 */
+
+	for(i = 0; i < 2; i++) {
+		ide_drive_t *drive = &hwif->drives[i];
+		struct hd_driveid *id;
+		u16 *idbits;
+
+		if(!drive->present)
+			continue;
+		id = drive->id;
+		idbits = (u16 *)drive->id;
+
+		/* Check for RAID v native */
+		if(strstr(id->model, "Integrated Technology Express")) {
+			/* In raid mode the ident block is slightly buggy
+			   We need to set the bits so that the IDE layer knows
+			   LBA28. LBA48 and DMA ar valid */
+			id->capability |= 3;		/* LBA28, DMA */
+			id->command_set_2 |= 0x0400;	/* LBA48 valid */
+			id->cfs_enable_2 |= 0x0400;	/* LBA48 on */
+			/* Reporting logic */
+			printk(KERN_INFO "%s: IT8212 %sRAID %d volume",
+				drive->name,
+				idbits[147] ? "Bootable ":"",
+				idbits[129]);
+				if(idbits[129] != 1)
+					printk("(%dK stripe)", idbits[146]);
+				printk(".\n");
+			/* Now the core code will have wrongly decided no DMA
+			   so we need to fix this */
+			hwif->ide_dma_off_quietly(drive);
+#ifdef CONFIG_IDEDMA_ONLYDISK
+			if (drive->media == ide_disk)
+#endif
+				hwif->ide_dma_check(drive);
+		} else {
+			/* Non RAID volume. Fixups to stop the core code
+			   doing unsupported things */
+			id->field_valid &= 1;
+			id->queue_depth = 0;
+			id->command_set_1 = 0;
+			id->command_set_2 &= 0xC400;
+			id->cfsse &= 0xC000;
+			id->cfs_enable_1 = 0;
+			id->cfs_enable_2 &= 0xC400;
+			id->csf_default &= 0xC000;
+			id->word127 = 0;
+			id->dlf = 0;
+			id->csfo = 0;
+			id->cfa_power = 0;
+			printk(KERN_INFO "%s: Performing identify fixups.\n",
+				drive->name);
+		}
+	}
+
+}
+
+/**
+ *	init_hwif_it821x	-	set up hwif structs
+ *	@hwif: interface to set up
+ *
+ *	We do the basic set up of the interface structure. The IT8212
+ *	requires several custom handlers so we override the default
+ *	ide DMA handlers appropriately
+ */
+
+static void __devinit init_hwif_it821x(ide_hwif_t *hwif)
+{
+	struct it821x_dev *idev = kmalloc(sizeof(struct it821x_dev), GFP_KERNEL);
+	u8 conf;
+
+	if(idev == NULL) {
+		printk(KERN_ERR "it821x: out of memory, falling back to legacy behaviour.\n");
+		goto fallback;
+	}
+	memset(idev, 0, sizeof(struct it821x_dev));
+	ide_set_hwifdata(hwif, idev);
+
+	pci_read_config_byte(hwif->pci_dev, 0x50, &conf);
+	if(conf & 1) {
+		idev->smart = 1;
+		hwif->atapi_dma = 0;
+		/* Long I/O's although allowed in LBA48 space cause the
+		   onboard firmware to enter the twighlight zone */
+		hwif->rqsize = 256;
+	}
+
+	/* Pull the current clocks from 0x50 also */
+	if (conf & (1 << (1 + hwif->channel)))
+		idev->clock_mode = ATA_50;
+	else
+		idev->clock_mode = ATA_66;
+
+	idev->want[0][1] = ATA_ANY;
+	idev->want[1][1] = ATA_ANY;
+
+	/*
+	 *	Not in the docs but according to the reference driver
+	 *	this is neccessary.
+	 */
+
+	pci_read_config_byte(hwif->pci_dev, 0x08, &conf);
+	if(conf == 0x10) {
+		idev->timing10 = 1;
+		hwif->atapi_dma = 0;
+		if(!idev->smart)
+			printk(KERN_WARNING "it821x: Revision 0x10, workarounds activated.\n");
+	}
+
+	hwif->speedproc = &it821x_tune_chipset;
+	hwif->tuneproc	= &it821x_tuneproc;
+
+	/* MWDMA/PIO clock switching for pass through mode */
+	if(!idev->smart) {
+		hwif->dma_start = &it821x_dma_start;
+		hwif->ide_dma_end = &it821x_dma_end;
+	}
+
+	hwif->drives[0].autotune = 1;
+	hwif->drives[1].autotune = 1;
+
+	if (!hwif->dma_base)
+		goto fallback;
+
+	hwif->ultra_mask = 0x7f;
+	hwif->mwdma_mask = 0x07;
+	hwif->swdma_mask = 0x07;
+
+	hwif->ide_dma_check = &it821x_config_drive_for_dma;
+	if (!(hwif->udma_four))
+		hwif->udma_four = ata66_it821x(hwif);
+
+	/*
+	 *	The BIOS often doesn't set up DMA on this controller
+	 *	so we always do it.
+	 */
+
+	hwif->autodma = 1;
+	hwif->drives[0].autodma = hwif->autodma;
+	hwif->drives[1].autodma = hwif->autodma;
+	return;
+fallback:
+	hwif->autodma = 0;
+	return;
+}
+
+static void __devinit it8212_disable_raid(struct pci_dev *dev)
+{
+	/* Reset local CPU, and set BIOS not ready */
+	pci_write_config_byte(dev, 0x5E, 0x01);
+
+	/* Set to bypass mode, and reset PCI bus */
+	pci_write_config_byte(dev, 0x50, 0x00);
+	pci_write_config_word(dev, PCI_COMMAND,
+			      PCI_COMMAND_PARITY | PCI_COMMAND_IO |
+			      PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER);
+	pci_write_config_word(dev, 0x40, 0xA0F3);
+
+	pci_write_config_dword(dev,0x4C, 0x02040204);
+	pci_write_config_byte(dev, 0x42, 0x36);
+	pci_write_config_byte(dev, PCI_LATENCY_TIMER, 0);
+}
+
+static unsigned int __devinit init_chipset_it821x(struct pci_dev *dev, const char *name)
+{
+	u8 conf;
+	static char *mode[2] = { "pass through", "smart" };
+
+	/* Force the card into bypass mode if so requested */
+	if (it8212_noraid) {
+		printk(KERN_INFO "it8212: forcing bypass mode.\n");
+		it8212_disable_raid(dev);
+	}
+	pci_read_config_byte(dev, 0x50, &conf);
+	printk(KERN_INFO "it821x: controller in %s mode.\n", mode[conf & 1]);
+	return 0;
+}
+
+
+#define DECLARE_ITE_DEV(name_str)			\
+	{						\
+		.name		= name_str,		\
+		.init_chipset	= init_chipset_it821x,	\
+		.init_hwif	= init_hwif_it821x,	\
+		.channels	= 2,			\
+		.autodma	= AUTODMA,		\
+		.bootable	= ON_BOARD,		\
+		.fixup	 	= it821x_fixups		\
+	}
+
+static ide_pci_device_t it821x_chipsets[] __devinitdata = {
+	/* 0 */ DECLARE_ITE_DEV("IT8212"),
+};
+
+/**
+ *	it821x_init_one	-	pci layer discovery entry
+ *	@dev: PCI device
+ *	@id: ident table entry
+ *
+ *	Called by the PCI code when it finds an ITE821x controller.
+ *	We then use the IDE PCI generic helper to do most of the work.
+ */
+
+static int __devinit it821x_init_one(struct pci_dev *dev, const struct pci_device_id *id)
+{
+	ide_setup_pci_device(dev, &it821x_chipsets[id->driver_data]);
+	return 0;
+}
+
+static struct pci_device_id it821x_pci_tbl[] = {
+	{ PCI_VENDOR_ID_ITE, PCI_DEVICE_ID_ITE_8211,  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{ PCI_VENDOR_ID_ITE, PCI_DEVICE_ID_ITE_8212,  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{ 0, },
+};
+
+MODULE_DEVICE_TABLE(pci, it821x_pci_tbl);
+
+static struct pci_driver driver = {
+	.name		= "ITE821x IDE",
+	.id_table	= it821x_pci_tbl,
+	.probe		= it821x_init_one,
+};
+
+static int __init it821x_ide_init(void)
+{
+	return ide_pci_register_driver(&driver);
+}
+
+module_init(it821x_ide_init);
+
+module_param_named(noraid, it8212_noraid, int, S_IRUGO);
+MODULE_PARM_DESC(it8212_noraid, "Force card into bypass mode");
+
+MODULE_AUTHOR("Alan Cox");
+MODULE_DESCRIPTION("PCI driver module for the ITE 821x");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index a3961e1d518..1e0bc6a8d65 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1815,6 +1815,8 @@
 #define PCI_VENDOR_ID_ITE		0x1283
 #define PCI_DEVICE_ID_ITE_IT8172G	0x8172
 #define PCI_DEVICE_ID_ITE_IT8172G_AUDIO 0x0801
+#define PCI_DEVICE_ID_ITE_8211		0x8211
+#define PCI_DEVICE_ID_ITE_8212		0x8212
 #define PCI_DEVICE_ID_ITE_8872		0x8872
 #define PCI_DEVICE_ID_ITE_IT8330G_0	0xe886
 
-- 
cgit v1.2.3-70-g09d2


From 1ad275e3e7d253d44f03868e85977c908e334fed Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Mon, 27 Jun 2005 16:28:06 -0700
Subject: [PATCH] pcmcia: device and driver matching

The actual matching of pcmcia drivers and pcmcia devices.  The original
version of this was written by David Woodhouse.

Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/pcmcia/ds.c             | 123 +++++++++++++++++++++++++++-
 include/linux/mod_devicetable.h |  33 ++++++++
 include/pcmcia/device_id.h      | 175 ++++++++++++++++++++++++++++++++++++++++
 include/pcmcia/ds.h             |   7 +-
 4 files changed, 336 insertions(+), 2 deletions(-)
 create mode 100644 include/pcmcia/device_id.h

(limited to 'include/linux')

diff --git a/drivers/pcmcia/ds.c b/drivers/pcmcia/ds.c
index 35d479b0df6..5701b93b2dd 100644
--- a/drivers/pcmcia/ds.c
+++ b/drivers/pcmcia/ds.c
@@ -101,6 +101,9 @@ struct pcmcia_bus_socket {
 	u8			device_count; /* the number of devices, used
 					       * only internally and subject
 					       * to incorrectness and change */
+
+	u8			device_add_pending;
+	struct work_struct	device_add;
 };
 static spinlock_t pcmcia_dev_list_lock;
 
@@ -512,6 +515,10 @@ static struct pcmcia_device * pcmcia_device_add(struct pcmcia_bus_socket *s, uns
 
 	down(&device_add_lock);
 
+	/* max of 2 devices per card */
+	if (s->device_count == 2)
+		goto err_put;
+
 	p_dev = kmalloc(sizeof(struct pcmcia_device), GFP_KERNEL);
 	if (!p_dev)
 		goto err_put;
@@ -537,6 +544,8 @@ static struct pcmcia_device * pcmcia_device_add(struct pcmcia_bus_socket *s, uns
 	list_add_tail(&p_dev->socket_device_list, &s->devices_list);
 	spin_unlock_irqrestore(&pcmcia_dev_list_lock, flags);
 
+	pcmcia_device_query(p_dev);
+
 	if (device_register(&p_dev->dev)) {
 		spin_lock_irqsave(&pcmcia_dev_list_lock, flags);
 		list_del(&p_dev->socket_device_list);
@@ -591,14 +600,123 @@ static int pcmcia_card_add(struct pcmcia_socket *s)
 }
 
 
+static void pcmcia_delayed_add_pseudo_device(void *data)
+{
+	struct pcmcia_bus_socket *s = data;
+	pcmcia_device_add(s, 0);
+	s->device_add_pending = 0;
+}
+
+static inline void pcmcia_add_pseudo_device(struct pcmcia_bus_socket *s)
+{
+	if (!s->device_add_pending) {
+		schedule_work(&s->device_add);
+		s->device_add_pending = 1;
+	}
+	return;
+}
+
+
+static inline int pcmcia_devmatch(struct pcmcia_device *dev,
+				  struct pcmcia_device_id *did)
+{
+	if (did->match_flags & PCMCIA_DEV_ID_MATCH_MANF_ID) {
+		if ((!dev->has_manf_id) || (dev->manf_id != did->manf_id))
+			return 0;
+	}
+
+	if (did->match_flags & PCMCIA_DEV_ID_MATCH_CARD_ID) {
+		if ((!dev->has_card_id) || (dev->card_id != did->card_id))
+			return 0;
+	}
+
+	if (did->match_flags & PCMCIA_DEV_ID_MATCH_FUNCTION) {
+		if (dev->func != did->function)
+			return 0;
+	}
+
+	if (did->match_flags & PCMCIA_DEV_ID_MATCH_PROD_ID1) {
+		if (!dev->prod_id[0])
+			return 0;
+		if (strcmp(did->prod_id[0], dev->prod_id[0]))
+			return 0;
+	}
+
+	if (did->match_flags & PCMCIA_DEV_ID_MATCH_PROD_ID2) {
+		if (!dev->prod_id[1])
+			return 0;
+		if (strcmp(did->prod_id[1], dev->prod_id[1]))
+			return 0;
+	}
+
+	if (did->match_flags & PCMCIA_DEV_ID_MATCH_PROD_ID3) {
+		if (!dev->prod_id[2])
+			return 0;
+		if (strcmp(did->prod_id[2], dev->prod_id[2]))
+			return 0;
+	}
+
+	if (did->match_flags & PCMCIA_DEV_ID_MATCH_PROD_ID4) {
+		if (!dev->prod_id[3])
+			return 0;
+		if (strcmp(did->prod_id[3], dev->prod_id[3]))
+			return 0;
+	}
+
+	if (did->match_flags & PCMCIA_DEV_ID_MATCH_DEVICE_NO) {
+		/* handle pseudo multifunction devices:
+		 * there are at most two pseudo multifunction devices.
+		 * if we're matching against the first, schedule a
+		 * call which will then check whether there are two
+		 * pseudo devices, and if not, add the second one.
+		 */
+		if (dev->device_no == 0)
+			pcmcia_add_pseudo_device(dev->socket->pcmcia);
+
+		if (dev->device_no != did->device_no)
+			return 0;
+	}
+
+	if (did->match_flags & PCMCIA_DEV_ID_MATCH_FUNC_ID) {
+		if ((!dev->has_func_id) || (dev->func_id != did->func_id))
+			return 0;
+
+		/* if this is a pseudo-multi-function device,
+		 * we need explicit matches */
+		if (did->match_flags & PCMCIA_DEV_ID_MATCH_DEVICE_NO)
+			return 0;
+		if (dev->device_no)
+			return 0;
+
+		/* also, FUNC_ID matching needs to be activated by userspace
+		 * after it has re-checked that there is no possible module
+		 * with a prod_id/manf_id/card_id match.
+		 */
+		if (!dev->allow_func_id_match)
+			return 0;
+	}
+
+	dev->dev.driver_data = (void *) did;
+
+	return 1;
+}
+
+
 static int pcmcia_bus_match(struct device * dev, struct device_driver * drv) {
 	struct pcmcia_device * p_dev = to_pcmcia_dev(dev);
 	struct pcmcia_driver * p_drv = to_pcmcia_drv(drv);
+	struct pcmcia_device_id *did = p_drv->id_table;
 
 	/* matching by cardmgr */
 	if (p_dev->cardmgr == p_drv)
 		return 1;
 
+	while (did && did->match_flags) {
+		if (pcmcia_devmatch(p_dev, did))
+			return 1;
+		did++;
+	}
+
 	return 0;
 }
 
@@ -922,7 +1040,9 @@ static int bind_request(struct pcmcia_bus_socket *s, bind_info_t *bind_info)
 rescan:
 	p_dev->cardmgr = p_drv;
 
-	pcmcia_device_query(p_dev);
+	/* if a driver is already running, we can abort */
+	if (p_dev->dev.driver)
+		goto err_put_module;
 
 	/*
 	 * Prevent this racing with a card insertion.
@@ -1595,6 +1715,7 @@ static int __devinit pcmcia_bus_add_socket(struct class_device *class_dev)
 
 	init_waitqueue_head(&s->queue);
 	INIT_LIST_HEAD(&s->devices_list);
+	INIT_WORK(&s->device_add, pcmcia_delayed_add_pseudo_device, s);
 
 	/* Set up hotline to Card Services */
 	s->callback.owner = THIS_MODULE;
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index d6eb7b2efc0..e9651cd8310 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -175,4 +175,37 @@ struct serio_device_id {
 };
 
 
+/* PCMCIA */
+
+struct pcmcia_device_id {
+	__u16		match_flags;
+
+	__u16		manf_id;
+	__u16 		card_id;
+
+	__u8  		func_id;
+
+	/* for real multi-function devices */
+	__u8  		function;
+
+	/* for pseude multi-function devices */
+	__u8  		device_no;
+
+	const char *	prod_id[4];
+	__u32 		prod_id_hash[4];
+
+	/* not matched against */
+	kernel_ulong_t	driver_info;
+};
+
+#define PCMCIA_DEV_ID_MATCH_MANF_ID	0x0001
+#define PCMCIA_DEV_ID_MATCH_CARD_ID	0x0002
+#define PCMCIA_DEV_ID_MATCH_FUNC_ID	0x0004
+#define PCMCIA_DEV_ID_MATCH_FUNCTION	0x0008
+#define PCMCIA_DEV_ID_MATCH_PROD_ID1	0x0010
+#define PCMCIA_DEV_ID_MATCH_PROD_ID2	0x0020
+#define PCMCIA_DEV_ID_MATCH_PROD_ID3	0x0040
+#define PCMCIA_DEV_ID_MATCH_PROD_ID4	0x0080
+#define PCMCIA_DEV_ID_MATCH_DEVICE_NO	0x0100
+
 #endif /* LINUX_MOD_DEVICETABLE_H */
diff --git a/include/pcmcia/device_id.h b/include/pcmcia/device_id.h
new file mode 100644
index 00000000000..acf68656de3
--- /dev/null
+++ b/include/pcmcia/device_id.h
@@ -0,0 +1,175 @@
+/*
+ * Copyright (2003-2004) 	Dominik Brodowski <linux@brodo.de>
+ *				David Woodhouse
+ *
+ * License: GPL v2
+ */
+
+#define PCMCIA_DEVICE_MANF_CARD(manf, card) { \
+	.match_flags = PCMCIA_DEV_ID_MATCH_MANF_ID| \
+			PCMCIA_DEV_ID_MATCH_CARD_ID, \
+	.manf_id = (manf), \
+	.card_id = (card), }
+
+#define PCMCIA_DEVICE_FUNC_ID(func) { \
+	.match_flags = PCMCIA_DEV_ID_MATCH_FUNC_ID, \
+	.func_id = (func), }
+
+#define PCMCIA_DEVICE_PROD_ID1(v1, vh1) { \
+	.match_flags = PCMCIA_DEV_ID_MATCH_PROD_ID1, \
+	.prod_id = { (v1), NULL, NULL, NULL }, \
+	.prod_id_hash = { (vh1), 0, 0, 0 }, }
+
+#define PCMCIA_DEVICE_PROD_ID2(v2, vh2) { \
+	.match_flags = PCMCIA_DEV_ID_MATCH_PROD_ID2, \
+	.prod_id = { NULL, (v2), NULL, NULL },  \
+	.prod_id_hash = { 0, (vh2), 0, 0 }, }
+
+#define PCMCIA_DEVICE_PROD_ID12(v1, v2, vh1, vh2) { \
+	.match_flags = PCMCIA_DEV_ID_MATCH_PROD_ID1| \
+			PCMCIA_DEV_ID_MATCH_PROD_ID2, \
+	.prod_id = { (v1), (v2), NULL, NULL }, \
+	.prod_id_hash = { (vh1), (vh2), 0, 0 }, }
+
+#define PCMCIA_DEVICE_PROD_ID13(v1, v3, vh1, vh3) { \
+	.match_flags = PCMCIA_DEV_ID_MATCH_PROD_ID1| \
+			PCMCIA_DEV_ID_MATCH_PROD_ID3, \
+	.prod_id = { (v1), NULL, (v3), NULL }, \
+	.prod_id_hash = { (vh1), 0, (vh3), 0 }, }
+
+#define PCMCIA_DEVICE_PROD_ID14(v1, v4, vh1, vh4) { \
+	.match_flags = PCMCIA_DEV_ID_MATCH_PROD_ID1| \
+			PCMCIA_DEV_ID_MATCH_PROD_ID4, \
+	.prod_id = { (v1), NULL, NULL, (v4) }, \
+	.prod_id_hash = { (vh1), 0, 0, (vh4) }, }
+
+#define PCMCIA_DEVICE_PROD_ID123(v1, v2, v3, vh1, vh2, vh3) { \
+	.match_flags = PCMCIA_DEV_ID_MATCH_PROD_ID1| \
+			PCMCIA_DEV_ID_MATCH_PROD_ID2| \
+			PCMCIA_DEV_ID_MATCH_PROD_ID3, \
+	.prod_id = { (v1), (v2), (v3), NULL },\
+	.prod_id_hash = { (vh1), (vh2), (vh3), 0 }, }
+
+#define PCMCIA_DEVICE_PROD_ID124(v1, v2, v4, vh1, vh2, vh4) { \
+	.match_flags = PCMCIA_DEV_ID_MATCH_PROD_ID1| \
+			PCMCIA_DEV_ID_MATCH_PROD_ID2| \
+			PCMCIA_DEV_ID_MATCH_PROD_ID4, \
+	.prod_id = { (v1), (v2), NULL, (v4) }, \
+	.prod_id_hash = { (vh1), (vh2), 0, (vh4) }, }
+
+#define PCMCIA_DEVICE_PROD_ID134(v1, v3, v4, vh1, vh3, vh4) { \
+	.match_flags = PCMCIA_DEV_ID_MATCH_PROD_ID1| \
+			PCMCIA_DEV_ID_MATCH_PROD_ID3| \
+			PCMCIA_DEV_ID_MATCH_PROD_ID4, \
+	.prod_id = { (v1), NULL, (v3), (v4) }, \
+	.prod_id_hash = { (vh1), 0, (vh3), (vh4) }, }
+
+#define PCMCIA_DEVICE_PROD_ID1234(v1, v2, v3, v4, vh1, vh2, vh3, vh4) { \
+	.match_flags = PCMCIA_DEV_ID_MATCH_PROD_ID1| \
+			PCMCIA_DEV_ID_MATCH_PROD_ID2| \
+			PCMCIA_DEV_ID_MATCH_PROD_ID3| \
+			PCMCIA_DEV_ID_MATCH_PROD_ID4, \
+	.prod_id = { (v1), (v2), (v3), (v4) }, \
+	.prod_id_hash = { (vh1), (vh2), (vh3), (vh4) }, }
+
+
+/* multi-function devices */
+
+#define PCMCIA_MFC_DEVICE_MANF_CARD(mfc, manf, card) { \
+	.match_flags = PCMCIA_DEV_ID_MATCH_MANF_ID| \
+			PCMCIA_DEV_ID_MATCH_CARD_ID| \
+			PCMCIA_DEV_ID_MATCH_FUNCTION, \
+	.manf_id = (manf), \
+	.card_id = (card), \
+	.function = (mfc), }
+
+#define PCMCIA_MFC_DEVICE_PROD_ID1(mfc, v1, vh1) { \
+	.match_flags = PCMCIA_DEV_ID_MATCH_PROD_ID1| \
+			PCMCIA_DEV_ID_MATCH_FUNCTION, \
+	.prod_id = { (v1), NULL, NULL, NULL }, \
+	.prod_id_hash = { (vh1), 0, 0, 0 }, \
+	.function = (mfc), }
+
+#define PCMCIA_MFC_DEVICE_PROD_ID2(mfc, v2, vh2) { \
+	.match_flags = PCMCIA_DEV_ID_MATCH_PROD_ID2| \
+			PCMCIA_DEV_ID_MATCH_FUNCTION, \
+	.prod_id = { NULL, (v2), NULL, NULL },  \
+	.prod_id_hash = { 0, (vh2), 0, 0 }, \
+	.function = (mfc), }
+
+#define PCMCIA_MFC_DEVICE_PROD_ID12(mfc, v1, v2, vh1, vh2) { \
+	.match_flags = PCMCIA_DEV_ID_MATCH_PROD_ID1| \
+			PCMCIA_DEV_ID_MATCH_PROD_ID2| \
+			PCMCIA_DEV_ID_MATCH_FUNCTION, \
+	.prod_id = { (v1), (v2), NULL, NULL }, \
+	.prod_id_hash = { (vh1), (vh2), 0, 0 }, \
+	.function = (mfc), }
+
+#define PCMCIA_MFC_DEVICE_PROD_ID13(mfc, v1, v3, vh1, vh3) { \
+	.match_flags = PCMCIA_DEV_ID_MATCH_PROD_ID1| \
+			PCMCIA_DEV_ID_MATCH_PROD_ID3| \
+			PCMCIA_DEV_ID_MATCH_FUNCTION, \
+	.prod_id = { (v1), NULL, (v3), NULL }, \
+	.prod_id_hash = { (vh1), 0, (vh3), 0 }, \
+	.function = (mfc), }
+
+#define PCMCIA_MFC_DEVICE_PROD_ID123(mfc, v1, v2, v3, vh1, vh2, vh3) { \
+	.match_flags = PCMCIA_DEV_ID_MATCH_PROD_ID1| \
+			PCMCIA_DEV_ID_MATCH_PROD_ID2| \
+			PCMCIA_DEV_ID_MATCH_PROD_ID3| \
+			PCMCIA_DEV_ID_MATCH_FUNCTION, \
+	.prod_id = { (v1), (v2), (v3), NULL },\
+	.prod_id_hash = { (vh1), (vh2), (vh3), 0 }, \
+	.function = (mfc), }
+
+/* pseudo multi-function devices */
+
+#define PCMCIA_PFC_DEVICE_MANF_CARD(mfc, manf, card) { \
+	.match_flags = PCMCIA_DEV_ID_MATCH_MANF_ID| \
+			PCMCIA_DEV_ID_MATCH_CARD_ID| \
+			PCMCIA_DEV_ID_MATCH_DEVICE_NO, \
+	.manf_id = (manf), \
+	.card_id = (card), \
+	.device_no = (mfc), }
+
+#define PCMCIA_PFC_DEVICE_PROD_ID1(mfc, v1, vh1) { \
+	.match_flags = PCMCIA_DEV_ID_MATCH_PROD_ID1| \
+			PCMCIA_DEV_ID_MATCH_DEVICE_NO, \
+	.prod_id = { (v1), NULL, NULL, NULL }, \
+	.prod_id_hash = { (vh1), 0, 0, 0 }, \
+	.device_no = (mfc), }
+
+#define PCMCIA_PFC_DEVICE_PROD_ID2(mfc, v2, vh2) { \
+	.match_flags = PCMCIA_DEV_ID_MATCH_PROD_ID2| \
+			PCMCIA_DEV_ID_MATCH_DEVICE_NO, \
+	.prod_id = { NULL, (v2), NULL, NULL },  \
+	.prod_id_hash = { 0, (vh2), 0, 0 }, \
+	.device_no = (mfc), }
+
+#define PCMCIA_PFC_DEVICE_PROD_ID12(mfc, v1, v2, vh1, vh2) { \
+	.match_flags = PCMCIA_DEV_ID_MATCH_PROD_ID1| \
+			PCMCIA_DEV_ID_MATCH_PROD_ID2| \
+			PCMCIA_DEV_ID_MATCH_DEVICE_NO, \
+	.prod_id = { (v1), (v2), NULL, NULL }, \
+	.prod_id_hash = { (vh1), (vh2), 0, 0 }, \
+	.device_no = (mfc), }
+
+#define PCMCIA_PFC_DEVICE_PROD_ID13(mfc, v1, v3, vh1, vh3) { \
+	.match_flags = PCMCIA_DEV_ID_MATCH_PROD_ID1| \
+			PCMCIA_DEV_ID_MATCH_PROD_ID3| \
+			PCMCIA_DEV_ID_MATCH_DEVICE_NO, \
+	.prod_id = { (v1), NULL, (v3), NULL }, \
+	.prod_id_hash = { (vh1), 0, (vh3), 0 }, \
+	.device_no = (mfc), }
+
+#define PCMCIA_PFC_DEVICE_PROD_ID123(mfc, v1, v2, v3, vh1, vh2, vh3) { \
+	.match_flags = PCMCIA_DEV_ID_MATCH_PROD_ID1| \
+			PCMCIA_DEV_ID_MATCH_PROD_ID2| \
+			PCMCIA_DEV_ID_MATCH_PROD_ID3| \
+			PCMCIA_DEV_ID_MATCH_DEVICE_NO, \
+	.prod_id = { (v1), (v2), (v3), NULL },\
+	.prod_id_hash = { (vh1), (vh2), (vh3), 0 }, \
+	.device_no = (mfc), }
+
+
+#define PCMCIA_DEVICE_NULL { .match_flags = 0, }
diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h
index 312fd958c90..c267edde9d0 100644
--- a/include/pcmcia/ds.h
+++ b/include/pcmcia/ds.h
@@ -18,6 +18,8 @@
 
 #include <pcmcia/bulkmem.h>
 #include <pcmcia/cs_types.h>
+#include <pcmcia/device_id.h>
+#include <linux/mod_devicetable.h>
 
 typedef struct tuple_parse_t {
     tuple_t		tuple;
@@ -135,6 +137,7 @@ struct pcmcia_driver {
 	dev_link_t		*(*attach)(void);
 	void			(*detach)(dev_link_t *);
 	struct module		*owner;
+	struct pcmcia_device_id	*id_table;
 	struct device_driver	drv;
 };
 
@@ -173,7 +176,9 @@ struct pcmcia_device {
 	u8			has_manf_id:1;
 	u8			has_card_id:1;
 	u8			has_func_id:1;
-	u8			reserved:5;
+
+	u8			allow_func_id_match:1;
+	u8			reserved:4;
 
 	u8			func_id;
 	u16			manf_id;
-- 
cgit v1.2.3-70-g09d2


From ea7b38825bba66a81745a706da70a1c81adc95bd Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Mon, 27 Jun 2005 16:28:07 -0700
Subject: [PATCH] pcmcia: match for fake CIS

Add another match flag for devices needing a CIS override.  The driver will
only probe/attach if the CIS has been replaced before.

Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/pcmcia/ds.c             |  8 +++++
 include/linux/mod_devicetable.h |  2 ++
 include/pcmcia/device_id.h      | 74 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 84 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/pcmcia/ds.c b/drivers/pcmcia/ds.c
index 3ac7a443f66..c0611d56eab 100644
--- a/drivers/pcmcia/ds.c
+++ b/drivers/pcmcia/ds.c
@@ -733,6 +733,14 @@ static inline int pcmcia_devmatch(struct pcmcia_device *dev,
 			return 0;
 	}
 
+	if (did->match_flags & PCMCIA_DEV_ID_MATCH_FAKE_CIS) {
+		if (!dev->socket->fake_cis) {
+			/* FIXME: evaluate using firmware helpers to
+			 * automagically load it from userspace */
+			return 0;
+		}
+	}
+
 	dev->dev.driver_data = (void *) did;
 
 	return 1;
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index e9651cd8310..c0106d68bb6 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -196,6 +196,7 @@ struct pcmcia_device_id {
 
 	/* not matched against */
 	kernel_ulong_t	driver_info;
+	char *		cisfile;
 };
 
 #define PCMCIA_DEV_ID_MATCH_MANF_ID	0x0001
@@ -207,5 +208,6 @@ struct pcmcia_device_id {
 #define PCMCIA_DEV_ID_MATCH_PROD_ID3	0x0040
 #define PCMCIA_DEV_ID_MATCH_PROD_ID4	0x0080
 #define PCMCIA_DEV_ID_MATCH_DEVICE_NO	0x0100
+#define PCMCIA_DEV_ID_MATCH_FAKE_CIS	0x0200
 
 #endif /* LINUX_MOD_DEVICETABLE_H */
diff --git a/include/pcmcia/device_id.h b/include/pcmcia/device_id.h
index acf68656de3..346d81ece28 100644
--- a/include/pcmcia/device_id.h
+++ b/include/pcmcia/device_id.h
@@ -171,5 +171,79 @@
 	.prod_id_hash = { (vh1), (vh2), (vh3), 0 }, \
 	.device_no = (mfc), }
 
+/* cards needing a CIS override */
+
+#define PCMCIA_DEVICE_CIS_MANF_CARD(manf, card, _cisfile) { \
+	.match_flags = PCMCIA_DEV_ID_MATCH_FAKE_CIS | \
+			PCMCIA_DEV_ID_MATCH_MANF_ID| \
+			PCMCIA_DEV_ID_MATCH_CARD_ID, \
+	.manf_id = (manf), \
+	.card_id = (card), \
+	.cisfile = (_cisfile)}
+
+#define PCMCIA_DEVICE_CIS_PROD_ID12(v1, v2, vh1, vh2, _cisfile) { \
+	.match_flags = PCMCIA_DEV_ID_MATCH_FAKE_CIS | \
+			PCMCIA_DEV_ID_MATCH_PROD_ID1| \
+			PCMCIA_DEV_ID_MATCH_PROD_ID2, \
+	.prod_id = { (v1), (v2), NULL, NULL }, \
+	.prod_id_hash = { (vh1), (vh2), 0, 0 }, \
+	.cisfile = (_cisfile)}
+
+#define PCMCIA_DEVICE_CIS_PROD_ID123(v1, v2, v3, vh1, vh2, vh3, _cisfile) { \
+	.match_flags = PCMCIA_DEV_ID_MATCH_FAKE_CIS | \
+			PCMCIA_DEV_ID_MATCH_PROD_ID1| \
+			PCMCIA_DEV_ID_MATCH_PROD_ID2| \
+			PCMCIA_DEV_ID_MATCH_PROD_ID3, \
+	.prod_id = { (v1), (v2), (v3), NULL },\
+	.prod_id_hash = { (vh1), (vh2), (vh3), 0 }, \
+	.cisfile = (_cisfile)}
+
+
+#define PCMCIA_DEVICE_CIS_PROD_ID2(v2, vh2, _cisfile) { \
+	.match_flags =  PCMCIA_DEV_ID_MATCH_FAKE_CIS | \
+			PCMCIA_DEV_ID_MATCH_PROD_ID2, \
+	.prod_id = { NULL, (v2), NULL, NULL },  \
+	.prod_id_hash = { 0, (vh2), 0, 0 }, \
+	.cisfile = (_cisfile)}
+
+#define PCMCIA_PFC_DEVICE_CIS_PROD_ID12(mfc, v1, v2, vh1, vh2, _cisfile) { \
+	.match_flags =  PCMCIA_DEV_ID_MATCH_FAKE_CIS | \
+			PCMCIA_DEV_ID_MATCH_PROD_ID1| \
+			PCMCIA_DEV_ID_MATCH_PROD_ID2| \
+			PCMCIA_DEV_ID_MATCH_DEVICE_NO, \
+	.prod_id = { (v1), (v2), NULL, NULL }, \
+	.prod_id_hash = { (vh1), (vh2), 0, 0 },\
+	.device_no = (mfc), \
+	.cisfile = (_cisfile)}
+
+#define PCMCIA_MFC_DEVICE_CIS_MANF_CARD(mfc, manf, card, _cisfile) { \
+	.match_flags =  PCMCIA_DEV_ID_MATCH_FAKE_CIS | \
+			PCMCIA_DEV_ID_MATCH_MANF_ID| \
+			PCMCIA_DEV_ID_MATCH_CARD_ID| \
+			PCMCIA_DEV_ID_MATCH_FUNCTION, \
+	.manf_id = (manf), \
+	.card_id = (card), \
+	.function = (mfc), \
+	.cisfile = (_cisfile)}
+
+#define PCMCIA_MFC_DEVICE_CIS_PROD_ID12(mfc, v1, v2, vh1, vh2, _cisfile) { \
+	.match_flags =  PCMCIA_DEV_ID_MATCH_FAKE_CIS | \
+			PCMCIA_DEV_ID_MATCH_PROD_ID1| \
+			PCMCIA_DEV_ID_MATCH_PROD_ID2| \
+			PCMCIA_DEV_ID_MATCH_FUNCTION, \
+	.prod_id = { (v1), (v2), NULL, NULL }, \
+	.prod_id_hash = { (vh1), (vh2), 0, 0 }, \
+	.function = (mfc), \
+	.cisfile = (_cisfile)}
+
+#define PCMCIA_MFC_DEVICE_CIS_PROD_ID4(mfc, v4, vh4, _cisfile) { \
+	.match_flags =  PCMCIA_DEV_ID_MATCH_FAKE_CIS | \
+			PCMCIA_DEV_ID_MATCH_PROD_ID4| \
+			PCMCIA_DEV_ID_MATCH_FUNCTION, \
+	.prod_id = { NULL, NULL, NULL, (v4) }, \
+	.prod_id_hash = { 0, 0, 0, (vh4) }, \
+	.function = (mfc), \
+	.cisfile = (_cisfile)}
+
 
 #define PCMCIA_DEVICE_NULL { .match_flags = 0, }
-- 
cgit v1.2.3-70-g09d2


From f602ff7eb4e44e7245bfeeba4d078144703fcd76 Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Mon, 27 Jun 2005 16:28:09 -0700
Subject: [PATCH] pcmcia: match "anonymous" cards

If a card doesn't provide _any_ information about itself, assume it is a
so-called "anonymous" card.  pcmciamtd will bind to it if it is configured to
do so.

Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/pcmcia/ds.c             | 9 +++++++++
 include/linux/mod_devicetable.h | 1 +
 2 files changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/pcmcia/ds.c b/drivers/pcmcia/ds.c
index f657a2a77b2..66680699e91 100644
--- a/drivers/pcmcia/ds.c
+++ b/drivers/pcmcia/ds.c
@@ -746,6 +746,15 @@ static inline int pcmcia_devmatch(struct pcmcia_device *dev,
 		}
 	}
 
+	if (did->match_flags & PCMCIA_DEV_ID_MATCH_ANONYMOUS) {
+		int i;
+		for (i=0; i<4; i++)
+			if (dev->prod_id[i])
+				return 0;
+		if (dev->has_manf_id || dev->has_card_id || dev->has_func_id)
+			return 0;
+	}
+
 	dev->dev.driver_data = (void *) did;
 
 	return 1;
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index c0106d68bb6..8a8dc82a941 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -209,5 +209,6 @@ struct pcmcia_device_id {
 #define PCMCIA_DEV_ID_MATCH_PROD_ID4	0x0080
 #define PCMCIA_DEV_ID_MATCH_DEVICE_NO	0x0100
 #define PCMCIA_DEV_ID_MATCH_FAKE_CIS	0x0200
+#define PCMCIA_DEV_ID_MATCH_ANONYMOUS	0x0400
 
 #endif /* LINUX_MOD_DEVICETABLE_H */
-- 
cgit v1.2.3-70-g09d2


From aecab27aeabaa897d69fc082686df314329830de Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Mon, 27 Jun 2005 16:28:56 -0700
Subject: [PATCH] pcmcia: mod_devicetable.h fix for different sizes in kernel-
 and userspace

The size of pointers may differ between (userspace) modpost and (kernelspace)
modules -- so fix mod_devicetable.h to reflect this possibility.

Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mod_devicetable.h | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index 8a8dc82a941..9b6d05172ed 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -191,12 +191,22 @@ struct pcmcia_device_id {
 	/* for pseude multi-function devices */
 	__u8  		device_no;
 
-	const char *	prod_id[4];
 	__u32 		prod_id_hash[4];
 
+	/* not matched against in kernelspace*/
+#ifdef __KERNEL__
+	const char *	prod_id[4];
+#else
+	kernel_ulong_t	prod_id[4];
+#endif
+
 	/* not matched against */
 	kernel_ulong_t	driver_info;
+#ifdef __KERNEL__
 	char *		cisfile;
+#else
+	kernel_ulong_t	cisfile;
+#endif
 };
 
 #define PCMCIA_DEV_ID_MATCH_MANF_ID	0x0001
-- 
cgit v1.2.3-70-g09d2


From a5fe736eaf9bae1b45317313de04b564441b94f2 Mon Sep 17 00:00:00 2001
From: Jeff Garzik <jgarzik@pobox.com>
Date: Mon, 27 Jun 2005 22:47:18 -0400
Subject: Update is_multicast_ether_addr() definition; net/ieee80211.h
 cleanups.

---
 include/linux/etherdevice.h |  2 +-
 include/net/ieee80211.h     | 48 ++++++++++-----------------------------------
 2 files changed, 11 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index a1478258d00..8a2df4dfbc5 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -65,7 +65,7 @@ static inline int is_zero_ether_addr(const u8 *addr)
  */
 static inline int is_multicast_ether_addr(const u8 *addr)
 {
-	return addr[0] & 0x01;
+	return ((addr[0] != 0xff) && (0x01 & addr[0]));
 }
 
 /**
diff --git a/include/net/ieee80211.h b/include/net/ieee80211.h
index 7fe57f957a5..151c4f20355 100644
--- a/include/net/ieee80211.h
+++ b/include/net/ieee80211.h
@@ -94,6 +94,8 @@ struct eapol {
 	u16 length;
 } __attribute__ ((packed));
 
+#define IEEE80211_1ADDR_LEN 10
+#define IEEE80211_2ADDR_LEN 16
 #define IEEE80211_3ADDR_LEN 24
 #define IEEE80211_4ADDR_LEN 30
 #define IEEE80211_FCS_LEN    4
@@ -300,23 +302,6 @@ struct ieee80211_snap_hdr {
 #define WLAN_REASON_STA_REQ_ASSOC_WITHOUT_AUTH 9
 
 
-/* Information Element IDs */
-#define WLAN_EID_SSID 0
-#define WLAN_EID_SUPP_RATES 1
-#define WLAN_EID_FH_PARAMS 2
-#define WLAN_EID_DS_PARAMS 3
-#define WLAN_EID_CF_PARAMS 4
-#define WLAN_EID_TIM 5
-#define WLAN_EID_IBSS_PARAMS 6
-#define WLAN_EID_CHALLENGE 16
-#define WLAN_EID_RSN 48
-#define WLAN_EID_GENERIC 221
-
-#define IEEE80211_MGMT_HDR_LEN 24
-#define IEEE80211_DATA_HDR3_LEN 24
-#define IEEE80211_DATA_HDR4_LEN 30
-
-
 #define IEEE80211_STATMASK_SIGNAL (1<<0)
 #define IEEE80211_STATMASK_RSSI (1<<1)
 #define IEEE80211_STATMASK_NOISE (1<<2)
@@ -441,6 +426,8 @@ struct ieee80211_stats {
 
 struct ieee80211_device;
 
+#include "ieee80211_crypt.h"
+
 #define SEC_KEY_1         (1<<0)
 #define SEC_KEY_2         (1<<1)
 #define SEC_KEY_3         (1<<2)
@@ -488,15 +475,6 @@ Total: 28-2340 bytes
 
 */
 
-struct ieee80211_header_data {
-	u16 frame_ctl;
-	u16 duration_id;
-	u8 addr1[6];
-	u8 addr2[6];
-	u8 addr3[6];
-	u16 seq_ctrl;
-};
-
 #define BEACON_PROBE_SSID_ID_POSITION 12
 
 /* Management Frame Information Element Types */
@@ -541,7 +519,7 @@ struct ieee80211_info_element {
 */
 
 struct ieee80211_authentication {
-	struct ieee80211_header_data header;
+	struct ieee80211_hdr_3addr header;
 	u16 algorithm;
 	u16 transaction;
 	u16 status;
@@ -550,7 +528,7 @@ struct ieee80211_authentication {
 
 
 struct ieee80211_probe_response {
-	struct ieee80211_header_data header;
+	struct ieee80211_hdr_3addr header;
 	u32 time_stamp[2];
 	u16 beacon_interval;
 	u16 capability;
@@ -648,12 +626,6 @@ enum ieee80211_state {
 #define MAC_ARG(x) ((u8*)(x))[0],((u8*)(x))[1],((u8*)(x))[2],((u8*)(x))[3],((u8*)(x))[4],((u8*)(x))[5]
 
 
-extern inline int is_broadcast_ether_addr(const u8 *addr)
-{
-	return ((addr[0] == 0xff) && (addr[1] == 0xff) && (addr[2] == 0xff) &&   \
-		(addr[3] == 0xff) && (addr[4] == 0xff) && (addr[5] == 0xff));
-}
-
 #define CFG_IEEE80211_RESERVE_FCS (1<<0)
 #define CFG_IEEE80211_COMPUTE_FCS (1<<1)
 
@@ -787,21 +759,21 @@ extern inline int ieee80211_is_valid_mode(struct ieee80211_device *ieee, int mod
 
 extern inline int ieee80211_get_hdrlen(u16 fc)
 {
-	int hdrlen = 24;
+	int hdrlen = IEEE80211_3ADDR_LEN;
 
 	switch (WLAN_FC_GET_TYPE(fc)) {
 	case IEEE80211_FTYPE_DATA:
 		if ((fc & IEEE80211_FCTL_FROMDS) && (fc & IEEE80211_FCTL_TODS))
-			hdrlen = 30; /* Addr4 */
+			hdrlen = IEEE80211_4ADDR_LEN;
 		break;
 	case IEEE80211_FTYPE_CTL:
 		switch (WLAN_FC_GET_STYPE(fc)) {
 		case IEEE80211_STYPE_CTS:
 		case IEEE80211_STYPE_ACK:
-			hdrlen = 10;
+			hdrlen = IEEE80211_1ADDR_LEN;
 			break;
 		default:
-			hdrlen = 16;
+			hdrlen = IEEE80211_2ADDR_LEN;
 			break;
 		}
 		break;
-- 
cgit v1.2.3-70-g09d2


From c431ada45d65b305a6aab4557067e564b23ce5a5 Mon Sep 17 00:00:00 2001
From: Rajesh Shah <rajesh.shah@intel.com>
Date: Thu, 28 Apr 2005 00:25:45 -0700
Subject: [PATCH] acpi bridge hotadd: ACPI based root bridge hot-add

When you hot-plug a (root) bridge hierarchy, it may have p2p bridges and
devices attached to it that have not been configured by firmware.  In this
case, we need to configure the devices before starting them.  This patch
separates device start from device scan so that we can introduce the
configuration step in the middle.

I kept the existing semantics for pci_scan_bus() since there are a huge number
of callers to that function.

Also, I have no way of testing the changes I made to the parisc files, so this
needs review by those folks.  Sorry for the massive cross-post, this touches
files in many different places.

Signed-off-by: Rajesh Shah <rajesh.shah@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/i386/pci/common.c   |  2 +-
 arch/i386/pci/legacy.c   |  2 ++
 arch/i386/pci/numa.c     |  2 ++
 arch/ia64/pci/pci.c      |  2 +-
 drivers/acpi/pci_bind.c  | 16 +++++++++++++++-
 drivers/acpi/pci_root.c  | 24 +++++++++++++++++++++++-
 drivers/parisc/dino.c    |  1 +
 drivers/parisc/lba_pci.c |  2 ++
 drivers/pci/probe.c      |  2 --
 include/linux/pci.h      |  8 ++++++--
 10 files changed, 53 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/arch/i386/pci/common.c b/arch/i386/pci/common.c
index 2a2e79fbfef..87325263cd4 100644
--- a/arch/i386/pci/common.c
+++ b/arch/i386/pci/common.c
@@ -134,7 +134,7 @@ struct pci_bus * __devinit pcibios_scan_root(int busnum)
 
 	printk("PCI: Probing PCI hardware (bus %02x)\n", busnum);
 
-	return pci_scan_bus(busnum, &pci_root_ops, NULL);
+	return pci_scan_bus_parented(NULL, busnum, &pci_root_ops, NULL);
 }
 
 extern u8 pci_cache_line_size;
diff --git a/arch/i386/pci/legacy.c b/arch/i386/pci/legacy.c
index 1492e375386..149a9588c25 100644
--- a/arch/i386/pci/legacy.c
+++ b/arch/i386/pci/legacy.c
@@ -45,6 +45,8 @@ static int __init pci_legacy_init(void)
 
 	printk("PCI: Probing PCI hardware\n");
 	pci_root_bus = pcibios_scan_root(0);
+	if (pci_root_bus)
+		pci_bus_add_devices(pci_root_bus);
 
 	pcibios_fixup_peer_bridges();
 
diff --git a/arch/i386/pci/numa.c b/arch/i386/pci/numa.c
index 9e369546189..adbe17a38f6 100644
--- a/arch/i386/pci/numa.c
+++ b/arch/i386/pci/numa.c
@@ -115,6 +115,8 @@ static int __init pci_numa_init(void)
 		return 0;
 
 	pci_root_bus = pcibios_scan_root(0);
+	if (pci_root_bus)
+		pci_bus_add_devices(pci_root_bus);
 	if (num_online_nodes() > 1)
 		for_each_online_node(quad) {
 			if (quad == 0)
diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
index e3fc4edea11..c0661d3382e 100644
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -312,7 +312,7 @@ pci_acpi_scan_root(struct acpi_device *device, int domain, int bus)
 	acpi_walk_resources(device->handle, METHOD_NAME__CRS, add_window,
 			&info);
 
-	pbus = pci_scan_bus(bus, &pci_root_ops, controller);
+	pbus = pci_scan_bus_parented(NULL, bus, &pci_root_ops, controller);
 	if (pbus)
 		pcibios_setup_root_windows(pbus, controller);
 
diff --git a/drivers/acpi/pci_bind.c b/drivers/acpi/pci_bind.c
index 5d19b39e9e2..7753df1f9fb 100644
--- a/drivers/acpi/pci_bind.c
+++ b/drivers/acpi/pci_bind.c
@@ -129,6 +129,8 @@ acpi_pci_bind (
 	char			*pathname = NULL;
 	struct acpi_buffer	buffer = {0, NULL};
 	acpi_handle		handle = NULL;
+	struct pci_dev		*dev;
+	struct pci_bus 		*bus;
 
 	ACPI_FUNCTION_TRACE("acpi_pci_bind");
 
@@ -193,8 +195,20 @@ acpi_pci_bind (
 	 * Locate matching device in PCI namespace.  If it doesn't exist
 	 * this typically means that the device isn't currently inserted
 	 * (e.g. docking station, port replicator, etc.).
+	 * We cannot simply search the global pci device list, since
+	 * PCI devices are added to the global pci list when the root
+	 * bridge start ops are run, which may not have happened yet.
 	 */
-	data->dev = pci_find_slot(data->id.bus, PCI_DEVFN(data->id.device, data->id.function));
+	bus = pci_find_bus(data->id.segment, data->id.bus);
+	if (bus) {
+		list_for_each_entry(dev, &bus->devices, bus_list) {
+			if (dev->devfn == PCI_DEVFN(data->id.device,
+						data->id.function)) {
+				data->dev = dev;
+				break;
+			}
+		}
+	}
 	if (!data->dev) {
 		ACPI_DEBUG_PRINT((ACPI_DB_INFO, 
 			"Device %02x:%02x:%02x.%02x not present in PCI namespace\n",
diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index 7e6b8e3b2ed..5d2f77fcd50 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -46,6 +46,7 @@ ACPI_MODULE_NAME		("pci_root")
 
 static int acpi_pci_root_add (struct acpi_device *device);
 static int acpi_pci_root_remove (struct acpi_device *device, int type);
+static int acpi_pci_root_start (struct acpi_device *device);
 
 static struct acpi_driver acpi_pci_root_driver = {
 	.name =		ACPI_PCI_ROOT_DRIVER_NAME,
@@ -54,6 +55,7 @@ static struct acpi_driver acpi_pci_root_driver = {
 	.ops =		{
 				.add =    acpi_pci_root_add,
 				.remove = acpi_pci_root_remove,
+				.start =  acpi_pci_root_start,
 			},
 };
 
@@ -169,6 +171,7 @@ acpi_pci_root_add (
 	if (!root)
 		return_VALUE(-ENOMEM);
 	memset(root, 0, sizeof(struct acpi_pci_root));
+	INIT_LIST_HEAD(&root->node);
 
 	root->handle = device->handle;
 	strcpy(acpi_device_name(device), ACPI_PCI_ROOT_DEVICE_NAME);
@@ -298,12 +301,31 @@ acpi_pci_root_add (
 			root->id.bus);
 
 end:
-	if (result)
+	if (result) {
+		if (!list_empty(&root->node))
+			list_del(&root->node);
 		kfree(root);
+	}
 
 	return_VALUE(result);
 }
 
+static int
+acpi_pci_root_start (
+	struct acpi_device	*device)
+{
+	struct acpi_pci_root	*root;
+
+	ACPI_FUNCTION_TRACE("acpi_pci_root_start");
+
+	list_for_each_entry(root, &acpi_pci_roots, node) {
+		if (root->handle == device->handle) {
+			pci_bus_add_devices(root->bus);
+			return_VALUE(0);
+		}
+	}
+	return_VALUE(-ENODEV);
+}
 
 static int
 acpi_pci_root_remove (
diff --git a/drivers/parisc/dino.c b/drivers/parisc/dino.c
index b0d2a73d1d4..2f2dbef2c3b 100644
--- a/drivers/parisc/dino.c
+++ b/drivers/parisc/dino.c
@@ -993,6 +993,7 @@ dino_driver_callback(struct parisc_device *dev)
 	bus = pci_scan_bus_parented(&dev->dev, dino_current_bus,
 				    &dino_cfg_ops, NULL);
 	if(bus) {
+		pci_bus_add_devices(bus);
 		/* This code *depends* on scanning being single threaded
 		 * if it isn't, this global bus number count will fail
 		 */
diff --git a/drivers/parisc/lba_pci.c b/drivers/parisc/lba_pci.c
index dc838804c0d..7fdd80b7eb4 100644
--- a/drivers/parisc/lba_pci.c
+++ b/drivers/parisc/lba_pci.c
@@ -1570,6 +1570,8 @@ lba_driver_probe(struct parisc_device *dev)
 	lba_bus = lba_dev->hba.hba_bus =
 		pci_scan_bus_parented(&dev->dev, lba_dev->hba.bus_num.start,
 				cfg_ops, NULL);
+	if (lba_bus)
+		pci_bus_add_devices(lba_bus);
 
 	/* This is in lieu of calling pci_assign_unassigned_resources() */
 	if (is_pdc_pat()) {
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index fd48b201eb5..3dc00f0ca8a 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -911,8 +911,6 @@ struct pci_bus * __devinit pci_scan_bus_parented(struct device *parent, int bus,
 
 	b->subordinate = pci_scan_child_bus(b);
 
-	pci_bus_add_devices(b);
-
 	return b;
 
 sys_create_link_err:
diff --git a/include/linux/pci.h b/include/linux/pci.h
index b5238bd1883..0e9844929fe 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -734,16 +734,20 @@ void pcibios_update_irq(struct pci_dev *, int irq);
 /* Generic PCI functions used internally */
 
 extern struct pci_bus *pci_find_bus(int domain, int busnr);
+void pci_bus_add_devices(struct pci_bus *bus);
 struct pci_bus *pci_scan_bus_parented(struct device *parent, int bus, struct pci_ops *ops, void *sysdata);
 static inline struct pci_bus *pci_scan_bus(int bus, struct pci_ops *ops, void *sysdata)
 {
-	return pci_scan_bus_parented(NULL, bus, ops, sysdata);
+	struct pci_bus *root_bus;
+	root_bus = pci_scan_bus_parented(NULL, bus, ops, sysdata);
+	if (root_bus)
+		pci_bus_add_devices(root_bus);
+	return root_bus;
 }
 int pci_scan_slot(struct pci_bus *bus, int devfn);
 struct pci_dev * pci_scan_single_device(struct pci_bus *bus, int devfn);
 unsigned int pci_scan_child_bus(struct pci_bus *bus);
 void pci_bus_add_device(struct pci_dev *dev);
-void pci_bus_add_devices(struct pci_bus *bus);
 void pci_name_device(struct pci_dev *dev);
 char *pci_class_name(u32 class);
 void pci_read_bridge_bases(struct pci_bus *child);
-- 
cgit v1.2.3-70-g09d2


From b1bb248a5d2230a3d8ef42199c742194a8580b15 Mon Sep 17 00:00:00 2001
From: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Date: Thu, 28 Apr 2005 00:25:58 -0700
Subject: [PATCH] ACPI based I/O APIC hot-plug: add interfaces

This patch adds the following new interfaces for I/O xAPIC
hotplug. The implementation of these interfaces depends on each
architecture.

    o int acpi_register_ioapic(acpi_handle handle, u64 phys_addr,
			       u32 gsi_base);

        This new interface is to add a new I/O xAPIC specified by
        phys_addr and gsi_base pair. phys_addr is the physical address
        to which the I/O xAPIC is mapped and gsi_base is global system
        interrupt base of the I/O xAPIC. acpi_register_ioapic returns
        0 on success, or negative value on error.

    o int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base);

        This new interface is to remove a I/O xAPIC specified by
        gsi_base. acpi_unregister_ioapic returns 0 on success, or
        negative value on error.

Signed-off-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/i386/kernel/acpi/boot.c | 16 ++++++++++++++++
 arch/ia64/kernel/acpi.c      | 17 +++++++++++++++++
 include/linux/acpi.h         |  3 +++
 3 files changed, 36 insertions(+)

(limited to 'include/linux')

diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c
index 9f63ae0f404..f5360d88bcf 100644
--- a/arch/i386/kernel/acpi/boot.c
+++ b/arch/i386/kernel/acpi/boot.c
@@ -507,6 +507,22 @@ acpi_unmap_lsapic(int cpu)
 EXPORT_SYMBOL(acpi_unmap_lsapic);
 #endif /* CONFIG_ACPI_HOTPLUG_CPU */
 
+int
+acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base)
+{
+	/* TBD */
+	return -EINVAL;
+}
+EXPORT_SYMBOL(acpi_register_ioapic);
+
+int
+acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base)
+{
+	/* TBD */
+	return -EINVAL;
+}
+EXPORT_SYMBOL(acpi_unregister_ioapic);
+
 static unsigned long __init
 acpi_scan_rsdp (
 	unsigned long		start,
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index 72dfd9e7de0..ab798867acd 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -825,4 +825,21 @@ acpi_map_iosapic (acpi_handle handle, u32 depth, void *context, void **ret)
 	return AE_OK;
 }
 #endif /* CONFIG_NUMA */
+
+int
+acpi_register_ioapic (acpi_handle handle, u64 phys_addr, u32 gsi_base)
+{
+	/* TBD */
+	return -EINVAL;
+}
+EXPORT_SYMBOL(acpi_register_ioapic);
+
+int
+acpi_unregister_ioapic (acpi_handle handle, u32 gsi_base)
+{
+	/* TBD */
+	return -EINVAL;
+}
+EXPORT_SYMBOL(acpi_unregister_ioapic);
+
 #endif /* CONFIG_ACPI_BOOT */
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index b123cc08773..f5bc298707e 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -407,6 +407,9 @@ int acpi_map_lsapic(acpi_handle handle, int *pcpu);
 int acpi_unmap_lsapic(int cpu);
 #endif /* CONFIG_ACPI_HOTPLUG_CPU */
 
+int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base);
+int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base);
+
 extern int acpi_mp_config;
 
 extern u32 pci_mmcfg_base_addr;
-- 
cgit v1.2.3-70-g09d2


From a0d399a808916d22c1c222c6b5ca4e8edd6d91a9 Mon Sep 17 00:00:00 2001
From: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Date: Thu, 28 Apr 2005 00:25:59 -0700
Subject: [PATCH] ACPI based I/O APIC hot-plug: acpiphp support

This patch adds PCI based I/O xAPIC hot-add support to ACPIPHP
driver. When PCI root bridge is hot-added, all PCI based I/O xAPICs
under the root bridge are hot-added by this patch. Hot-remove support
is TBD.

Signed-off-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/pci/hotplug/acpiphp_glue.c | 127 +++++++++++++++++++++++++++++++++++++
 include/linux/pci_ids.h            |   2 +
 2 files changed, 129 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c
index b4a92123625..424e7de181a 100644
--- a/drivers/pci/hotplug/acpiphp_glue.c
+++ b/drivers/pci/hotplug/acpiphp_glue.c
@@ -552,6 +552,132 @@ static void remove_bridge(acpi_handle handle)
 	}
 }
 
+static struct pci_dev * get_apic_pci_info(acpi_handle handle)
+{
+	struct acpi_pci_id id;
+	struct pci_bus *bus;
+	struct pci_dev *dev;
+
+	if (ACPI_FAILURE(acpi_get_pci_id(handle, &id)))
+		return NULL;
+
+	bus = pci_find_bus(id.segment, id.bus);
+	if (!bus)
+		return NULL;
+
+	dev = pci_get_slot(bus, PCI_DEVFN(id.device, id.function));
+	if (!dev)
+		return NULL;
+
+	if ((dev->class != PCI_CLASS_SYSTEM_PIC_IOAPIC) &&
+	    (dev->class != PCI_CLASS_SYSTEM_PIC_IOXAPIC))
+	{
+		pci_dev_put(dev);
+		return NULL;
+	}
+
+	return dev;
+}
+
+static int get_gsi_base(acpi_handle handle, u32 *gsi_base)
+{
+	acpi_status status;
+	int result = -1;
+	unsigned long gsb;
+	struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL};
+	union acpi_object *obj;
+	void *table;
+
+	status = acpi_evaluate_integer(handle, "_GSB", NULL, &gsb);
+	if (ACPI_SUCCESS(status)) {
+		*gsi_base = (u32)gsb;
+		return 0;
+	}
+
+	status = acpi_evaluate_object(handle, "_MAT", NULL, &buffer);
+	if (ACPI_FAILURE(status) || !buffer.length || !buffer.pointer)
+		return -1;
+
+	obj = buffer.pointer;
+	if (obj->type != ACPI_TYPE_BUFFER)
+		goto out;
+
+	table = obj->buffer.pointer;
+	switch (((acpi_table_entry_header *)table)->type) {
+	case ACPI_MADT_IOSAPIC:
+		*gsi_base = ((struct acpi_table_iosapic *)table)->global_irq_base;
+		result = 0;
+		break;
+	case ACPI_MADT_IOAPIC:
+		*gsi_base = ((struct acpi_table_ioapic *)table)->global_irq_base;
+		result = 0;
+		break;
+	default:
+		break;
+	}
+ out:
+	acpi_os_free(buffer.pointer);
+	return result;
+}
+
+static acpi_status
+ioapic_add(acpi_handle handle, u32 lvl, void *context, void **rv)
+{
+	acpi_status status;
+	unsigned long sta;
+	acpi_handle tmp;
+	struct pci_dev *pdev;
+	u32 gsi_base;
+	u64 phys_addr;
+
+	/* Evaluate _STA if present */
+	status = acpi_evaluate_integer(handle, "_STA", NULL, &sta);
+	if (ACPI_SUCCESS(status) && sta != ACPI_STA_ALL)
+		return AE_CTRL_DEPTH;
+
+	/* Scan only PCI bus scope */
+	status = acpi_get_handle(handle, "_HID", &tmp);
+	if (ACPI_SUCCESS(status))
+		return AE_CTRL_DEPTH;
+
+	if (get_gsi_base(handle, &gsi_base))
+		return AE_OK;
+
+	pdev = get_apic_pci_info(handle);
+	if (!pdev)
+		return AE_OK;
+
+	if (pci_enable_device(pdev)) {
+		pci_dev_put(pdev);
+		return AE_OK;
+	}
+
+	pci_set_master(pdev);
+
+	if (pci_request_region(pdev, 0, "I/O APIC(acpiphp)")) {
+		pci_disable_device(pdev);
+		pci_dev_put(pdev);
+		return AE_OK;
+	}
+
+	phys_addr = pci_resource_start(pdev, 0);
+	if (acpi_register_ioapic(handle, phys_addr, gsi_base)) {
+		pci_release_region(pdev, 0);
+		pci_disable_device(pdev);
+		pci_dev_put(pdev);
+		return AE_OK;
+	}
+
+	return AE_OK;
+}
+
+static int acpiphp_configure_ioapics(acpi_handle handle)
+{
+	acpi_walk_namespace(ACPI_TYPE_DEVICE, handle,
+			    ACPI_UINT32_MAX, ioapic_add, NULL, NULL);
+	return 0;
+}
+
 static int power_on_slot(struct acpiphp_slot *slot)
 {
 	acpi_status status;
@@ -942,6 +1068,7 @@ static int acpiphp_configure_bridge (acpi_handle handle)
 	acpiphp_sanitize_bus(bus);
 	acpiphp_set_hpp_values(handle, bus);
 	pci_enable_bridges(bus);
+	acpiphp_configure_ioapics(handle);
 	return 0;
 }
 
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index bf608808a60..810bbbcee40 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -62,6 +62,8 @@
 
 #define PCI_BASE_CLASS_SYSTEM		0x08
 #define PCI_CLASS_SYSTEM_PIC		0x0800
+#define PCI_CLASS_SYSTEM_PIC_IOAPIC	0x080010
+#define PCI_CLASS_SYSTEM_PIC_IOXAPIC	0x080020
 #define PCI_CLASS_SYSTEM_DMA		0x0801
 #define PCI_CLASS_SYSTEM_TIMER		0x0802
 #define PCI_CLASS_SYSTEM_RTC		0x0803
-- 
cgit v1.2.3-70-g09d2


From 2311b1f2bbd36fa5f366a7448c718b2556e0f02c Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Fri, 13 May 2005 17:44:10 +1000
Subject: [PATCH] PCI: fix-pci-mmap-on-ppc-and-ppc64.patch

This is an updated version of Ben's fix-pci-mmap-on-ppc-and-ppc64.patch
which is in 2.6.12-rc4-mm1.

It fixes the patch to work on PPC iSeries, removes some debug printks
at Ben's request, and incorporates your
fix-pci-mmap-on-ppc-and-ppc64-fix.patch also.

Originally from Benjamin Herrenschmidt <benh@kernel.crashing.org>

This patch was discussed at length on linux-pci and so far, the last
iteration of it didn't raise any comment.  It's effect is a nop on
architecture that don't define the new pci_resource_to_user() callback
anyway.  It allows architecture like ppc who put weird things inside of
PCI resource structures to convert to some different value for user
visible ones.  It also fixes mmap'ing of IO space on those archs.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/ppc/kernel/pci.c   | 21 +++++++++++++++++++--
 arch/ppc64/kernel/pci.c | 22 ++++++++++++++++++++--
 drivers/pci/pci-sysfs.c | 26 +++++++++++++++++++++-----
 drivers/pci/proc.c      | 14 ++++++++++----
 include/asm-ppc/pci.h   |  6 ++++++
 include/asm-ppc64/pci.h |  7 +++++++
 include/linux/pci.h     | 14 ++++++++++++++
 7 files changed, 97 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ppc/kernel/pci.c b/arch/ppc/kernel/pci.c
index 6d7b92d7245..70cfb6ffd87 100644
--- a/arch/ppc/kernel/pci.c
+++ b/arch/ppc/kernel/pci.c
@@ -1495,7 +1495,7 @@ static struct resource *__pci_mmap_make_offset(struct pci_dev *dev,
 		*offset += hose->pci_mem_offset;
 		res_bit = IORESOURCE_MEM;
 	} else {
-		io_offset = (unsigned long)hose->io_base_virt;
+		io_offset = hose->io_base_virt - ___IO_BASE;
 		*offset += io_offset;
 		res_bit = IORESOURCE_IO;
 	}
@@ -1522,7 +1522,7 @@ static struct resource *__pci_mmap_make_offset(struct pci_dev *dev,
 
 		/* found it! construct the final physical address */
 		if (mmap_state == pci_mmap_io)
-			*offset += hose->io_base_phys - _IO_BASE;
+			*offset += hose->io_base_phys - io_offset;
 		return rp;
 	}
 
@@ -1739,6 +1739,23 @@ long sys_pciconfig_iobase(long which, unsigned long bus, unsigned long devfn)
 	return result;
 }
 
+void pci_resource_to_user(const struct pci_dev *dev, int bar,
+			  const struct resource *rsrc,
+			  u64 *start, u64 *end)
+{
+	struct pci_controller *hose = pci_bus_to_hose(dev->bus->number);
+	unsigned long offset = 0;
+
+	if (hose == NULL)
+		return;
+
+	if (rsrc->flags & IORESOURCE_IO)
+		offset = ___IO_BASE - hose->io_base_virt + hose->io_base_phys;
+
+	*start = rsrc->start + offset;
+	*end = rsrc->end + offset;
+}
+
 void __init
 pci_init_resource(struct resource *res, unsigned long start, unsigned long end,
 		  int flags, char *name)
diff --git a/arch/ppc64/kernel/pci.c b/arch/ppc64/kernel/pci.c
index 580676f87d2..ae6f579d3fa 100644
--- a/arch/ppc64/kernel/pci.c
+++ b/arch/ppc64/kernel/pci.c
@@ -351,7 +351,7 @@ static struct resource *__pci_mmap_make_offset(struct pci_dev *dev,
 		*offset += hose->pci_mem_offset;
 		res_bit = IORESOURCE_MEM;
 	} else {
-		io_offset = (unsigned long)hose->io_base_virt;
+		io_offset = (unsigned long)hose->io_base_virt - pci_io_base;
 		*offset += io_offset;
 		res_bit = IORESOURCE_IO;
 	}
@@ -378,7 +378,7 @@ static struct resource *__pci_mmap_make_offset(struct pci_dev *dev,
 
 		/* found it! construct the final physical address */
 		if (mmap_state == pci_mmap_io)
-			*offset += hose->io_base_phys - io_offset;
+		       	*offset += hose->io_base_phys - io_offset;
 		return rp;
 	}
 
@@ -944,4 +944,22 @@ int pci_read_irq_line(struct pci_dev *pci_dev)
 }
 EXPORT_SYMBOL(pci_read_irq_line);
 
+void pci_resource_to_user(const struct pci_dev *dev, int bar,
+			  const struct resource *rsrc,
+			  u64 *start, u64 *end)
+{
+	struct pci_controller *hose = pci_bus_to_host(dev->bus);
+	unsigned long offset = 0;
+
+	if (hose == NULL)
+		return;
+
+	if (rsrc->flags & IORESOURCE_IO)
+		offset = pci_io_base - (unsigned long)hose->io_base_virt +
+			hose->io_base_phys;
+
+	*start = rsrc->start + offset;
+	*end = rsrc->end + offset;
+}
+
 #endif /* CONFIG_PPC_MULTIPLATFORM */
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index a15f94072a6..cc9d65388e6 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -60,15 +60,18 @@ resource_show(struct device * dev, struct device_attribute *attr, char * buf)
 	char * str = buf;
 	int i;
 	int max = 7;
+	u64 start, end;
 
 	if (pci_dev->subordinate)
 		max = DEVICE_COUNT_RESOURCE;
 
 	for (i = 0; i < max; i++) {
-		str += sprintf(str,"0x%016lx 0x%016lx 0x%016lx\n",
-			       pci_resource_start(pci_dev,i),
-			       pci_resource_end(pci_dev,i),
-			       pci_resource_flags(pci_dev,i));
+		struct resource *res =  &pci_dev->resource[i];
+		pci_resource_to_user(pci_dev, i, res, &start, &end);
+		str += sprintf(str,"0x%016llx 0x%016llx 0x%016llx\n",
+			       (unsigned long long)start,
+			       (unsigned long long)end,
+			       (unsigned long long)res->flags);
 	}
 	return (str - buf);
 }
@@ -313,8 +316,21 @@ pci_mmap_resource(struct kobject *kobj, struct bin_attribute *attr,
 						       struct device, kobj));
 	struct resource *res = (struct resource *)attr->private;
 	enum pci_mmap_state mmap_type;
+	u64 start, end;
+	int i;
 
-	vma->vm_pgoff += res->start >> PAGE_SHIFT;
+	for (i = 0; i < PCI_ROM_RESOURCE; i++)
+		if (res == &pdev->resource[i])
+			break;
+	if (i >= PCI_ROM_RESOURCE)
+		return -ENODEV;
+
+	/* pci_mmap_page_range() expects the same kind of entry as coming
+	 * from /proc/bus/pci/ which is a "user visible" value. If this is
+	 * different from the resource itself, arch will do necessary fixup.
+	 */
+	pci_resource_to_user(pdev, i, res, &start, &end);
+	vma->vm_pgoff += start >> PAGE_SHIFT;
 	mmap_type = res->flags & IORESOURCE_MEM ? pci_mmap_mem : pci_mmap_io;
 
 	return pci_mmap_page_range(pdev, vma, mmap_type, 0);
diff --git a/drivers/pci/proc.c b/drivers/pci/proc.c
index e68bbfb1e7c..7988fc8df3f 100644
--- a/drivers/pci/proc.c
+++ b/drivers/pci/proc.c
@@ -355,14 +355,20 @@ static int show_device(struct seq_file *m, void *v)
 			dev->device,
 			dev->irq);
 	/* Here should be 7 and not PCI_NUM_RESOURCES as we need to preserve compatibility */
-	for(i=0; i<7; i++)
+	for (i=0; i<7; i++) {
+		u64 start, end;
+		pci_resource_to_user(dev, i, &dev->resource[i], &start, &end);
 		seq_printf(m, LONG_FORMAT,
-			dev->resource[i].start |
+			((unsigned long)start) |
 			(dev->resource[i].flags & PCI_REGION_FLAG_MASK));
-	for(i=0; i<7; i++)
+	}
+	for (i=0; i<7; i++) {
+		u64 start, end;
+		pci_resource_to_user(dev, i, &dev->resource[i], &start, &end);
 		seq_printf(m, LONG_FORMAT,
 			dev->resource[i].start < dev->resource[i].end ?
-			dev->resource[i].end - dev->resource[i].start + 1 : 0);
+			(unsigned long)(end - start) + 1 : 0);
+	}
 	seq_putc(m, '\t');
 	if (drv)
 		seq_printf(m, "%s", drv->name);
diff --git a/include/asm-ppc/pci.h b/include/asm-ppc/pci.h
index ce5ae6d048f..002e7b30577 100644
--- a/include/asm-ppc/pci.h
+++ b/include/asm-ppc/pci.h
@@ -103,6 +103,12 @@ extern pgprot_t	pci_phys_mem_access_prot(struct file *file,
 					 unsigned long size,
 					 pgprot_t prot);
 
+#define HAVE_ARCH_PCI_RESOURCE_TO_USER
+extern void pci_resource_to_user(const struct pci_dev *dev, int bar,
+				 const struct resource *rsrc,
+				 u64 *start, u64 *end);
+
+
 #endif	/* __KERNEL__ */
 
 #endif /* __PPC_PCI_H */
diff --git a/include/asm-ppc64/pci.h b/include/asm-ppc64/pci.h
index 6cd593f660a..411bf5dee39 100644
--- a/include/asm-ppc64/pci.h
+++ b/include/asm-ppc64/pci.h
@@ -136,6 +136,13 @@ extern pgprot_t	pci_phys_mem_access_prot(struct file *file,
 					 unsigned long size,
 					 pgprot_t prot);
 
+#ifdef CONFIG_PPC_MULTIPLATFORM
+#define HAVE_ARCH_PCI_RESOURCE_TO_USER
+extern void pci_resource_to_user(const struct pci_dev *dev, int bar,
+				 const struct resource *rsrc,
+				 u64 *start, u64 *end);
+#endif /* CONFIG_PPC_MULTIPLATFORM */
+
 
 #endif	/* __KERNEL__ */
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 0e9844929fe..cfa1455848f 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1020,6 +1020,20 @@ static inline char *pci_name(struct pci_dev *pdev)
 #define pci_pretty_name(dev) ""
 #endif
 
+
+/* Some archs don't want to expose struct resource to userland as-is
+ * in sysfs and /proc
+ */
+#ifndef HAVE_ARCH_PCI_RESOURCE_TO_USER
+static inline void pci_resource_to_user(const struct pci_dev *dev, int bar,
+                const struct resource *rsrc, u64 *start, u64 *end)
+{
+	*start = rsrc->start;
+	*end = rsrc->end;
+}
+#endif /* HAVE_ARCH_PCI_RESOURCE_TO_USER */
+
+
 /*
  *  The world is not perfect and supplies us with broken PCI devices.
  *  For at least a part of these bugs we need a work-around, so both
-- 
cgit v1.2.3-70-g09d2


From e24c2d963a604d9eaa560c90371fa387d3eec8f1 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 2 Jun 2005 12:55:50 -0700
Subject: [PATCH] PCI: DMA bursting advice

After seeing, at best, "guesses" as to the following kind
of information in several drivers, I decided that we really
need a way for platforms to specifically give advice in this
area for what works best with their PCI controller implementation.

Basically, this new interface gives DMA bursting advice on
PCI.  There are three forms of the advice:

1) Burst as much as possible, it is not necessary to end bursts
   on some particular boundary for best performance.

2) Burst on some byte count multiple.  A DMA burst to some multiple of
   number of bytes may be done, but it is important to end the burst
   on an exact multiple for best performance.

   The best example of this I am aware of are the PPC64 PCI
   controllers, where if you end a burst mid-cacheline then
   chip has to refetch the data and the IOMMU translations
   which hurts performance a lot.

3) Burst on a single byte count multiple.  Bursts shall end
   exactly on the next multiple boundary for best performance.

   Sparc64 and Alpha's PCI controllers operate this way.  They
   disconnect any device which tries to burst across a cacheline
   boundary.

   Actually, newer sparc64 PCI controllers do not have this behavior.
   That is why the "pdev" is passed into the interface, so I can
   add code later to check which PCI controller the system is using
   and give advice accordingly.

Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/asm-alpha/pci.h   | 17 +++++++++++++++++
 include/asm-arm/pci.h     |  8 ++++++++
 include/asm-frv/pci.h     |  8 ++++++++
 include/asm-i386/pci.h    |  8 ++++++++
 include/asm-ia64/pci.h    | 17 +++++++++++++++++
 include/asm-mips/pci.h    |  8 ++++++++
 include/asm-parisc/pci.h  | 17 +++++++++++++++++
 include/asm-ppc/pci.h     |  8 ++++++++
 include/asm-ppc64/pci.h   | 17 +++++++++++++++++
 include/asm-sh/pci.h      |  8 ++++++++
 include/asm-sh64/pci.h    |  8 ++++++++
 include/asm-sparc/pci.h   |  8 ++++++++
 include/asm-sparc64/pci.h | 17 +++++++++++++++++
 include/asm-v850/pci.h    |  8 ++++++++
 include/asm-x86_64/pci.h  |  8 ++++++++
 include/linux/pci.h       |  9 +++++++++
 16 files changed, 174 insertions(+)

(limited to 'include/linux')

diff --git a/include/asm-alpha/pci.h b/include/asm-alpha/pci.h
index 0c7b57bc043..6c71dc1ad4c 100644
--- a/include/asm-alpha/pci.h
+++ b/include/asm-alpha/pci.h
@@ -223,6 +223,23 @@ pci_dac_dma_sync_single_for_device(struct pci_dev *pdev, dma64_addr_t dma_addr,
 	/* Nothing to do. */
 }
 
+static inline void pci_dma_burst_advice(struct pci_dev *pdev,
+					enum pci_dma_burst_strategy *strat,
+					unsigned long *strategy_parameter)
+{
+	unsigned long cacheline_size;
+	u8 byte;
+
+	pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE, &byte);
+	if (byte == 0)
+		cacheline_size = 1024;
+	else
+		cacheline_size = (int) byte * 4;
+
+	*strat = PCI_DMA_BURST_BOUNDARY;
+	*strategy_parameter = cacheline_size;
+}
+
 /* TODO: integrate with include/asm-generic/pci.h ? */
 static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
 {
diff --git a/include/asm-arm/pci.h b/include/asm-arm/pci.h
index 40ffaefbeb1..bc2ec425aca 100644
--- a/include/asm-arm/pci.h
+++ b/include/asm-arm/pci.h
@@ -42,6 +42,14 @@ static inline void pcibios_penalize_isa_irq(int irq)
 #define pci_unmap_len(PTR, LEN_NAME)		((PTR)->LEN_NAME)
 #define pci_unmap_len_set(PTR, LEN_NAME, VAL)	(((PTR)->LEN_NAME) = (VAL))
 
+static inline void pci_dma_burst_advice(struct pci_dev *pdev,
+					enum pci_dma_burst_strategy *strat,
+					unsigned long *strategy_parameter)
+{
+	*strat = PCI_DMA_BURST_INFINITY;
+	*strategy_parameter = ~0UL;
+}
+
 #define HAVE_PCI_MMAP
 extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
                                enum pci_mmap_state mmap_state, int write_combine);
diff --git a/include/asm-frv/pci.h b/include/asm-frv/pci.h
index a6a469231f6..13427240664 100644
--- a/include/asm-frv/pci.h
+++ b/include/asm-frv/pci.h
@@ -57,6 +57,14 @@ extern void pci_free_consistent(struct pci_dev *hwdev, size_t size,
  */
 #define PCI_DMA_BUS_IS_PHYS	(1)
 
+static inline void pci_dma_burst_advice(struct pci_dev *pdev,
+					enum pci_dma_burst_strategy *strat,
+					unsigned long *strategy_parameter)
+{
+	*strat = PCI_DMA_BURST_INFINITY;
+	*strategy_parameter = ~0UL;
+}
+
 /*
  *	These are pretty much arbitary with the CoMEM implementation.
  *	We have the whole address space to ourselves.
diff --git a/include/asm-i386/pci.h b/include/asm-i386/pci.h
index fb749b85a73..bf07b3af85e 100644
--- a/include/asm-i386/pci.h
+++ b/include/asm-i386/pci.h
@@ -99,6 +99,14 @@ static inline void pcibios_add_platform_entries(struct pci_dev *dev)
 {
 }
 
+static inline void pci_dma_burst_advice(struct pci_dev *pdev,
+					enum pci_dma_burst_strategy *strat,
+					unsigned long *strategy_parameter)
+{
+	*strat = PCI_DMA_BURST_INFINITY;
+	*strategy_parameter = ~0UL;
+}
+
 #endif /* __KERNEL__ */
 
 /* implement the pci_ DMA API in terms of the generic device dma_ one */
diff --git a/include/asm-ia64/pci.h b/include/asm-ia64/pci.h
index a8314ee4e7d..c9f1ab4e477 100644
--- a/include/asm-ia64/pci.h
+++ b/include/asm-ia64/pci.h
@@ -82,6 +82,23 @@ extern int pcibios_prep_mwi (struct pci_dev *);
 #define sg_dma_len(sg)		((sg)->dma_length)
 #define sg_dma_address(sg)	((sg)->dma_address)
 
+static inline void pci_dma_burst_advice(struct pci_dev *pdev,
+					enum pci_dma_burst_strategy *strat,
+					unsigned long *strategy_parameter)
+{
+	unsigned long cacheline_size;
+	u8 byte;
+
+	pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE, &byte);
+	if (byte == 0)
+		cacheline_size = 1024;
+	else
+		cacheline_size = (int) byte * 4;
+
+	*strat = PCI_DMA_BURST_MULTIPLE;
+	*strategy_parameter = cacheline_size;
+}
+
 #define HAVE_PCI_MMAP
 extern int pci_mmap_page_range (struct pci_dev *dev, struct vm_area_struct *vma,
 				enum pci_mmap_state mmap_state, int write_combine);
diff --git a/include/asm-mips/pci.h b/include/asm-mips/pci.h
index c9c576b4855..20b93bfa456 100644
--- a/include/asm-mips/pci.h
+++ b/include/asm-mips/pci.h
@@ -130,6 +130,14 @@ extern void pci_dac_dma_sync_single_for_cpu(struct pci_dev *pdev,
 extern void pci_dac_dma_sync_single_for_device(struct pci_dev *pdev,
 	dma64_addr_t dma_addr, size_t len, int direction);
 
+static inline void pci_dma_burst_advice(struct pci_dev *pdev,
+					enum pci_dma_burst_strategy *strat,
+					unsigned long *strategy_parameter)
+{
+	*strat = PCI_DMA_BURST_INFINITY;
+	*strategy_parameter = ~0UL;
+}
+
 extern void pcibios_resource_to_bus(struct pci_dev *dev,
 	struct pci_bus_region *region, struct resource *res);
 
diff --git a/include/asm-parisc/pci.h b/include/asm-parisc/pci.h
index 0763c2982fb..f9f5bf90111 100644
--- a/include/asm-parisc/pci.h
+++ b/include/asm-parisc/pci.h
@@ -230,6 +230,23 @@ extern inline void pcibios_register_hba(struct pci_hba_data *x)
 /* export the pci_ DMA API in terms of the dma_ one */
 #include <asm-generic/pci-dma-compat.h>
 
+static inline void pci_dma_burst_advice(struct pci_dev *pdev,
+					enum pci_dma_burst_strategy *strat,
+					unsigned long *strategy_parameter)
+{
+	unsigned long cacheline_size;
+	u8 byte;
+
+	pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE, &byte);
+	if (byte == 0)
+		cacheline_size = 1024;
+	else
+		cacheline_size = (int) byte * 4;
+
+	*strat = PCI_DMA_BURST_MULTIPLE;
+	*strategy_parameter = cacheline_size;
+}
+
 extern void
 pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
 			 struct resource *res);
diff --git a/include/asm-ppc/pci.h b/include/asm-ppc/pci.h
index 002e7b30577..669e9de7a52 100644
--- a/include/asm-ppc/pci.h
+++ b/include/asm-ppc/pci.h
@@ -69,6 +69,14 @@ extern unsigned long pci_bus_to_phys(unsigned int ba, int busnr);
 #define pci_unmap_len(PTR, LEN_NAME)		(0)
 #define pci_unmap_len_set(PTR, LEN_NAME, VAL)	do { } while (0)
 
+static inline void pci_dma_burst_advice(struct pci_dev *pdev,
+					enum pci_dma_burst_strategy *strat,
+					unsigned long *strategy_parameter)
+{
+	*strat = PCI_DMA_BURST_INFINITY;
+	*strategy_parameter = ~0UL;
+}
+
 /*
  * At present there are very few 32-bit PPC machines that can have
  * memory above the 4GB point, and we don't support that.
diff --git a/include/asm-ppc64/pci.h b/include/asm-ppc64/pci.h
index 411bf5dee39..20beb10c090 100644
--- a/include/asm-ppc64/pci.h
+++ b/include/asm-ppc64/pci.h
@@ -78,6 +78,23 @@ static inline int pci_dac_dma_supported(struct pci_dev *hwdev,u64 mask)
 	return 0;
 }
 
+static inline void pci_dma_burst_advice(struct pci_dev *pdev,
+					enum pci_dma_burst_strategy *strat,
+					unsigned long *strategy_parameter)
+{
+	unsigned long cacheline_size;
+	u8 byte;
+
+	pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE, &byte);
+	if (byte == 0)
+		cacheline_size = 1024;
+	else
+		cacheline_size = (int) byte * 4;
+
+	*strat = PCI_DMA_BURST_MULTIPLE;
+	*strategy_parameter = cacheline_size;
+}
+
 extern int pci_domain_nr(struct pci_bus *bus);
 
 /* Decide whether to display the domain number in /proc */
diff --git a/include/asm-sh/pci.h b/include/asm-sh/pci.h
index 9c3b63d0105..7237bc6a728 100644
--- a/include/asm-sh/pci.h
+++ b/include/asm-sh/pci.h
@@ -96,6 +96,14 @@ static inline void pcibios_penalize_isa_irq(int irq)
 #define sg_dma_address(sg)	(virt_to_bus((sg)->dma_address))
 #define sg_dma_len(sg)		((sg)->length)
 
+static inline void pci_dma_burst_advice(struct pci_dev *pdev,
+					enum pci_dma_burst_strategy *strat,
+					unsigned long *strategy_parameter)
+{
+	*strat = PCI_DMA_BURST_INFINITY;
+	*strategy_parameter = ~0UL;
+}
+
 /* Board-specific fixup routines. */
 extern void pcibios_fixup(void);
 extern void pcibios_fixup_irqs(void);
diff --git a/include/asm-sh64/pci.h b/include/asm-sh64/pci.h
index 8cc14e13975..0ac15ab01cc 100644
--- a/include/asm-sh64/pci.h
+++ b/include/asm-sh64/pci.h
@@ -86,6 +86,14 @@ static inline void pcibios_penalize_isa_irq(int irq)
 #define sg_dma_address(sg)	((sg)->dma_address)
 #define sg_dma_len(sg)		((sg)->length)
 
+static inline void pci_dma_burst_advice(struct pci_dev *pdev,
+					enum pci_dma_burst_strategy *strat,
+					unsigned long *strategy_parameter)
+{
+	*strat = PCI_DMA_BURST_INFINITY;
+	*strategy_parameter = ~0UL;
+}
+
 /* Board-specific fixup routines. */
 extern void pcibios_fixup(void);
 extern void pcibios_fixup_irqs(void);
diff --git a/include/asm-sparc/pci.h b/include/asm-sparc/pci.h
index d200a25a737..2fd65db95e9 100644
--- a/include/asm-sparc/pci.h
+++ b/include/asm-sparc/pci.h
@@ -144,6 +144,14 @@ extern inline int pci_dma_supported(struct pci_dev *hwdev, u64 mask)
 
 #define pci_dac_dma_supported(dev, mask)	(0)
 
+static inline void pci_dma_burst_advice(struct pci_dev *pdev,
+					enum pci_dma_burst_strategy *strat,
+					unsigned long *strategy_parameter)
+{
+	*strat = PCI_DMA_BURST_INFINITY;
+	*strategy_parameter = ~0UL;
+}
+
 static inline void pcibios_add_platform_entries(struct pci_dev *dev)
 {
 }
diff --git a/include/asm-sparc64/pci.h b/include/asm-sparc64/pci.h
index 2a0c85cd1c1..402667300d0 100644
--- a/include/asm-sparc64/pci.h
+++ b/include/asm-sparc64/pci.h
@@ -220,6 +220,23 @@ static inline int pci_dma_mapping_error(dma_addr_t dma_addr)
 	return (dma_addr == PCI_DMA_ERROR_CODE);
 }
 
+static inline void pci_dma_burst_advice(struct pci_dev *pdev,
+					enum pci_dma_burst_strategy *strat,
+					unsigned long *strategy_parameter)
+{
+	unsigned long cacheline_size;
+	u8 byte;
+
+	pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE, &byte);
+	if (byte == 0)
+		cacheline_size = 1024;
+	else
+		cacheline_size = (int) byte * 4;
+
+	*strat = PCI_DMA_BURST_BOUNDARY;
+	*strategy_parameter = cacheline_size;
+}
+
 /* Return the index of the PCI controller for device PDEV. */
 
 extern int pci_domain_nr(struct pci_bus *bus);
diff --git a/include/asm-v850/pci.h b/include/asm-v850/pci.h
index e41941447b4..d26eb8d6731 100644
--- a/include/asm-v850/pci.h
+++ b/include/asm-v850/pci.h
@@ -81,6 +81,14 @@ extern void
 pci_free_consistent (struct pci_dev *pdev, size_t size, void *cpu_addr,
 		     dma_addr_t dma_addr);
 
+static inline void pci_dma_burst_advice(struct pci_dev *pdev,
+					enum pci_dma_burst_strategy *strat,
+					unsigned long *strategy_parameter)
+{
+	*strat = PCI_DMA_BURST_INFINITY;
+	*strategy_parameter = ~0UL;
+}
+
 static inline void pcibios_add_platform_entries(struct pci_dev *dev)
 {
 }
diff --git a/include/asm-x86_64/pci.h b/include/asm-x86_64/pci.h
index 8712520ca47..8461d6af102 100644
--- a/include/asm-x86_64/pci.h
+++ b/include/asm-x86_64/pci.h
@@ -123,6 +123,14 @@ pci_dac_dma_sync_single_for_device(struct pci_dev *pdev, dma64_addr_t dma_addr,
 	flush_write_buffers();
 }
 
+static inline void pci_dma_burst_advice(struct pci_dev *pdev,
+					enum pci_dma_burst_strategy *strat,
+					unsigned long *strategy_parameter)
+{
+	*strat = PCI_DMA_BURST_INFINITY;
+	*strategy_parameter = ~0UL;
+}
+
 #define HAVE_PCI_MMAP
 extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
 			       enum pci_mmap_state mmap_state, int write_combine);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index cfa1455848f..9ce4f1be093 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -874,6 +874,15 @@ int pci_scan_bridge(struct pci_bus *bus, struct pci_dev * dev, int max, int pass
 #define	pci_pool_alloc(pool, flags, handle) dma_pool_alloc(pool, flags, handle)
 #define	pci_pool_free(pool, vaddr, addr) dma_pool_free(pool, vaddr, addr)
 
+enum pci_dma_burst_strategy {
+	PCI_DMA_BURST_INFINITY,	/* make bursts as large as possible,
+				   strategy_parameter is N/A */
+	PCI_DMA_BURST_BOUNDARY, /* disconnect at every strategy_parameter
+				   byte boundaries */
+	PCI_DMA_BURST_MULTIPLE, /* disconnect at some multiple of
+				   strategy_parameter byte boundaries */
+};
+
 #if defined(CONFIG_ISA) || defined(CONFIG_EISA)
 extern struct pci_dev *isa_bridge;
 #endif
-- 
cgit v1.2.3-70-g09d2


From bb4a61b6eaee01707f24deeefc5d7136f25f75c5 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Mon, 6 Jun 2005 23:07:46 -0700
Subject: [PATCH] PCI: fix up errors after dma bursting patch and CONFIG_PCI=n

With CONFIG_PCI=n:

In file included from include/linux/pci.h:917,
                 from lib/iomap.c:6:
include/asm/pci.h:104: warning: `enum pci_dma_burst_strategy' declared inside parameter list
include/asm/pci.h:104: warning: its scope is only this definition or declaration, which is probably not what you want.
include/asm/pci.h: In function `pci_dma_burst_advice':
include/asm/pci.h:106: dereferencing pointer to incomplete type
include/asm/pci.h:106: `PCI_DMA_BURST_INFINITY' undeclared (first use in this function)
include/asm/pci.h:106: (Each undeclared identifier is reported only once
include/asm/pci.h:106: for each function it appears in.)
make[1]: *** [lib/iomap.o] Error 1

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/asm-alpha/pci.h   | 2 ++
 include/asm-arm/pci.h     | 2 ++
 include/asm-frv/pci.h     | 2 ++
 include/asm-i386/pci.h    | 2 ++
 include/asm-ia64/pci.h    | 2 ++
 include/asm-mips/pci.h    | 2 ++
 include/asm-parisc/pci.h  | 2 ++
 include/asm-ppc/pci.h     | 2 ++
 include/asm-ppc64/pci.h   | 2 ++
 include/asm-sh/pci.h      | 2 ++
 include/asm-sh64/pci.h    | 2 ++
 include/asm-sparc/pci.h   | 2 ++
 include/asm-sparc64/pci.h | 2 ++
 include/asm-v850/pci.h    | 2 ++
 include/asm-x86_64/pci.h  | 2 ++
 include/linux/pci.h       | 2 ++
 16 files changed, 32 insertions(+)

(limited to 'include/linux')

diff --git a/include/asm-alpha/pci.h b/include/asm-alpha/pci.h
index 6c71dc1ad4c..b7806aa3785 100644
--- a/include/asm-alpha/pci.h
+++ b/include/asm-alpha/pci.h
@@ -223,6 +223,7 @@ pci_dac_dma_sync_single_for_device(struct pci_dev *pdev, dma64_addr_t dma_addr,
 	/* Nothing to do. */
 }
 
+#ifdef CONFIG_PCI
 static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 					enum pci_dma_burst_strategy *strat,
 					unsigned long *strategy_parameter)
@@ -239,6 +240,7 @@ static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 	*strat = PCI_DMA_BURST_BOUNDARY;
 	*strategy_parameter = cacheline_size;
 }
+#endif
 
 /* TODO: integrate with include/asm-generic/pci.h ? */
 static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
diff --git a/include/asm-arm/pci.h b/include/asm-arm/pci.h
index bc2ec425aca..e300646fe65 100644
--- a/include/asm-arm/pci.h
+++ b/include/asm-arm/pci.h
@@ -42,6 +42,7 @@ static inline void pcibios_penalize_isa_irq(int irq)
 #define pci_unmap_len(PTR, LEN_NAME)		((PTR)->LEN_NAME)
 #define pci_unmap_len_set(PTR, LEN_NAME, VAL)	(((PTR)->LEN_NAME) = (VAL))
 
+#ifdef CONFIG_PCI
 static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 					enum pci_dma_burst_strategy *strat,
 					unsigned long *strategy_parameter)
@@ -49,6 +50,7 @@ static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 	*strat = PCI_DMA_BURST_INFINITY;
 	*strategy_parameter = ~0UL;
 }
+#endif
 
 #define HAVE_PCI_MMAP
 extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
diff --git a/include/asm-frv/pci.h b/include/asm-frv/pci.h
index 13427240664..b4efe5e3591 100644
--- a/include/asm-frv/pci.h
+++ b/include/asm-frv/pci.h
@@ -57,6 +57,7 @@ extern void pci_free_consistent(struct pci_dev *hwdev, size_t size,
  */
 #define PCI_DMA_BUS_IS_PHYS	(1)
 
+#ifdef CONFIG_PCI
 static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 					enum pci_dma_burst_strategy *strat,
 					unsigned long *strategy_parameter)
@@ -64,6 +65,7 @@ static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 	*strat = PCI_DMA_BURST_INFINITY;
 	*strategy_parameter = ~0UL;
 }
+#endif
 
 /*
  *	These are pretty much arbitary with the CoMEM implementation.
diff --git a/include/asm-i386/pci.h b/include/asm-i386/pci.h
index bf07b3af85e..3561899eb82 100644
--- a/include/asm-i386/pci.h
+++ b/include/asm-i386/pci.h
@@ -99,6 +99,7 @@ static inline void pcibios_add_platform_entries(struct pci_dev *dev)
 {
 }
 
+#ifdef CONFIG_PCI
 static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 					enum pci_dma_burst_strategy *strat,
 					unsigned long *strategy_parameter)
@@ -106,6 +107,7 @@ static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 	*strat = PCI_DMA_BURST_INFINITY;
 	*strategy_parameter = ~0UL;
 }
+#endif
 
 #endif /* __KERNEL__ */
 
diff --git a/include/asm-ia64/pci.h b/include/asm-ia64/pci.h
index c9f1ab4e477..0c4c5d801d3 100644
--- a/include/asm-ia64/pci.h
+++ b/include/asm-ia64/pci.h
@@ -82,6 +82,7 @@ extern int pcibios_prep_mwi (struct pci_dev *);
 #define sg_dma_len(sg)		((sg)->dma_length)
 #define sg_dma_address(sg)	((sg)->dma_address)
 
+#ifdef CONFIG_PCI
 static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 					enum pci_dma_burst_strategy *strat,
 					unsigned long *strategy_parameter)
@@ -98,6 +99,7 @@ static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 	*strat = PCI_DMA_BURST_MULTIPLE;
 	*strategy_parameter = cacheline_size;
 }
+#endif
 
 #define HAVE_PCI_MMAP
 extern int pci_mmap_page_range (struct pci_dev *dev, struct vm_area_struct *vma,
diff --git a/include/asm-mips/pci.h b/include/asm-mips/pci.h
index 20b93bfa456..2d323b6e147 100644
--- a/include/asm-mips/pci.h
+++ b/include/asm-mips/pci.h
@@ -130,6 +130,7 @@ extern void pci_dac_dma_sync_single_for_cpu(struct pci_dev *pdev,
 extern void pci_dac_dma_sync_single_for_device(struct pci_dev *pdev,
 	dma64_addr_t dma_addr, size_t len, int direction);
 
+#ifdef CONFIG_PCI
 static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 					enum pci_dma_burst_strategy *strat,
 					unsigned long *strategy_parameter)
@@ -137,6 +138,7 @@ static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 	*strat = PCI_DMA_BURST_INFINITY;
 	*strategy_parameter = ~0UL;
 }
+#endif
 
 extern void pcibios_resource_to_bus(struct pci_dev *dev,
 	struct pci_bus_region *region, struct resource *res);
diff --git a/include/asm-parisc/pci.h b/include/asm-parisc/pci.h
index f9f5bf90111..ee741c15017 100644
--- a/include/asm-parisc/pci.h
+++ b/include/asm-parisc/pci.h
@@ -230,6 +230,7 @@ extern inline void pcibios_register_hba(struct pci_hba_data *x)
 /* export the pci_ DMA API in terms of the dma_ one */
 #include <asm-generic/pci-dma-compat.h>
 
+#ifdef CONFIG_PCI
 static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 					enum pci_dma_burst_strategy *strat,
 					unsigned long *strategy_parameter)
@@ -246,6 +247,7 @@ static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 	*strat = PCI_DMA_BURST_MULTIPLE;
 	*strategy_parameter = cacheline_size;
 }
+#endif
 
 extern void
 pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
diff --git a/include/asm-ppc/pci.h b/include/asm-ppc/pci.h
index 669e9de7a52..db0a2a0ec74 100644
--- a/include/asm-ppc/pci.h
+++ b/include/asm-ppc/pci.h
@@ -69,6 +69,7 @@ extern unsigned long pci_bus_to_phys(unsigned int ba, int busnr);
 #define pci_unmap_len(PTR, LEN_NAME)		(0)
 #define pci_unmap_len_set(PTR, LEN_NAME, VAL)	do { } while (0)
 
+#ifdef CONFIG_PCI
 static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 					enum pci_dma_burst_strategy *strat,
 					unsigned long *strategy_parameter)
@@ -76,6 +77,7 @@ static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 	*strat = PCI_DMA_BURST_INFINITY;
 	*strategy_parameter = ~0UL;
 }
+#endif
 
 /*
  * At present there are very few 32-bit PPC machines that can have
diff --git a/include/asm-ppc64/pci.h b/include/asm-ppc64/pci.h
index 20beb10c090..d12dfce21e2 100644
--- a/include/asm-ppc64/pci.h
+++ b/include/asm-ppc64/pci.h
@@ -78,6 +78,7 @@ static inline int pci_dac_dma_supported(struct pci_dev *hwdev,u64 mask)
 	return 0;
 }
 
+#ifdef CONFIG_PCI
 static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 					enum pci_dma_burst_strategy *strat,
 					unsigned long *strategy_parameter)
@@ -94,6 +95,7 @@ static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 	*strat = PCI_DMA_BURST_MULTIPLE;
 	*strategy_parameter = cacheline_size;
 }
+#endif
 
 extern int pci_domain_nr(struct pci_bus *bus);
 
diff --git a/include/asm-sh/pci.h b/include/asm-sh/pci.h
index 7237bc6a728..26044889c77 100644
--- a/include/asm-sh/pci.h
+++ b/include/asm-sh/pci.h
@@ -96,6 +96,7 @@ static inline void pcibios_penalize_isa_irq(int irq)
 #define sg_dma_address(sg)	(virt_to_bus((sg)->dma_address))
 #define sg_dma_len(sg)		((sg)->length)
 
+#ifdef CONFIG_PCI
 static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 					enum pci_dma_burst_strategy *strat,
 					unsigned long *strategy_parameter)
@@ -103,6 +104,7 @@ static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 	*strat = PCI_DMA_BURST_INFINITY;
 	*strategy_parameter = ~0UL;
 }
+#endif
 
 /* Board-specific fixup routines. */
 extern void pcibios_fixup(void);
diff --git a/include/asm-sh64/pci.h b/include/asm-sh64/pci.h
index 0ac15ab01cc..c68870e02d9 100644
--- a/include/asm-sh64/pci.h
+++ b/include/asm-sh64/pci.h
@@ -86,6 +86,7 @@ static inline void pcibios_penalize_isa_irq(int irq)
 #define sg_dma_address(sg)	((sg)->dma_address)
 #define sg_dma_len(sg)		((sg)->length)
 
+#ifdef CONFIG_PCI
 static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 					enum pci_dma_burst_strategy *strat,
 					unsigned long *strategy_parameter)
@@ -93,6 +94,7 @@ static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 	*strat = PCI_DMA_BURST_INFINITY;
 	*strategy_parameter = ~0UL;
 }
+#endif
 
 /* Board-specific fixup routines. */
 extern void pcibios_fixup(void);
diff --git a/include/asm-sparc/pci.h b/include/asm-sparc/pci.h
index 2fd65db95e9..44bb38758c9 100644
--- a/include/asm-sparc/pci.h
+++ b/include/asm-sparc/pci.h
@@ -144,6 +144,7 @@ extern inline int pci_dma_supported(struct pci_dev *hwdev, u64 mask)
 
 #define pci_dac_dma_supported(dev, mask)	(0)
 
+#ifdef CONFIG_PCI
 static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 					enum pci_dma_burst_strategy *strat,
 					unsigned long *strategy_parameter)
@@ -151,6 +152,7 @@ static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 	*strat = PCI_DMA_BURST_INFINITY;
 	*strategy_parameter = ~0UL;
 }
+#endif
 
 static inline void pcibios_add_platform_entries(struct pci_dev *dev)
 {
diff --git a/include/asm-sparc64/pci.h b/include/asm-sparc64/pci.h
index 402667300d0..84e41c1ef3f 100644
--- a/include/asm-sparc64/pci.h
+++ b/include/asm-sparc64/pci.h
@@ -220,6 +220,7 @@ static inline int pci_dma_mapping_error(dma_addr_t dma_addr)
 	return (dma_addr == PCI_DMA_ERROR_CODE);
 }
 
+#ifdef CONFIG_PCI
 static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 					enum pci_dma_burst_strategy *strat,
 					unsigned long *strategy_parameter)
@@ -236,6 +237,7 @@ static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 	*strat = PCI_DMA_BURST_BOUNDARY;
 	*strategy_parameter = cacheline_size;
 }
+#endif
 
 /* Return the index of the PCI controller for device PDEV. */
 
diff --git a/include/asm-v850/pci.h b/include/asm-v850/pci.h
index d26eb8d6731..8e79be0fe99 100644
--- a/include/asm-v850/pci.h
+++ b/include/asm-v850/pci.h
@@ -81,6 +81,7 @@ extern void
 pci_free_consistent (struct pci_dev *pdev, size_t size, void *cpu_addr,
 		     dma_addr_t dma_addr);
 
+#ifdef CONFIG_PCI
 static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 					enum pci_dma_burst_strategy *strat,
 					unsigned long *strategy_parameter)
@@ -88,6 +89,7 @@ static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 	*strat = PCI_DMA_BURST_INFINITY;
 	*strategy_parameter = ~0UL;
 }
+#endif
 
 static inline void pcibios_add_platform_entries(struct pci_dev *dev)
 {
diff --git a/include/asm-x86_64/pci.h b/include/asm-x86_64/pci.h
index 8461d6af102..c1961db88fa 100644
--- a/include/asm-x86_64/pci.h
+++ b/include/asm-x86_64/pci.h
@@ -123,6 +123,7 @@ pci_dac_dma_sync_single_for_device(struct pci_dev *pdev, dma64_addr_t dma_addr,
 	flush_write_buffers();
 }
 
+#ifdef CONFIG_PCI
 static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 					enum pci_dma_burst_strategy *strat,
 					unsigned long *strategy_parameter)
@@ -130,6 +131,7 @@ static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 	*strat = PCI_DMA_BURST_INFINITY;
 	*strategy_parameter = ~0UL;
 }
+#endif
 
 #define HAVE_PCI_MMAP
 extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 9ce4f1be093..66798b46f30 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -985,6 +985,8 @@ static inline int pci_proc_domain(struct pci_bus *bus)
 }
 #endif
 
+#define pci_dma_burst_advice(pdev, strat, strategy_parameter) do { } while (0)
+
 #endif /* !CONFIG_PCI */
 
 /* these helpers provide future and backwards compatibility
-- 
cgit v1.2.3-70-g09d2


From 545493917dc90298e1c38f018ad893f5518928e7 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Thu, 23 Jun 2005 17:35:56 -0700
Subject: [PATCH] PCI: add proper MCFG table parsing to ACPI core.

This patch is the first step in properly handling the MCFG PCI table.
It defines the structures properly, and saves off the table so that the
pci mmconfig code can access it.  It moves the parsing of the table a
little later in the boot process, but still before the information is
needed.

Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/i386/kernel/acpi/boot.c | 41 +++++++++++++++++++++++++++++++++--------
 arch/i386/pci/mmconfig.c     | 12 +++++++-----
 arch/x86_64/pci/mmconfig.c   | 16 +++++++++-------
 include/linux/acpi.h         | 16 +++++++++++++---
 4 files changed, 62 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c
index f5360d88bcf..b7808a89d94 100644
--- a/arch/i386/kernel/acpi/boot.c
+++ b/arch/i386/kernel/acpi/boot.c
@@ -159,9 +159,15 @@ char *__acpi_map_table(unsigned long phys, unsigned long size)
 #endif
 
 #ifdef CONFIG_PCI_MMCONFIG
-static int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size)
+/* The physical address of the MMCONFIG aperture.  Set from ACPI tables. */
+struct acpi_table_mcfg_config *pci_mmcfg_config;
+int pci_mmcfg_config_num;
+
+int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size)
 {
 	struct acpi_table_mcfg *mcfg;
+	unsigned long i;
+	int config_size;
 
 	if (!phys_addr || !size)
 		return -EINVAL;
@@ -172,18 +178,38 @@ static int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size)
 		return -ENODEV;
 	}
 
-	if (mcfg->base_reserved) {
-		printk(KERN_ERR PREFIX "MMCONFIG not in low 4GB of memory\n");
+	/* how many config structures do we have */
+	pci_mmcfg_config_num = 0;
+	i = size - sizeof(struct acpi_table_mcfg);
+	while (i >= sizeof(struct acpi_table_mcfg_config)) {
+		++pci_mmcfg_config_num;
+		i -= sizeof(struct acpi_table_mcfg_config);
+	};
+	if (pci_mmcfg_config_num == 0) {
+		printk(KERN_ERR PREFIX "MMCONFIG has no entries\n");
 		return -ENODEV;
 	}
 
-	pci_mmcfg_base_addr = mcfg->base_address;
+	config_size = pci_mmcfg_config_num * sizeof(*pci_mmcfg_config);
+	pci_mmcfg_config = kmalloc(config_size, GFP_KERNEL);
+	if (!pci_mmcfg_config) {
+		printk(KERN_WARNING PREFIX
+		       "No memory for MCFG config tables\n");
+		return -ENOMEM;
+	}
+
+	memcpy(pci_mmcfg_config, &mcfg->config, config_size);
+	for (i = 0; i < pci_mmcfg_config_num; ++i) {
+		if (mcfg->config[i].base_reserved) {
+			printk(KERN_ERR PREFIX
+			       "MMCONFIG not in low 4GB of memory\n");
+			return -ENODEV;
+		}
+	}
 
 	return 0;
 }
-#else
-#define	acpi_parse_mcfg NULL
-#endif /* !CONFIG_PCI_MMCONFIG */
+#endif /* CONFIG_PCI_MMCONFIG */
 
 #ifdef CONFIG_X86_LOCAL_APIC
 static int __init
@@ -1139,7 +1165,6 @@ int __init acpi_boot_init(void)
 	acpi_process_madt();
 
 	acpi_table_parse(ACPI_HPET, acpi_parse_hpet);
-	acpi_table_parse(ACPI_MCFG, acpi_parse_mcfg);
 
 	return 0;
 }
diff --git a/arch/i386/pci/mmconfig.c b/arch/i386/pci/mmconfig.c
index 021a50aa51f..5fbaa913225 100644
--- a/arch/i386/pci/mmconfig.c
+++ b/arch/i386/pci/mmconfig.c
@@ -11,11 +11,9 @@
 
 #include <linux/pci.h>
 #include <linux/init.h>
+#include <linux/acpi.h>
 #include "pci.h"
 
-/* The physical address of the MMCONFIG aperture.  Set from ACPI tables. */
-u32 pci_mmcfg_base_addr;
-
 #define mmcfg_virt_addr ((void __iomem *) fix_to_virt(FIX_PCIE_MCFG))
 
 /* The base address of the last MMCONFIG device accessed */
@@ -27,7 +25,7 @@ static u32 mmcfg_last_accessed_device;
 
 static inline void pci_exp_set_dev_base(int bus, int devfn)
 {
-	u32 dev_base = pci_mmcfg_base_addr | (bus << 20) | (devfn << 12);
+	u32 dev_base = pci_mmcfg_config[0].base_address | (bus << 20) | (devfn << 12);
 	if (dev_base != mmcfg_last_accessed_device) {
 		mmcfg_last_accessed_device = dev_base;
 		set_fixmap_nocache(FIX_PCIE_MCFG, dev_base);
@@ -101,7 +99,11 @@ static int __init pci_mmcfg_init(void)
 {
 	if ((pci_probe & PCI_PROBE_MMCONF) == 0)
 		goto out;
-	if (!pci_mmcfg_base_addr)
+
+	acpi_table_parse(ACPI_MCFG, acpi_parse_mcfg);
+	if ((pci_mmcfg_config_num == 0) ||
+	    (pci_mmcfg_config == NULL) ||
+	    (pci_mmcfg_config[0].base_address == 0))
 		goto out;
 
 	/* Kludge for now. Don't use mmconfig on AMD systems because
diff --git a/arch/x86_64/pci/mmconfig.c b/arch/x86_64/pci/mmconfig.c
index b693c232fd0..09cfcc1234b 100644
--- a/arch/x86_64/pci/mmconfig.c
+++ b/arch/x86_64/pci/mmconfig.c
@@ -7,15 +7,13 @@
 
 #include <linux/pci.h>
 #include <linux/init.h>
+#include <linux/acpi.h>
 #include "pci.h"
 
 #define MMCONFIG_APER_SIZE (256*1024*1024)
 
-/* The physical address of the MMCONFIG aperture.  Set from ACPI tables. */
-u32 pci_mmcfg_base_addr;
-
 /* Static virtual mapping of the MMCONFIG aperture */
-char *pci_mmcfg_virt;
+static char *pci_mmcfg_virt;
 
 static inline char *pci_dev_base(unsigned int bus, unsigned int devfn)
 {
@@ -77,7 +75,11 @@ static int __init pci_mmcfg_init(void)
 {
 	if ((pci_probe & PCI_PROBE_MMCONF) == 0)
 		return 0;
-	if (!pci_mmcfg_base_addr)
+
+	acpi_table_parse(ACPI_MCFG, acpi_parse_mcfg);
+	if ((pci_mmcfg_config_num == 0) ||
+	    (pci_mmcfg_config == NULL) ||
+	    (pci_mmcfg_config[0].base_address == 0))
 		return 0;
 
 	/* Kludge for now. Don't use mmconfig on AMD systems because
@@ -88,13 +90,13 @@ static int __init pci_mmcfg_init(void)
 		return 0; 
 
 	/* RED-PEN i386 doesn't do _nocache right now */
-	pci_mmcfg_virt = ioremap_nocache(pci_mmcfg_base_addr, MMCONFIG_APER_SIZE);
+	pci_mmcfg_virt = ioremap_nocache(pci_mmcfg_config[0].base_address, MMCONFIG_APER_SIZE);
 	if (!pci_mmcfg_virt) { 
 		printk("PCI: Cannot map mmconfig aperture\n");
 		return 0;
 	}	
 
-	printk(KERN_INFO "PCI: Using MMCONFIG at %x\n", pci_mmcfg_base_addr);
+	printk(KERN_INFO "PCI: Using MMCONFIG at %x\n", pci_mmcfg_config[0].base_address);
 	raw_pci_ops = &pci_mmcfg;
 	pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF;
 
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index f5bc298707e..ef8483673aa 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -342,11 +342,19 @@ struct acpi_table_ecdt {
 
 /* PCI MMCONFIG */
 
+/* Defined in PCI Firmware Specification 3.0 */
+struct acpi_table_mcfg_config {
+	u32				base_address;
+	u32				base_reserved;
+	u16				pci_segment_group_number;
+	u8				start_bus_number;
+	u8				end_bus_number;
+	u8				reserved[4];
+} __attribute__ ((packed));
 struct acpi_table_mcfg {
 	struct acpi_table_header	header;
 	u8				reserved[8];
-	u32				base_address;
-	u32				base_reserved;
+	struct acpi_table_mcfg_config	config[0];
 } __attribute__ ((packed));
 
 /* Table Handlers */
@@ -391,6 +399,7 @@ int acpi_table_parse (enum acpi_table_id id, acpi_table_handler handler);
 int acpi_get_table_header_early (enum acpi_table_id id, struct acpi_table_header **header);
 int acpi_table_parse_madt (enum acpi_madt_entry_id id, acpi_madt_entry_handler handler, unsigned int max_entries);
 int acpi_table_parse_srat (enum acpi_srat_entry_id id, acpi_madt_entry_handler handler, unsigned int max_entries);
+int acpi_parse_mcfg (unsigned long phys_addr, unsigned long size);
 void acpi_table_print (struct acpi_table_header *header, unsigned long phys_addr);
 void acpi_table_print_madt_entry (acpi_table_entry_header *madt);
 void acpi_table_print_srat_entry (acpi_table_entry_header *srat);
@@ -412,7 +421,8 @@ int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base);
 
 extern int acpi_mp_config;
 
-extern u32 pci_mmcfg_base_addr;
+extern struct acpi_table_mcfg_config *pci_mmcfg_config;
+extern int pci_mmcfg_config_num;
 
 extern int sbf_port ;
 
-- 
cgit v1.2.3-70-g09d2


From b3563c4fbff906991a1b4ef4609f99cca2a0de6a Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 28 Jun 2005 12:54:43 -0700
Subject: [NETLINK]: Clear padding in netlink messages

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netlink.h   | 1 +
 include/linux/rtnetlink.h | 5 ++++-
 net/core/rtnetlink.c      | 1 +
 3 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 3029cad63a0..27e4d164a10 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -168,6 +168,7 @@ __nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len, int flags)
 	nlh->nlmsg_flags = flags;
 	nlh->nlmsg_pid = pid;
 	nlh->nlmsg_seq = seq;
+	memset(NLMSG_DATA(nlh) + len, 0, NLMSG_ALIGN(size) - size);
 	return nlh;
 }
 
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index d021888b58f..dc26e82ba0f 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -898,7 +898,9 @@ extern void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const voi
 	memcpy(skb_put(skb, attrlen), data, attrlen); })
 
 #define RTA_PUT_NOHDR(skb, attrlen, data) \
-	RTA_APPEND(skb, RTA_ALIGN(attrlen), data)
+({	RTA_APPEND(skb, RTA_ALIGN(attrlen), data); \
+	memset(skb->tail - (RTA_ALIGN(attrlen) - attrlen), 0, \
+	       RTA_ALIGN(attrlen) - attrlen); })
 
 #define RTA_PUT_U8(skb, attrtype, value) \
 ({	u8 _tmp = (value); \
@@ -978,6 +980,7 @@ __rta_reserve(struct sk_buff *skb, int attrtype, int attrlen)
 	rta = (struct rtattr*)skb_put(skb, RTA_ALIGN(size));
 	rta->rta_type = attrtype;
 	rta->rta_len = size;
+	memset(RTA_DATA(rta) + attrlen, 0, RTA_ALIGN(size) - size);
 	return rta;
 }
 
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index e013d836a7a..879237c378f 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -126,6 +126,7 @@ void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data
 	rta->rta_type = attrtype;
 	rta->rta_len = size;
 	memcpy(RTA_DATA(rta), data, attrlen);
+	memset(RTA_DATA(rta) + attrlen, 0, RTA_ALIGN(size) - size);
 }
 
 size_t rtattr_strlcpy(char *dest, const struct rtattr *rta, size_t size)
-- 
cgit v1.2.3-70-g09d2


From 8a47077a0b5aa2649751c46e7a27884e6686ccbf Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 28 Jun 2005 12:56:45 -0700
Subject: [NETLINK]: Missing padding fields in dumped structures

Plug holes with padding fields and initialized them to zero.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/pkt_cls.h   | 1 +
 include/linux/pkt_sched.h | 9 ++++++---
 include/linux/rtnetlink.h | 5 +++++
 net/ipv6/addrconf.c       | 3 +++
 net/sched/cls_rsvp.h      | 1 +
 net/sched/sch_cbq.c       | 1 +
 6 files changed, 17 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h
index 25d2d67c1fa..bd2c5a2bbbf 100644
--- a/include/linux/pkt_cls.h
+++ b/include/linux/pkt_cls.h
@@ -276,6 +276,7 @@ struct tc_rsvp_pinfo
 	__u8	protocol;
 	__u8	tunnelid;
 	__u8	tunnelhdr;
+	__u8	pad;
 };
 
 /* ROUTE filter */
diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index 1d9da36eb9d..60ffcb9c579 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -221,9 +221,11 @@ struct tc_gred_qopt
 /* gred setup */
 struct tc_gred_sopt
 {
-       __u32           DPs;
-       __u32           def_DP;
-       __u8            grio;
+       __u32		DPs;
+       __u32		def_DP;
+       __u8		grio;
+       __u8		pad1;
+       __u16		pad2;
 };
 
 /* HTB section */
@@ -351,6 +353,7 @@ struct tc_cbq_ovl
 #define	TC_CBQ_OVL_DROP		3
 #define	TC_CBQ_OVL_RCLASSIC	4
 	unsigned char	priority2;
+	__u16		pad;
 	__u32		penalty;
 };
 
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index dc26e82ba0f..657c05ab8f9 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -363,6 +363,8 @@ enum
 struct rta_session
 {
 	__u8	proto;
+	__u8	pad1;
+	__u16	pad2;
 
 	union {
 		struct {
@@ -635,10 +637,13 @@ struct ifinfomsg
 struct prefixmsg
 {
 	unsigned char	prefix_family;
+	unsigned char	prefix_pad1;
+	unsigned short	prefix_pad2;
 	int		prefix_ifindex;
 	unsigned char	prefix_type;
 	unsigned char	prefix_len;
 	unsigned char	prefix_flags;
+	unsigned char	prefix_pad3;
 };
 
 enum 
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 8140bed78a2..1b2902d8eb9 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3031,9 +3031,12 @@ static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev,
 	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*pmsg), flags);
 	pmsg = NLMSG_DATA(nlh);
 	pmsg->prefix_family = AF_INET6;
+	pmsg->prefix_pad1 = 0;
+	pmsg->prefix_pad2 = 0;
 	pmsg->prefix_ifindex = idev->dev->ifindex;
 	pmsg->prefix_len = pinfo->prefix_len;
 	pmsg->prefix_type = pinfo->type;
+	pmsg->prefix_pad3 = 0;
 	
 	pmsg->prefix_flags = 0;
 	if (pinfo->onlink)
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 232fb919681..006168d6937 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -618,6 +618,7 @@ static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
 	pinfo.protocol = s->protocol;
 	pinfo.tunnelid = s->tunnelid;
 	pinfo.tunnelhdr = f->tunnelhdr;
+	pinfo.pad = 0;
 	RTA_PUT(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo);
 	if (f->res.classid)
 		RTA_PUT(skb, TCA_RSVP_CLASSID, 4, &f->res.classid);
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index baeb3111f75..09453f997d8 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1528,6 +1528,7 @@ static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
 
 	opt.strategy = cl->ovl_strategy;
 	opt.priority2 = cl->priority2+1;
+	opt.pad = 0;
 	opt.penalty = (cl->penalty*1000)/HZ;
 	RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
 	return skb->len;
-- 
cgit v1.2.3-70-g09d2


From 2f85a42964dd43fed3a339701db046bee5a8b903 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Tue, 28 Jun 2005 13:24:23 -0700
Subject: [SCTP] Make init & delayed sack timeouts configurable by user.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: Sridhar Samudrala <sri@us.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sysctl.h       |  1 +
 include/net/sctp/constants.h | 18 +++---------------
 include/net/sctp/structs.h   |  4 ++++
 net/sctp/endpointola.c       | 13 +++++--------
 net/sctp/protocol.c          |  5 ++++-
 net/sctp/sysctl.c            | 13 +++++++++++++
 net/sctp/transport.c         |  1 -
 7 files changed, 30 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index ebfe1250f0a..5b5f434ac9a 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -641,6 +641,7 @@ enum {
 	NET_SCTP_ADDIP_ENABLE		 = 13,
 	NET_SCTP_PRSCTP_ENABLE		 = 14,
 	NET_SCTP_SNDBUF_POLICY		 = 15,
+	NET_SCTP_SACK_TIMEOUT		 = 16,
 };
 
 /* /proc/sys/net/bridge */
diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
index 4868c7f7749..5999e5684bb 100644
--- a/include/net/sctp/constants.h
+++ b/include/net/sctp/constants.h
@@ -263,23 +263,11 @@ enum { SCTP_MIN_PMTU = 576 };
 enum { SCTP_MAX_DUP_TSNS = 16 };
 enum { SCTP_MAX_GABS = 16 };
 
-/* Here we define the default timers.  */
+/* Heartbeat interval - 30 secs */
+#define SCTP_DEFAULT_TIMEOUT_HEARTBEAT	(30 * HZ)
 
-/* cookie timer def = ? seconds */
-#define SCTP_DEFAULT_TIMEOUT_T1_COOKIE	(3 * HZ)
-
-/* init timer def = 3 seconds  */
-#define SCTP_DEFAULT_TIMEOUT_T1_INIT	(3 * HZ)
-
-/* shutdown timer def = 300 ms */
-#define SCTP_DEFAULT_TIMEOUT_T2_SHUTDOWN ((300 * HZ) / 1000)
-
-/* 0 seconds + RTO */
-#define SCTP_DEFAULT_TIMEOUT_HEARTBEAT	(10 * HZ)
-
-/* recv timer def = 200ms (in usec) */
+/* Delayed sack timer - 200ms */
 #define SCTP_DEFAULT_TIMEOUT_SACK	((200 * HZ) / 1000)
-#define SCTP_DEFAULT_TIMEOUT_SACK_MAX	((500 * HZ) / 1000) /* 500 ms */
 
 /* RTO.Initial              - 3  seconds
  * RTO.Min                  - 1  second
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index dfad4d3c581..47727c7cc62 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -161,6 +161,9 @@ extern struct sctp_globals {
 	 */
 	int sndbuf_policy;
 
+	/* Delayed SACK timeout  200ms default*/
+	int sack_timeout;
+
 	/* HB.interval		    - 30 seconds  */
 	int hb_interval;
 
@@ -217,6 +220,7 @@ extern struct sctp_globals {
 #define sctp_sndbuf_policy	 	(sctp_globals.sndbuf_policy)
 #define sctp_max_retrans_path		(sctp_globals.max_retrans_path)
 #define sctp_max_retrans_init		(sctp_globals.max_retrans_init)
+#define sctp_sack_timeout		(sctp_globals.sack_timeout)
 #define sctp_hb_interval		(sctp_globals.hb_interval)
 #define sctp_max_instreams		(sctp_globals.max_instreams)
 #define sctp_max_outstreams		(sctp_globals.max_outstreams)
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 2ec0320fac3..c44bf4165c6 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -102,9 +102,9 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
 	/* Set up the base timeout information.  */
 	ep->timeouts[SCTP_EVENT_TIMEOUT_NONE] = 0;
 	ep->timeouts[SCTP_EVENT_TIMEOUT_T1_COOKIE] =
-		SCTP_DEFAULT_TIMEOUT_T1_COOKIE;
+		msecs_to_jiffies(sp->rtoinfo.srto_initial);
 	ep->timeouts[SCTP_EVENT_TIMEOUT_T1_INIT] =
-		SCTP_DEFAULT_TIMEOUT_T1_INIT;
+		msecs_to_jiffies(sp->rtoinfo.srto_initial);
 	ep->timeouts[SCTP_EVENT_TIMEOUT_T2_SHUTDOWN] =
 		msecs_to_jiffies(sp->rtoinfo.srto_initial);
 	ep->timeouts[SCTP_EVENT_TIMEOUT_T3_RTX] = 0;
@@ -117,12 +117,9 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
         ep->timeouts[SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD]
 		= 5 * msecs_to_jiffies(sp->rtoinfo.srto_max);
 
-	ep->timeouts[SCTP_EVENT_TIMEOUT_HEARTBEAT] =
-		SCTP_DEFAULT_TIMEOUT_HEARTBEAT;
-	ep->timeouts[SCTP_EVENT_TIMEOUT_SACK] =
-		SCTP_DEFAULT_TIMEOUT_SACK;
-	ep->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE] =
-		sp->autoclose * HZ;
+	ep->timeouts[SCTP_EVENT_TIMEOUT_HEARTBEAT] = 0;
+	ep->timeouts[SCTP_EVENT_TIMEOUT_SACK] = sctp_sack_timeout;
+	ep->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE] = sp->autoclose * HZ;
 
 	/* Use SCTP specific send buffer space queues.  */
 	ep->sndbuf_policy = sctp_sndbuf_policy;
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 5135e1a25d2..e7f37faba7c 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1050,7 +1050,10 @@ SCTP_STATIC __init int sctp_init(void)
 	sctp_sndbuf_policy		= 0;
 
 	/* HB.interval              - 30 seconds */
-	sctp_hb_interval		= 30 * HZ;
+	sctp_hb_interval		= SCTP_DEFAULT_TIMEOUT_HEARTBEAT;
+
+	/* delayed SACK timeout */
+	sctp_sack_timeout		= SCTP_DEFAULT_TIMEOUT_SACK;
 
 	/* Implementation specific variables. */
 
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index 7fc31849312..dc4893474f1 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -47,6 +47,8 @@
 static ctl_handler sctp_sysctl_jiffies_ms;
 static long rto_timer_min = 1;
 static long rto_timer_max = 86400000; /* One day */
+static long sack_timer_min = 1;
+static long sack_timer_max = 500;
 
 static ctl_table sctp_table[] = {
 	{
@@ -187,6 +189,17 @@ static ctl_table sctp_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec
 	},
+	{
+		.ctl_name	= NET_SCTP_SACK_TIMEOUT,
+		.procname	= "sack_timeout",
+		.data		= &sctp_sack_timeout,
+		.maxlen		= sizeof(long),
+		.mode		= 0644,
+		.proc_handler	= &proc_doulongvec_ms_jiffies_minmax,
+		.strategy	= &sctp_sysctl_jiffies_ms,
+		.extra1         = &sack_timer_min,
+		.extra2         = &sack_timer_max,
+	},
 	{ .ctl_name = 0 }
 };
 
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 0ec0fde6e6c..a63b6917960 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -103,7 +103,6 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
 
 	/* Set up the heartbeat timer. */
 	init_timer(&peer->hb_timer);
-	peer->hb_interval = SCTP_DEFAULT_TIMEOUT_HEARTBEAT;
 	peer->hb_timer.function = sctp_generate_heartbeat_event;
 	peer->hb_timer.data = (unsigned long)peer;
 
-- 
cgit v1.2.3-70-g09d2


From 7fe40f73d7591b38f129fe6a9c0fa46e0b192d09 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Tue, 28 Jun 2005 15:46:24 -0700
Subject: [IPV6]: remove more unused IPV6_AUTHHDR things.

Remove two more unused IPV6_AUTHHDR option things,
which I failed to remove them last time,
plus, mark IPV6_AUTHHDR obsolete.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/in6.h      | 2 +-
 include/net/ipv6.h       | 1 -
 net/ipv6/ip6_flowlabel.c | 1 -
 3 files changed, 1 insertion(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/in6.h b/include/linux/in6.h
index f8256c58284..dcf5720ffcb 100644
--- a/include/linux/in6.h
+++ b/include/linux/in6.h
@@ -156,7 +156,7 @@ struct in6_flowlabel_req
 #define IPV6_CHECKSUM		7
 #define IPV6_HOPLIMIT		8
 #define IPV6_NEXTHOP		9
-#define IPV6_AUTHHDR		10
+#define IPV6_AUTHHDR		10	/* obsolete */
 #define IPV6_FLOWINFO		11
 
 #define IPV6_UNICAST_HOPS	16
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 771b47e30f8..69324465e8b 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -183,7 +183,6 @@ struct ipv6_txoptions
 	struct ipv6_opt_hdr	*hopopt;
 	struct ipv6_opt_hdr	*dst0opt;
 	struct ipv6_rt_hdr	*srcrt;	/* Routing Header */
-	struct ipv6_opt_hdr	*auth;
 	struct ipv6_opt_hdr	*dst1opt;
 
 	/* Option buffer, as read by IPV6_PKTOPTIONS, starts here. */
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 0e5f7499deb..b6c73da5ff3 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -244,7 +244,6 @@ struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions * opt_space,
 		opt_space->opt_nflen = 0;
 	}
 	opt_space->dst1opt = fopt->dst1opt;
-	opt_space->auth = fopt->auth;
 	opt_space->opt_flen = fopt->opt_flen;
 	return opt_space;
 }
-- 
cgit v1.2.3-70-g09d2


From bcd61272db5e643b6d9c01c9d5085b914d9f19df Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arndb@de.ibm.com>
Date: Tue, 28 Jun 2005 15:58:50 -0700
Subject: [NET]: Add missing include to linux/netdevice.h

linux/etherdevice.h can't be included standalone at the moment, which
is required in order to sort the header files in the recommended
alphabetic order. This patch fixes that and is needed to build spider_net.

Signed-off-by: Arnd Bergmann <arndb@de.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/etherdevice.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 8a2df4dfbc5..cf3847edc50 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -25,6 +25,7 @@
 #define _LINUX_ETHERDEVICE_H
 
 #include <linux/if_ether.h>
+#include <linux/netdevice.h>
 #include <linux/random.h>
 
 #ifdef __KERNEL__
-- 
cgit v1.2.3-70-g09d2


From 05133fc498e788e1c1ca4e906f9e05d9779fd63b Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Tue, 28 Jun 2005 20:44:54 -0700
Subject: [PATCH] swabb.h warning fixes

In file included from drivers/media/dvb/ttpci/av7110_hw.c:38:
include/linux/byteorder/swabb.h:96: warning: type qualifiers ignored on function return type
include/linux/byteorder/swabb.h:110: warning: type qualifiers ignored on function return type
In file included from drivers/media/dvb/ttpci/av7110_v4l.c:36:
include/linux/byteorder/swabb.h:96: warning: type qualifiers ignored on function return type
include/linux/byteorder/swabb.h:110: warning: type qualifiers ignored on function return type
In file included from drivers/media/dvb/ttpci/av7110_av.c:37:
include/linux/byteorder/swabb.h:96: warning: type qualifiers ignored on function return type
include/linux/byteorder/swabb.h:110: warning: type qualifiers ignored on function return type
drivers/isdn/icn/icn.c:719:4: warning: #warning TODO test headroom or use skb->nb to flag ACK
In file included from drivers/media/dvb/ttpci/av7110_ca.c:39:
include/linux/byteorder/swabb.h:96: warning: type qualifiers ignored on function return type
include/linux/byteorder/swabb.h:110: warning: type qualifiers ignored on function return type
In file included from drivers/media/dvb/ttpci/av7110.c:41:
include/linux/byteorder/swabb.h:96: warning: type qualifiers ignored on function return type
include/linux/byteorder/swabb.h:110: warning: type qualifiers ignored on function return type

Does declaring a function to return a const value actually mean something to
gcc?

Dunno.  Kill it and replace sone `__inline__'s with `inline' too.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/byteorder/swabb.h | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/byteorder/swabb.h b/include/linux/byteorder/swabb.h
index d28d9a804d3..d5f2a320510 100644
--- a/include/linux/byteorder/swabb.h
+++ b/include/linux/byteorder/swabb.h
@@ -92,29 +92,32 @@
 #endif /* OPTIMIZE */
 
 
-static __inline__ __const__ __u32 __fswahw32(__u32 x)
+static inline __u32 __fswahw32(__u32 x)
 {
 	return __arch__swahw32(x);
 }
-static __inline__ __u32 __swahw32p(__u32 *x)
+
+static inline __u32 __swahw32p(__u32 *x)
 {
 	return __arch__swahw32p(x);
 }
-static __inline__ void __swahw32s(__u32 *addr)
+
+static inline void __swahw32s(__u32 *addr)
 {
 	__arch__swahw32s(addr);
 }
 
-
-static __inline__ __const__ __u32 __fswahb32(__u32 x)
+static inline __u32 __fswahb32(__u32 x)
 {
 	return __arch__swahb32(x);
 }
-static __inline__ __u32 __swahb32p(__u32 *x)
+
+static inline __u32 __swahb32p(__u32 *x)
 {
 	return __arch__swahb32p(x);
 }
-static __inline__ void __swahb32s(__u32 *addr)
+
+static inline void __swahb32s(__u32 *addr)
 {
 	__arch__swahb32s(addr);
 }
-- 
cgit v1.2.3-70-g09d2


From 687a21cee17000177b1935896b9b475acf136678 Mon Sep 17 00:00:00 2001
From: Pekka J Enberg <penberg@cs.Helsinki.FI>
Date: Tue, 28 Jun 2005 20:44:55 -0700
Subject: [PATCH] rename wakeup_bdflush to wakeup_pdflush

Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/buffer.c               | 4 ++--
 include/linux/writeback.h | 2 +-
 mm/page-writeback.c       | 2 +-
 mm/vmscan.c               | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/buffer.c b/fs/buffer.c
index 13e5938a64f..561e63a1496 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -278,7 +278,7 @@ EXPORT_SYMBOL(thaw_bdev);
  */
 static void do_sync(unsigned long wait)
 {
-	wakeup_bdflush(0);
+	wakeup_pdflush(0);
 	sync_inodes(0);		/* All mappings, inodes and their blockdevs */
 	DQUOT_SYNC(NULL);
 	sync_supers();		/* Write the superblocks */
@@ -497,7 +497,7 @@ static void free_more_memory(void)
 	struct zone **zones;
 	pg_data_t *pgdat;
 
-	wakeup_bdflush(1024);
+	wakeup_pdflush(1024);
 	yield();
 
 	for_each_pgdat(pgdat) {
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index d5c3fe1bf33..542dbaee651 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -85,7 +85,7 @@ static inline void wait_on_inode(struct inode *inode)
 /*
  * mm/page-writeback.c
  */
-int wakeup_bdflush(long nr_pages);
+int wakeup_pdflush(long nr_pages);
 void laptop_io_completion(void);
 void laptop_sync_completion(void);
 void throttle_vm_writeout(void);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 613b99a5591..a6329fa8f86 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -354,7 +354,7 @@ static void background_writeout(unsigned long _min_pages)
  * the whole world.  Returns 0 if a pdflush thread was dispatched.  Returns
  * -1 if all pdflush threads were busy.
  */
-int wakeup_bdflush(long nr_pages)
+int wakeup_pdflush(long nr_pages)
 {
 	if (nr_pages == 0) {
 		struct writeback_state wbs;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 1fa312a8db7..cfffe5098d5 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -972,7 +972,7 @@ int try_to_free_pages(struct zone **zones, unsigned int gfp_mask)
 		 * writeout.  So in laptop mode, write out the whole world.
 		 */
 		if (total_scanned > sc.swap_cluster_max + sc.swap_cluster_max/2) {
-			wakeup_bdflush(laptop_mode ? 0 : total_scanned);
+			wakeup_pdflush(laptop_mode ? 0 : total_scanned);
 			sc.may_writepage = 1;
 		}
 
-- 
cgit v1.2.3-70-g09d2


From 334a13ec3d01a1a4b4f2249735b793105cb4a519 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 28 Jun 2005 20:44:58 -0700
Subject: [PATCH] really remove xattr_acl.h

Looks like it sneaked back with the NFS ACL merge..

Signed-off-by: Christoph Hellwig <hch@lst.de>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Cc: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfs/nfs3acl.c          | 14 ++++++-------
 fs/nfsd/vfs.c             | 13 ++++++------
 include/linux/xattr_acl.h | 50 -----------------------------------------------
 3 files changed, 13 insertions(+), 64 deletions(-)
 delete mode 100644 include/linux/xattr_acl.h

(limited to 'include/linux')

diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index ee3536fc84a..1b7a3ef2f81 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -2,7 +2,7 @@
 #include <linux/nfs.h>
 #include <linux/nfs3.h>
 #include <linux/nfs_fs.h>
-#include <linux/xattr_acl.h>
+#include <linux/posix_acl_xattr.h>
 #include <linux/nfsacl.h>
 
 #define NFSDBG_FACILITY	NFSDBG_PROC
@@ -53,9 +53,9 @@ ssize_t nfs3_getxattr(struct dentry *dentry, const char *name,
 	struct posix_acl *acl;
 	int type, error = 0;
 
-	if (strcmp(name, XATTR_NAME_ACL_ACCESS) == 0)
+	if (strcmp(name, POSIX_ACL_XATTR_ACCESS) == 0)
 		type = ACL_TYPE_ACCESS;
-	else if (strcmp(name, XATTR_NAME_ACL_DEFAULT) == 0)
+	else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0)
 		type = ACL_TYPE_DEFAULT;
 	else
 		return -EOPNOTSUPP;
@@ -82,9 +82,9 @@ int nfs3_setxattr(struct dentry *dentry, const char *name,
 	struct posix_acl *acl;
 	int type, error;
 
-	if (strcmp(name, XATTR_NAME_ACL_ACCESS) == 0)
+	if (strcmp(name, POSIX_ACL_XATTR_ACCESS) == 0)
 		type = ACL_TYPE_ACCESS;
-	else if (strcmp(name, XATTR_NAME_ACL_DEFAULT) == 0)
+	else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0)
 		type = ACL_TYPE_DEFAULT;
 	else
 		return -EOPNOTSUPP;
@@ -103,9 +103,9 @@ int nfs3_removexattr(struct dentry *dentry, const char *name)
 	struct inode *inode = dentry->d_inode;
 	int type;
 
-	if (strcmp(name, XATTR_NAME_ACL_ACCESS) == 0)
+	if (strcmp(name, POSIX_ACL_XATTR_ACCESS) == 0)
 		type = ACL_TYPE_ACCESS;
-	else if (strcmp(name, XATTR_NAME_ACL_DEFAULT) == 0)
+	else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0)
 		type = ACL_TYPE_DEFAULT;
 	else
 		return -EOPNOTSUPP;
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index de340ffd33c..be24ead89d9 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -46,10 +46,9 @@
 #include <linux/nfsd/nfsfh.h>
 #include <linux/quotaops.h>
 #include <linux/dnotify.h>
-#include <linux/xattr_acl.h>
 #include <linux/posix_acl.h>
-#ifdef CONFIG_NFSD_V4
 #include <linux/posix_acl_xattr.h>
+#ifdef CONFIG_NFSD_V4
 #include <linux/xattr.h>
 #include <linux/nfs4.h>
 #include <linux/nfs4_acl.h>
@@ -1872,10 +1871,10 @@ nfsd_get_posix_acl(struct svc_fh *fhp, int type)
 		return ERR_PTR(-EOPNOTSUPP);
 	switch(type) {
 		case ACL_TYPE_ACCESS:
-			name = XATTR_NAME_ACL_ACCESS;
+			name = POSIX_ACL_XATTR_ACCESS;
 			break;
 		case ACL_TYPE_DEFAULT:
-			name = XATTR_NAME_ACL_DEFAULT;
+			name = POSIX_ACL_XATTR_DEFAULT;
 			break;
 		default:
 			return ERR_PTR(-EOPNOTSUPP);
@@ -1919,17 +1918,17 @@ nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl)
 		return -EOPNOTSUPP;
 	switch(type) {
 		case ACL_TYPE_ACCESS:
-			name = XATTR_NAME_ACL_ACCESS;
+			name = POSIX_ACL_XATTR_ACCESS;
 			break;
 		case ACL_TYPE_DEFAULT:
-			name = XATTR_NAME_ACL_DEFAULT;
+			name = POSIX_ACL_XATTR_DEFAULT;
 			break;
 		default:
 			return -EOPNOTSUPP;
 	}
 
 	if (acl && acl->a_count) {
-		size = xattr_acl_size(acl->a_count);
+		size = posix_acl_xattr_size(acl->a_count);
 		value = kmalloc(size, GFP_KERNEL);
 		if (!value)
 			return -ENOMEM;
diff --git a/include/linux/xattr_acl.h b/include/linux/xattr_acl.h
deleted file mode 100644
index 7a1f9b93a45..00000000000
--- a/include/linux/xattr_acl.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
-  File: linux/xattr_acl.h
-
-  (extended attribute representation of access control lists)
-
-  (C) 2000 Andreas Gruenbacher, <a.gruenbacher@computer.org>
-*/
-
-#ifndef _LINUX_XATTR_ACL_H
-#define _LINUX_XATTR_ACL_H
-
-#include <linux/posix_acl.h>
-
-#define XATTR_NAME_ACL_ACCESS	"system.posix_acl_access"
-#define XATTR_NAME_ACL_DEFAULT	"system.posix_acl_default"
-
-#define XATTR_ACL_VERSION	0x0002
-
-typedef struct {
-	__u16		e_tag;
-	__u16		e_perm;
-	__u32		e_id;
-} xattr_acl_entry;
-
-typedef struct {
-	__u32		a_version;
-	xattr_acl_entry	a_entries[0];
-} xattr_acl_header;
-
-static inline size_t xattr_acl_size(int count)
-{
-	return sizeof(xattr_acl_header) + count * sizeof(xattr_acl_entry);
-}
-
-static inline int xattr_acl_count(size_t size)
-{
-	if (size < sizeof(xattr_acl_header))
-		return -1;
-	size -= sizeof(xattr_acl_header);
-	if (size % sizeof(xattr_acl_entry))
-		return -1;
-	return size / sizeof(xattr_acl_entry);
-}
-
-struct posix_acl * posix_acl_from_xattr(const void *value, size_t size);
-int posix_acl_to_xattr(const struct posix_acl *acl, void *buffer, size_t size);
-
-
-
-#endif /* _LINUX_XATTR_ACL_H */
-- 
cgit v1.2.3-70-g09d2


From 3607d1dfc80dcfbd3a6f236c70aa0d8eb7292278 Mon Sep 17 00:00:00 2001
From: GOTO Masanori <gotom@debian.or.jp>
Date: Tue, 28 Jun 2005 20:45:04 -0700
Subject: [PATCH] headers: include linux/compiler.h for __user

This patch lets i2c-dev.h include linux/compiler.h so that __user is defined.

Signed-off-by: GOTO Masanori <gotom@debian.or.jp>
Cc: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/i2c-dev.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/i2c-dev.h b/include/linux/i2c-dev.h
index d228230ffe5..54169567976 100644
--- a/include/linux/i2c-dev.h
+++ b/include/linux/i2c-dev.h
@@ -25,6 +25,7 @@
 #define _LINUX_I2C_DEV_H
 
 #include <linux/types.h>
+#include <linux/compiler.h>
 
 /* Some IOCTL commands are defined in <linux/i2c.h> */
 /* Note: 10-bit addresses are NOT supported! */
-- 
cgit v1.2.3-70-g09d2


From 4cceb4d13abaedbd52e54053367c793ed4aedb6b Mon Sep 17 00:00:00 2001
From: GOTO Masanori <gotom@debian.or.jp>
Date: Tue, 28 Jun 2005 20:45:05 -0700
Subject: [PATCH] headers: include linux/types.h for usb_ch9.h

This patch for usb_ch9.h includes linux/types.h instead of asm/types.h so that
__le16 and so on is explicitly defined.  It also cleans up non standard //
comment.

Signed-off-by: GOTO Masanori <gotom@debian.or.jp>
Cc: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/usb_ch9.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb_ch9.h b/include/linux/usb_ch9.h
index 39e7ff4ffd2..ee21e6bf386 100644
--- a/include/linux/usb_ch9.h
+++ b/include/linux/usb_ch9.h
@@ -19,7 +19,7 @@
 #ifndef __LINUX_USB_CH9_H
 #define __LINUX_USB_CH9_H
 
-#include <asm/types.h>		/* __u8 etc */
+#include <linux/types.h>	/* __u8 etc */
 
 /*-------------------------------------------------------------------------*/
 
@@ -294,8 +294,8 @@ struct usb_endpoint_descriptor {
 	__le16 wMaxPacketSize;
 	__u8  bInterval;
 
-	// NOTE:  these two are _only_ in audio endpoints.
-	// use USB_DT_ENDPOINT*_SIZE in bLength, not sizeof.
+	/* NOTE:  these two are _only_ in audio endpoints. */
+	/* use USB_DT_ENDPOINT*_SIZE in bLength, not sizeof. */
 	__u8  bRefresh;
 	__u8  bSynchAddress;
 } __attribute__ ((packed));
-- 
cgit v1.2.3-70-g09d2


From fb3cc4320e1fd87143683b540e459a2e20fdc9bb Mon Sep 17 00:00:00 2001
From: Nick Piggin <nickpiggin@yahoo.com.au>
Date: Tue, 28 Jun 2005 20:45:15 -0700
Subject: [PATCH] blk: light iocontext ops

get_io_context needlessly turned off interrupts and checked for racing io
context creations.  Both of which aren't needed, because the io context can
only be created while in process context of the current process.

Also, split the function in 2.  A light version, current_io_context does not
elevate the reference count specifically, but can be used when in process
context, because the process holds a reference itself.

Signed-off-by: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Jens Axboe <axboe@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/block/ll_rw_blk.c | 56 +++++++++++++++++++++--------------------------
 include/linux/blkdev.h    |  1 +
 2 files changed, 26 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c
index 5caebe2cf0a..1197462bb6b 100644
--- a/drivers/block/ll_rw_blk.c
+++ b/drivers/block/ll_rw_blk.c
@@ -1876,7 +1876,7 @@ static struct request *get_request(request_queue_t *q, int rw, struct bio *bio,
 {
 	struct request *rq = NULL;
 	struct request_list *rl = &q->rq;
-	struct io_context *ioc = get_io_context(GFP_ATOMIC);
+	struct io_context *ioc = current_io_context(GFP_ATOMIC);
 
 	if (unlikely(test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags)))
 		goto out;
@@ -1959,7 +1959,6 @@ rq_starved:
 	rq_init(q, rq);
 	rq->rl = rl;
 out:
-	put_io_context(ioc);
 	return rq;
 }
 
@@ -1997,9 +1996,8 @@ static struct request *get_request_wait(request_queue_t *q, int rw,
 			 * up to a big batch of them for a small period time.
 			 * See ioc_batching, ioc_set_batching
 			 */
-			ioc = get_io_context(GFP_NOIO);
+			ioc = current_io_context(GFP_NOIO);
 			ioc_set_batching(q, ioc);
-			put_io_context(ioc);
 
 			spin_lock_irq(q->queue_lock);
 		}
@@ -3282,24 +3280,20 @@ void exit_io_context(void)
 
 /*
  * If the current task has no IO context then create one and initialise it.
- * If it does have a context, take a ref on it.
+ * Otherwise, return its existing IO context.
  *
- * This is always called in the context of the task which submitted the I/O.
- * But weird things happen, so we disable local interrupts to ensure exclusive
- * access to *current.
+ * This returned IO context doesn't have a specifically elevated refcount,
+ * but since the current task itself holds a reference, the context can be
+ * used in general code, so long as it stays within `current` context.
  */
-struct io_context *get_io_context(int gfp_flags)
+struct io_context *current_io_context(int gfp_flags)
 {
 	struct task_struct *tsk = current;
-	unsigned long flags;
 	struct io_context *ret;
 
-	local_irq_save(flags);
 	ret = tsk->io_context;
-	if (ret)
-		goto out;
-
-	local_irq_restore(flags);
+	if (likely(ret))
+		return ret;
 
 	ret = kmem_cache_alloc(iocontext_cachep, gfp_flags);
 	if (ret) {
@@ -3310,25 +3304,25 @@ struct io_context *get_io_context(int gfp_flags)
 		ret->nr_batch_requests = 0; /* because this is 0 */
 		ret->aic = NULL;
 		ret->cic = NULL;
+		tsk->io_context = ret;
+	}
 
-		local_irq_save(flags);
-
-		/*
-		 * very unlikely, someone raced with us in setting up the task
-		 * io context. free new context and just grab a reference.
-		 */
-		if (!tsk->io_context)
-			tsk->io_context = ret;
-		else {
-			kmem_cache_free(iocontext_cachep, ret);
-			ret = tsk->io_context;
-		}
+	return ret;
+}
+EXPORT_SYMBOL(current_io_context);
 
-out:
+/*
+ * If the current task has no IO context then create one and initialise it.
+ * If it does have a context, take a ref on it.
+ *
+ * This is always called in the context of the task which submitted the I/O.
+ */
+struct io_context *get_io_context(int gfp_flags)
+{
+	struct io_context *ret;
+	ret = current_io_context(gfp_flags);
+	if (likely(ret))
 		atomic_inc(&ret->refcount);
-		local_irq_restore(flags);
-	}
-
 	return ret;
 }
 EXPORT_SYMBOL(get_io_context);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 21a8674cd14..0881b5cdee3 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -96,6 +96,7 @@ struct io_context {
 
 void put_io_context(struct io_context *ioc);
 void exit_io_context(void);
+struct io_context *current_io_context(int gfp_flags);
 struct io_context *get_io_context(int gfp_flags);
 void copy_io_context(struct io_context **pdst, struct io_context **psrc);
 void swap_io_context(struct io_context **ioc1, struct io_context **ioc2);
-- 
cgit v1.2.3-70-g09d2


From 200803dfe4ff772740d63db725ab2f1b185ccf92 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Tue, 28 Jun 2005 20:45:18 -0700
Subject: [PATCH] irqpoll

Anyone reporting a stuck IRQ should try these options.  Its effectiveness
varies we've found in the Fedora case.  Quite a few systems with misdescribed
IRQ routing just work when you use irqpoll.  It also fixes up the VIA systems
although thats now fixed with the VIA quirk (which we could just make default
as its what Redmond OS does but Linus didn't like it historically).

A small number of systems have jammed IRQ sources or misdescribes that cause
an IRQ that we have no handler registered anywhere for.  In those cases it
doesn't help.

Signed-off-by: Alan Cox <number6@the-village.bc.nu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 Documentation/kernel-parameters.txt |  15 +++++
 arch/ppc64/kernel/irq.c             |   2 +-
 include/linux/irq.h                 |   5 +-
 kernel/irq/handle.c                 |   2 +-
 kernel/irq/spurious.c               | 113 +++++++++++++++++++++++++++++++++++-
 5 files changed, 131 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 0f71251f12b..67e99f14419 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -437,6 +437,10 @@ running once the system is up.
 			Format: {"of[f]" | "sk[ipmbr]"}
 			See comment in arch/i386/boot/edd.S
 
+	edd		[EDD]
+			Format: {"of[f]" | "sk[ipmbr]"}
+			See comment in arch/i386/boot/edd.S
+
 	eicon=		[HW,ISDN] 
 			Format: <id>,<membase>,<irq>
 
@@ -622,6 +626,17 @@ running once the system is up.
 	ips=		[HW,SCSI] Adaptec / IBM ServeRAID controller
 			See header of drivers/scsi/ips.c.
 
+	irqfixup	[HW]
+			When an interrupt is not handled search all handlers
+			for it. Intended to get systems with badly broken
+			firmware running.
+
+	irqpoll		[HW]
+			When an interrupt is not handled search all handlers
+			for it. Also check all handlers each timer
+			interrupt. Intended to get systems with badly broken
+			firmware running.
+
 	isapnp=		[ISAPNP]
 			Format: <RDP>, <reset>, <pci_scan>, <verbosity>
 
diff --git a/arch/ppc64/kernel/irq.c b/arch/ppc64/kernel/irq.c
index 3defc8c33ad..ffe300611f0 100644
--- a/arch/ppc64/kernel/irq.c
+++ b/arch/ppc64/kernel/irq.c
@@ -245,7 +245,7 @@ void ppc_irq_dispatch_handler(struct pt_regs *regs, int irq)
 
 		spin_lock(&desc->lock);
 		if (!noirqdebug)
-			note_interrupt(irq, desc, action_ret);
+			note_interrupt(irq, desc, action_ret, regs);
 		if (likely(!(desc->status & IRQ_PENDING)))
 			break;
 		desc->status &= ~IRQ_PENDING;
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 12277799c00..069d3b84d31 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -85,9 +85,10 @@ extern int no_irq_affinity;
 extern int noirqdebug_setup(char *str);
 
 extern fastcall int handle_IRQ_event(unsigned int irq, struct pt_regs *regs,
-				       struct irqaction *action);
+					struct irqaction *action);
 extern fastcall unsigned int __do_IRQ(unsigned int irq, struct pt_regs *regs);
-extern void note_interrupt(unsigned int irq, irq_desc_t *desc, int action_ret);
+extern void note_interrupt(unsigned int irq, irq_desc_t *desc,
+					int action_ret, struct pt_regs *regs);
 extern int can_request_irq(unsigned int irq, unsigned long irqflags);
 
 extern void init_irq_proc(void);
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 436c7d93c00..c29f83c1649 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -172,7 +172,7 @@ fastcall unsigned int __do_IRQ(unsigned int irq, struct pt_regs *regs)
 
 		spin_lock(&desc->lock);
 		if (!noirqdebug)
-			note_interrupt(irq, desc, action_ret);
+			note_interrupt(irq, desc, action_ret, regs);
 		if (likely(!(desc->status & IRQ_PENDING)))
 			break;
 		desc->status &= ~IRQ_PENDING;
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index ba039e827d5..7df9abd5ec8 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -11,6 +11,83 @@
 #include <linux/kallsyms.h>
 #include <linux/interrupt.h>
 
+static int irqfixup;
+
+/*
+ * Recovery handler for misrouted interrupts.
+ */
+
+static int misrouted_irq(int irq, struct pt_regs *regs)
+{
+	int i;
+	irq_desc_t *desc;
+	int ok = 0;
+	int work = 0;	/* Did we do work for a real IRQ */
+
+	for(i = 1; i < NR_IRQS; i++) {
+		struct irqaction *action;
+
+		if (i == irq)	/* Already tried */
+			continue;
+		desc = &irq_desc[i];
+		spin_lock(&desc->lock);
+		action = desc->action;
+		/* Already running on another processor */
+		if (desc->status & IRQ_INPROGRESS) {
+			/*
+			 * Already running: If it is shared get the other
+			 * CPU to go looking for our mystery interrupt too
+			 */
+			if (desc->action && (desc->action->flags & SA_SHIRQ))
+				desc->status |= IRQ_PENDING;
+			spin_unlock(&desc->lock);
+			continue;
+		}
+		/* Honour the normal IRQ locking */
+		desc->status |= IRQ_INPROGRESS;
+		spin_unlock(&desc->lock);
+		while (action) {
+			/* Only shared IRQ handlers are safe to call */
+			if (action->flags & SA_SHIRQ) {
+				if (action->handler(i, action->dev_id, regs) ==
+						IRQ_HANDLED)
+					ok = 1;
+			}
+			action = action->next;
+		}
+		local_irq_disable();
+		/* Now clean up the flags */
+		spin_lock(&desc->lock);
+		action = desc->action;
+
+		/*
+		 * While we were looking for a fixup someone queued a real
+		 * IRQ clashing with our walk
+		 */
+
+		while ((desc->status & IRQ_PENDING) && action) {
+			/*
+			 * Perform real IRQ processing for the IRQ we deferred
+			 */
+			work = 1;
+			spin_unlock(&desc->lock);
+			handle_IRQ_event(i, regs, action);
+			spin_lock(&desc->lock);
+			desc->status &= ~IRQ_PENDING;
+		}
+		desc->status &= ~IRQ_INPROGRESS;
+		/*
+		 * If we did actual work for the real IRQ line we must let the
+		 * IRQ controller clean up too
+		 */
+		if(work)
+			desc->handler->end(i);
+		spin_unlock(&desc->lock);
+	}
+	/* So the caller can adjust the irq error counts */
+	return ok;
+}
+
 /*
  * If 99,900 of the previous 100,000 interrupts have not been handled
  * then assume that the IRQ is stuck in some manner. Drop a diagnostic
@@ -31,7 +108,8 @@ __report_bad_irq(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret)
 		printk(KERN_ERR "irq event %d: bogus return value %x\n",
 				irq, action_ret);
 	} else {
-		printk(KERN_ERR "irq %d: nobody cared!\n", irq);
+		printk(KERN_ERR "irq %d: nobody cared (try booting with "
+				"the \"irqpoll\" option)\n", irq);
 	}
 	dump_stack();
 	printk(KERN_ERR "handlers:\n");
@@ -55,7 +133,8 @@ static void report_bad_irq(unsigned int irq, irq_desc_t *desc, irqreturn_t actio
 	}
 }
 
-void note_interrupt(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret)
+void note_interrupt(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret,
+			struct pt_regs *regs)
 {
 	if (action_ret != IRQ_HANDLED) {
 		desc->irqs_unhandled++;
@@ -63,6 +142,15 @@ void note_interrupt(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret)
 			report_bad_irq(irq, desc, action_ret);
 	}
 
+	if (unlikely(irqfixup)) {
+		/* Don't punish working computers */
+		if ((irqfixup == 2 && irq == 0) || action_ret == IRQ_NONE) {
+			int ok = misrouted_irq(irq, regs);
+			if (action_ret == IRQ_NONE)
+				desc->irqs_unhandled -= ok;
+		}
+	}
+
 	desc->irq_count++;
 	if (desc->irq_count < 100000)
 		return;
@@ -94,3 +182,24 @@ int __init noirqdebug_setup(char *str)
 
 __setup("noirqdebug", noirqdebug_setup);
 
+static int __init irqfixup_setup(char *str)
+{
+	irqfixup = 1;
+	printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n");
+	printk(KERN_WARNING "This may impact system performance.\n");
+	return 1;
+}
+
+__setup("irqfixup", irqfixup_setup);
+
+static int __init irqpoll_setup(char *str)
+{
+	irqfixup = 2;
+	printk(KERN_WARNING "Misrouted IRQ fixup and polling support "
+				"enabled\n");
+	printk(KERN_WARNING "This may significantly impact system "
+				"performance\n");
+	return 1;
+}
+
+__setup("irqpoll", irqpoll_setup);
-- 
cgit v1.2.3-70-g09d2


From 115d6f3fd25991f2a7de1ff4d758086209b1ed12 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@brturbo.com.br>
Date: Tue, 28 Jun 2005 20:45:27 -0700
Subject: [PATCH] V4L: API new webcam formats included

Add Philips Webcam format.

Signed-off-by: Mauro Carvalho Chehab <mchehab@brturbo.com.br>
Signed-off-by: Luc Saillard <luc@saillard.org>.
Signed-off-by: Nickolay V Shmyrev <nshmyrev@yandex.ru>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/videodev2.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index 4e0edce5376..acbfc525576 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -221,6 +221,8 @@ struct v4l2_pix_format
 /*  Vendor-specific formats   */
 #define V4L2_PIX_FMT_WNVA     v4l2_fourcc('W','N','V','A') /* Winnov hw compress */
 #define V4L2_PIX_FMT_SN9C10X  v4l2_fourcc('S','9','1','0') /* SN9C10x compression */
+#define V4L2_PIX_FMT_PWC1     v4l2_fourcc('P','W','C','1') /* pwc older webcam */
+#define V4L2_PIX_FMT_PWC2     v4l2_fourcc('P','W','C','2') /* pwc newer webcam */
 
 /*
  *	F O R M A T   E N U M E R A T I O N
-- 
cgit v1.2.3-70-g09d2


From 0edb586049e57c56e625536476931117a57671e9 Mon Sep 17 00:00:00 2001
From: Cornelia Huck <cohuck@de.ibm.com>
Date: Wed, 22 Jun 2005 16:59:51 +0200
Subject: [PATCH] driver core: add bus_find_device & driver_find_device
 functions

Add bus_find_device() and driver_find_device() which allow searching for a
device in the bus's resp. the driver's klist and obtain a reference on it.

Signed-off-by: Cornelia Huck <cohuck@de.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/bus.c     | 34 ++++++++++++++++++++++++++++++++++
 drivers/base/driver.c  | 35 +++++++++++++++++++++++++++++++++++
 include/linux/device.h |  5 +++++
 3 files changed, 74 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/bus.c b/drivers/base/bus.c
index c3fac7fd555..2c64b792d07 100644
--- a/drivers/base/bus.c
+++ b/drivers/base/bus.c
@@ -177,6 +177,39 @@ int bus_for_each_dev(struct bus_type * bus, struct device * start,
 	return error;
 }
 
+/**
+ * bus_find_device - device iterator for locating a particular device.
+ * @bus: bus type
+ * @start: Device to begin with
+ * @data: Data to pass to match function
+ * @match: Callback function to check device
+ *
+ * This is similar to the bus_for_each_dev() function above, but it
+ * returns a reference to a device that is 'found' for later use, as
+ * determined by the @match callback.
+ *
+ * The callback should return 0 if the device doesn't match and non-zero
+ * if it does.  If the callback returns non-zero, this function will
+ * return to the caller and not iterate over any more devices.
+ */
+struct device * bus_find_device(struct bus_type *bus,
+				struct device *start, void *data,
+				int (*match)(struct device *, void *))
+{
+	struct klist_iter i;
+	struct device *dev;
+
+	if (!bus)
+		return NULL;
+
+	klist_iter_init_node(&bus->klist_devices, &i,
+			     (start ? &start->knode_bus : NULL));
+	while ((dev = next_device(&i)))
+		if (match(dev, data) && get_device(dev))
+			break;
+	klist_iter_exit(&i);
+	return dev;
+}
 
 
 static struct device_driver * next_driver(struct klist_iter * i)
@@ -557,6 +590,7 @@ int __init buses_init(void)
 
 
 EXPORT_SYMBOL_GPL(bus_for_each_dev);
+EXPORT_SYMBOL_GPL(bus_find_device);
 EXPORT_SYMBOL_GPL(bus_for_each_drv);
 
 EXPORT_SYMBOL_GPL(bus_add_device);
diff --git a/drivers/base/driver.c b/drivers/base/driver.c
index 1b645886e9e..291c5954a3a 100644
--- a/drivers/base/driver.c
+++ b/drivers/base/driver.c
@@ -55,6 +55,41 @@ int driver_for_each_device(struct device_driver * drv, struct device * start,
 EXPORT_SYMBOL_GPL(driver_for_each_device);
 
 
+/**
+ * driver_find_device - device iterator for locating a particular device.
+ * @driver: The device's driver
+ * @start: Device to begin with
+ * @data: Data to pass to match function
+ * @match: Callback function to check device
+ *
+ * This is similar to the driver_for_each_device() function above, but
+ * it returns a reference to a device that is 'found' for later use, as
+ * determined by the @match callback.
+ *
+ * The callback should return 0 if the device doesn't match and non-zero
+ * if it does.  If the callback returns non-zero, this function will
+ * return to the caller and not iterate over any more devices.
+ */
+struct device * driver_find_device(struct device_driver *drv,
+				   struct device * start, void * data,
+				   int (*match)(struct device *, void *))
+{
+	struct klist_iter i;
+	struct device *dev;
+
+	if (!drv)
+		return NULL;
+
+	klist_iter_init_node(&drv->klist_devices, &i,
+			     (start ? &start->knode_driver : NULL));
+	while ((dev = next_device(&i)))
+		if (match(dev, data) && get_device(dev))
+			break;
+	klist_iter_exit(&i);
+	return dev;
+}
+EXPORT_SYMBOL_GPL(driver_find_device);
+
 /**
  *	driver_create_file - create sysfs file for driver.
  *	@drv:	driver.
diff --git a/include/linux/device.h b/include/linux/device.h
index 7b781a72b29..07222c531d3 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -80,6 +80,8 @@ extern struct bus_type * find_bus(char * name);
 
 int bus_for_each_dev(struct bus_type * bus, struct device * start, void * data,
 		     int (*fn)(struct device *, void *));
+struct device * bus_find_device(struct bus_type *bus, struct device *start,
+				void *data, int (*match)(struct device *, void *));
 
 int bus_for_each_drv(struct bus_type * bus, struct device_driver * start, 
 		     void * data, int (*fn)(struct device_driver *, void *));
@@ -142,6 +144,9 @@ extern void driver_remove_file(struct device_driver *, struct driver_attribute *
 
 extern int driver_for_each_device(struct device_driver * drv, struct device * start,
 				  void * data, int (*fn)(struct device *, void *));
+struct device * driver_find_device(struct device_driver *drv,
+				   struct device *start, void *data,
+				   int (*match)(struct device *, void *));
 
 
 /*
-- 
cgit v1.2.3-70-g09d2


From 23d3d602cb96addd3c1158424fb01a49ea5e81b1 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Wed, 22 Jun 2005 16:09:05 -0700
Subject: [PATCH] driver core: change bus_rescan_devices to return void

No one was looking at the return value of bus_rescan_devices, and it
really wasn't anything that anyone in the kernel would ever care about.
So change it which enabled some counting code to be removed also.

Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/bus.c     | 27 +++++++++------------------
 include/linux/device.h |  2 +-
 2 files changed, 10 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/bus.c b/drivers/base/bus.c
index 7e17488271a..96fe2f95675 100644
--- a/drivers/base/bus.c
+++ b/drivers/base/bus.c
@@ -483,31 +483,22 @@ void bus_remove_driver(struct device_driver * drv)
 /* Helper for bus_rescan_devices's iter */
 static int bus_rescan_devices_helper(struct device *dev, void *data)
 {
-	int *count = data;
-
-	if (!dev->driver && (device_attach(dev) > 0))
-		(*count)++;
-
+	if (!dev->driver)
+		device_attach(dev);
 	return 0;
 }
 
-
 /**
- *	bus_rescan_devices - rescan devices on the bus for possible drivers
- *	@bus:	the bus to scan.
+ * bus_rescan_devices - rescan devices on the bus for possible drivers
+ * @bus: the bus to scan.
  *
- *	This function will look for devices on the bus with no driver
- *	attached and rescan it against existing drivers to see if it
- *	matches any. Calls device_attach(). Returns the number of devices
- *	that were sucessfully bound to a driver.
+ * This function will look for devices on the bus with no driver
+ * attached and rescan it against existing drivers to see if it matches
+ * any by calling device_attach() for the unbound devices.
  */
-int bus_rescan_devices(struct bus_type * bus)
+void bus_rescan_devices(struct bus_type * bus)
 {
-	int count = 0;
-
-	bus_for_each_dev(bus, NULL, &count, bus_rescan_devices_helper);
-
-	return count;
+	bus_for_each_dev(bus, NULL, NULL, bus_rescan_devices_helper);
 }
 
 
diff --git a/include/linux/device.h b/include/linux/device.h
index 07222c531d3..f378c846e6d 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -69,7 +69,7 @@ struct bus_type {
 extern int bus_register(struct bus_type * bus);
 extern void bus_unregister(struct bus_type * bus);
 
-extern int bus_rescan_devices(struct bus_type * bus);
+extern void bus_rescan_devices(struct bus_type * bus);
 
 extern struct bus_type * get_bus(struct bus_type * bus);
 extern void put_bus(struct bus_type * bus);
-- 
cgit v1.2.3-70-g09d2


From a03fa955576af50df80bec9127b46ef57e0877c0 Mon Sep 17 00:00:00 2001
From: "rajesh.shah@intel.com" <rajesh.shah@intel.com>
Date: Thu, 2 Jun 2005 15:41:48 -0700
Subject: [PATCH] PCI: Increase the number of PCI bus resources

This patch increases the number of resource pointers in the
pci_bus structure. This is needed to store >4 resource ranges
for host bridges and transparent PCI bridges. With this change,
all PCI buses will have more resource pointers, but most PCI
buses will only use the first 3 or 4, the remaining being NULL.
The PCI core already deals with this correctly.

Signed-off-by: Rajesh Shah <rajesh.shah@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/pci.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 66798b46f30..a46cabfd08c 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -586,7 +586,7 @@ struct pci_dev {
 #define PCI_NUM_RESOURCES 11
 
 #ifndef PCI_BUS_NUM_RESOURCES
-#define PCI_BUS_NUM_RESOURCES 4
+#define PCI_BUS_NUM_RESOURCES 8
 #endif
   
 #define PCI_REGION_FLAG_MASK 0x0fU	/* These bits of resource flags tell us the PCI region flags */
-- 
cgit v1.2.3-70-g09d2


From 75865858971add95809c5c9cd35dc4cfba08e33b Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Thu, 30 Jun 2005 02:18:12 -0700
Subject: [PATCH] PCI: clean up dynamic pci id logic

The dynamic pci id logic has been bothering me for a while, and now that
I started to look into how to move some of this to the driver core, I
thought it was time to clean it all up.

It ends up making the code smaller, and easier to follow, and fixes a
few bugs at the same time (dynamic ids were not being matched
everywhere, and so could be missed on some call paths for new devices,
semaphore not needed to be grabbed when adding a new id and calling the
driver core, etc.)

I also renamed the function pci_match_device() to pci_match_id() as
that's what it really does.

Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/i386/kernel/cpu/cpufreq/gx-suspmod.c |   2 +-
 drivers/char/hw_random.c                  |   2 +-
 drivers/char/watchdog/i8xx_tco.c          |   2 +-
 drivers/ide/setup-pci.c                   |   2 +-
 drivers/parport/parport_pc.c              |   2 +-
 drivers/pci/pci-driver.c                  | 196 +++++++++++-------------------
 include/linux/pci-dynids.h                |  18 ---
 include/linux/pci.h                       |   3 +-
 sound/pci/bt87x.c                         |   2 +-
 9 files changed, 79 insertions(+), 150 deletions(-)
 delete mode 100644 include/linux/pci-dynids.h

(limited to 'include/linux')

diff --git a/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c b/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c
index 1a49adb1f4a..e86ea486c31 100644
--- a/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c
+++ b/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c
@@ -190,7 +190,7 @@ static __init struct pci_dev *gx_detect_chipset(void)
 
 	/* detect which companion chip is used */
 	while ((gx_pci = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, gx_pci)) != NULL) {
-		if ((pci_match_device (gx_chipset_tbl, gx_pci)) != NULL) {
+		if ((pci_match_id(gx_chipset_tbl, gx_pci)) != NULL) {
 			return gx_pci;
 		}
 	}
diff --git a/drivers/char/hw_random.c b/drivers/char/hw_random.c
index 7e6ac14c245..3480535a09c 100644
--- a/drivers/char/hw_random.c
+++ b/drivers/char/hw_random.c
@@ -579,7 +579,7 @@ static int __init rng_init (void)
 
 	/* Probe for Intel, AMD RNGs */
 	for_each_pci_dev(pdev) {
-		ent = pci_match_device (rng_pci_tbl, pdev);
+		ent = pci_match_id(rng_pci_tbl, pdev);
 		if (ent) {
 			rng_ops = &rng_vendor_ops[ent->driver_data];
 			goto match;
diff --git a/drivers/char/watchdog/i8xx_tco.c b/drivers/char/watchdog/i8xx_tco.c
index b14d642439e..5d07ee59679 100644
--- a/drivers/char/watchdog/i8xx_tco.c
+++ b/drivers/char/watchdog/i8xx_tco.c
@@ -401,7 +401,7 @@ static unsigned char __init i8xx_tco_getdevice (void)
 	 */
 
 	while ((dev = pci_find_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
-		if (pci_match_device(i8xx_tco_pci_tbl, dev)) {
+		if (pci_match_id(i8xx_tco_pci_tbl, dev)) {
 			i8xx_tco_pci = dev;
 			break;
 		}
diff --git a/drivers/ide/setup-pci.c b/drivers/ide/setup-pci.c
index e501675ad72..77da827b289 100644
--- a/drivers/ide/setup-pci.c
+++ b/drivers/ide/setup-pci.c
@@ -847,7 +847,7 @@ static int __init ide_scan_pcidev(struct pci_dev *dev)
 		d = list_entry(l, struct pci_driver, node);
 		if(d->id_table)
 		{
-			const struct pci_device_id *id = pci_match_device(d->id_table, dev);
+			const struct pci_device_id *id = pci_match_id(d->id_table, dev);
 			if(id != NULL)
 			{
 				if(d->probe(dev, id) >= 0)
diff --git a/drivers/parport/parport_pc.c b/drivers/parport/parport_pc.c
index 80edfa3abd2..4598c6a9212 100644
--- a/drivers/parport/parport_pc.c
+++ b/drivers/parport/parport_pc.c
@@ -3008,7 +3008,7 @@ static int __init parport_pc_init_superio (int autoirq, int autodma)
 	int ret = 0;
 
 	while ((pdev = pci_find_device(PCI_ANY_ID, PCI_ANY_ID, pdev)) != NULL) {
-		id = pci_match_device (parport_pc_pci_tbl, pdev);
+		id = pci_match_id(parport_pc_pci_tbl, pdev);
 		if (id == NULL || id->driver_data >= last_sio)
 			continue;
 
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index e65bf2b395a..aac6de9568e 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -7,7 +7,6 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/device.h>
-#include <linux/pci-dynids.h>
 #include "pci.h"
 
 /*
@@ -19,35 +18,11 @@
  */
 
 #ifdef CONFIG_HOTPLUG
-/**
- * pci_device_probe_dynamic()
- *
- * Walk the dynamic ID list looking for a match.
- * returns 0 and sets pci_dev->driver when drv claims pci_dev, else error.
- */
-static int
-pci_device_probe_dynamic(struct pci_driver *drv, struct pci_dev *pci_dev)
-{
-	int error = -ENODEV;
-	struct list_head *pos;
-	struct dynid *dynid;
 
-	spin_lock(&drv->dynids.lock);
-	list_for_each(pos, &drv->dynids.list) {
-		dynid = list_entry(pos, struct dynid, node);
-		if (pci_match_one_device(&dynid->id, pci_dev)) {
-			spin_unlock(&drv->dynids.lock);
-			error = drv->probe(pci_dev, &dynid->id);
-			if (error >= 0) {
-				pci_dev->driver = drv;
-				return 0;
-			}
-			return error;
-		}
-	}
-	spin_unlock(&drv->dynids.lock);
-	return error;
-}
+struct pci_dynid {
+	struct list_head node;
+	struct pci_device_id id;
+};
 
 /**
  * store_new_id
@@ -58,8 +33,7 @@ pci_device_probe_dynamic(struct pci_driver *drv, struct pci_dev *pci_dev)
 static inline ssize_t
 store_new_id(struct device_driver *driver, const char *buf, size_t count)
 {
-	struct dynid *dynid;
-	struct bus_type * bus;
+	struct pci_dynid *dynid;
 	struct pci_driver *pdrv = to_pci_driver(driver);
 	__u32 vendor=PCI_ANY_ID, device=PCI_ANY_ID, subvendor=PCI_ANY_ID,
 		subdevice=PCI_ANY_ID, class=0, class_mask=0;
@@ -91,37 +65,22 @@ store_new_id(struct device_driver *driver, const char *buf, size_t count)
 	list_add_tail(&pdrv->dynids.list, &dynid->node);
 	spin_unlock(&pdrv->dynids.lock);
 
-	bus = get_bus(pdrv->driver.bus);
-	if (bus) {
-		if (get_driver(&pdrv->driver)) {
-			down_write(&bus->subsys.rwsem);
-			driver_attach(&pdrv->driver);
-			up_write(&bus->subsys.rwsem);
-			put_driver(&pdrv->driver);
-		}
-		put_bus(bus);
+	if (get_driver(&pdrv->driver)) {
+		driver_attach(&pdrv->driver);
+		put_driver(&pdrv->driver);
 	}
 
 	return count;
 }
-
 static DRIVER_ATTR(new_id, S_IWUSR, NULL, store_new_id);
-static inline void
-pci_init_dynids(struct pci_dynids *dynids)
-{
-	spin_lock_init(&dynids->lock);
-	INIT_LIST_HEAD(&dynids->list);
-}
 
 static void
 pci_free_dynids(struct pci_driver *drv)
 {
-	struct list_head *pos, *n;
-	struct dynid *dynid;
+	struct pci_dynid *dynid, *n;
 
 	spin_lock(&drv->dynids.lock);
-	list_for_each_safe(pos, n, &drv->dynids.list) {
-		dynid = list_entry(pos, struct dynid, node);
+	list_for_each_entry_safe(dynid, n, &drv->dynids.list, node) {
 		list_del(&dynid->node);
 		kfree(dynid);
 	}
@@ -138,83 +97,70 @@ pci_create_newid_file(struct pci_driver *drv)
 	return error;
 }
 
-static int
-pci_bus_match_dynids(const struct pci_dev *pci_dev, struct pci_driver *pci_drv)
-{
-	struct list_head *pos;
-	struct dynid *dynid;
-
-	spin_lock(&pci_drv->dynids.lock);
-	list_for_each(pos, &pci_drv->dynids.list) {
-		dynid = list_entry(pos, struct dynid, node);
-		if (pci_match_one_device(&dynid->id, pci_dev)) {
-			spin_unlock(&pci_drv->dynids.lock);
-			return 1;
-		}
-	}
-	spin_unlock(&pci_drv->dynids.lock);
-	return 0;
-}
-
 #else /* !CONFIG_HOTPLUG */
-static inline int pci_device_probe_dynamic(struct pci_driver *drv, struct pci_dev *pci_dev)
-{
-	return -ENODEV;
-}
-static inline void pci_init_dynids(struct pci_dynids *dynids) {}
 static inline void pci_free_dynids(struct pci_driver *drv) {}
 static inline int pci_create_newid_file(struct pci_driver *drv)
 {
 	return 0;
 }
-static inline int pci_bus_match_dynids(const struct pci_dev *pci_dev, struct pci_driver *pci_drv)
-{
-	return 0;
-}
 #endif
 
 /**
- * pci_match_device - Tell if a PCI device structure has a matching
- *                    PCI device id structure
+ * pci_match_id - See if a pci device matches a given pci_id table
  * @ids: array of PCI device id structures to search in
- * @dev: the PCI device structure to match against
- * 
+ * @dev: the PCI device structure to match against.
+ *
  * Used by a driver to check whether a PCI device present in the
- * system is in its list of supported devices.Returns the matching
+ * system is in its list of supported devices.  Returns the matching
  * pci_device_id structure or %NULL if there is no match.
+ *
+ * Depreciated, don't use this as it will not catch any dynamic ids
+ * that a driver might want to check for.
  */
-const struct pci_device_id *
-pci_match_device(const struct pci_device_id *ids, const struct pci_dev *dev)
+const struct pci_device_id *pci_match_id(const struct pci_device_id *ids,
+					 struct pci_dev *dev)
 {
-	while (ids->vendor || ids->subvendor || ids->class_mask) {
-		if (pci_match_one_device(ids, dev))
-			return ids;
-		ids++;
+	if (ids) {
+		while (ids->vendor || ids->subvendor || ids->class_mask) {
+			if (pci_match_one_device(ids, dev))
+				return ids;
+			ids++;
+		}
 	}
 	return NULL;
 }
 
 /**
- * pci_device_probe_static()
- * 
- * returns 0 and sets pci_dev->driver when drv claims pci_dev, else error.
+ * pci_match_device - Tell if a PCI device structure has a matching
+ *                    PCI device id structure
+ * @ids: array of PCI device id structures to search in
+ * @dev: the PCI device structure to match against
+ * @drv: the PCI driver to match against
+ *
+ * Used by a driver to check whether a PCI device present in the
+ * system is in its list of supported devices.  Returns the matching
+ * pci_device_id structure or %NULL if there is no match.
  */
-static int
-pci_device_probe_static(struct pci_driver *drv, struct pci_dev *pci_dev)
-{		   
-	int error = -ENODEV;
+const struct pci_device_id *pci_match_device(struct pci_driver *drv,
+					     struct pci_dev *dev)
+{
 	const struct pci_device_id *id;
+	struct pci_dynid *dynid;
 
-	if (!drv->id_table)
-		return error;
-	id = pci_match_device(drv->id_table, pci_dev);
+	id = pci_match_id(drv->id_table, dev);
 	if (id)
-		error = drv->probe(pci_dev, id);
-	if (error >= 0) {
-		pci_dev->driver = drv;
-		error = 0;
+		return id;
+
+	/* static ids didn't match, lets look at the dynamic ones */
+	spin_lock(&drv->dynids.lock);
+	list_for_each_entry(dynid, &drv->dynids.list, node) {
+		if (pci_match_one_device(&dynid->id, dev)) {
+			spin_unlock(&drv->dynids.lock);
+			return &dynid->id;
+		}
 	}
-	return error;
+	spin_unlock(&drv->dynids.lock);
+	return NULL;
 }
 
 /**
@@ -225,13 +171,20 @@ pci_device_probe_static(struct pci_driver *drv, struct pci_dev *pci_dev)
  */
 static int
 __pci_device_probe(struct pci_driver *drv, struct pci_dev *pci_dev)
-{		   
+{
+	const struct pci_device_id *id;
 	int error = 0;
 
 	if (!pci_dev->driver && drv->probe) {
-		error = pci_device_probe_static(drv, pci_dev);
-		if (error == -ENODEV)
-			error = pci_device_probe_dynamic(drv, pci_dev);
+		error = -ENODEV;
+
+		id = pci_match_device(drv, pci_dev);
+		if (id)
+			error = drv->probe(pci_dev, id);
+		if (error >= 0) {
+			pci_dev->driver = drv;
+			error = 0;
+		}
 	}
 	return error;
 }
@@ -371,12 +324,6 @@ static struct kobj_type pci_driver_kobj_type = {
 	.sysfs_ops = &pci_driver_sysfs_ops,
 };
 
-static int
-pci_populate_driver_dir(struct pci_driver *drv)
-{
-	return pci_create_newid_file(drv);
-}
-
 /**
  * pci_register_driver - register a new pci driver
  * @drv: the driver structure to register
@@ -401,13 +348,15 @@ int pci_register_driver(struct pci_driver *drv)
 		drv->driver.shutdown = pci_device_shutdown;
 	drv->driver.owner = drv->owner;
 	drv->driver.kobj.ktype = &pci_driver_kobj_type;
-	pci_init_dynids(&drv->dynids);
+
+	spin_lock_init(&drv->dynids.lock);
+	INIT_LIST_HEAD(&drv->dynids.list);
 
 	/* register with core */
 	error = driver_register(&drv->driver);
 
 	if (!error)
-		pci_populate_driver_dir(drv);
+		error = pci_create_newid_file(drv);
 
 	return error;
 }
@@ -463,21 +412,17 @@ pci_dev_driver(const struct pci_dev *dev)
  * system is in its list of supported devices.Returns the matching
  * pci_device_id structure or %NULL if there is no match.
  */
-static int pci_bus_match(struct device * dev, struct device_driver * drv) 
+static int pci_bus_match(struct device *dev, struct device_driver *drv)
 {
-	const struct pci_dev * pci_dev = to_pci_dev(dev);
-	struct pci_driver * pci_drv = to_pci_driver(drv);
-	const struct pci_device_id * ids = pci_drv->id_table;
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	struct pci_driver *pci_drv = to_pci_driver(drv);
 	const struct pci_device_id *found_id;
 
-	if (!ids)
-		return 0;
-
-	found_id = pci_match_device(ids, pci_dev);
+	found_id = pci_match_device(pci_drv, pci_dev);
 	if (found_id)
 		return 1;
 
-	return pci_bus_match_dynids(pci_dev, pci_drv);
+	return 0;
 }
 
 /**
@@ -536,6 +481,7 @@ static int __init pci_driver_init(void)
 
 postcore_initcall(pci_driver_init);
 
+EXPORT_SYMBOL(pci_match_id);
 EXPORT_SYMBOL(pci_match_device);
 EXPORT_SYMBOL(pci_register_driver);
 EXPORT_SYMBOL(pci_unregister_driver);
diff --git a/include/linux/pci-dynids.h b/include/linux/pci-dynids.h
deleted file mode 100644
index 183b6b0de81..00000000000
--- a/include/linux/pci-dynids.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- *	PCI defines and function prototypes
- *	Copyright 2003 Dell Inc.
- *        by Matt Domsch <Matt_Domsch@dell.com>
- */
-
-#ifndef LINUX_PCI_DYNIDS_H
-#define LINUX_PCI_DYNIDS_H
-
-#include <linux/list.h>
-#include <linux/mod_devicetable.h>
-
-struct dynid {
-	struct list_head        node;
-	struct pci_device_id    id;
-};
-
-#endif
diff --git a/include/linux/pci.h b/include/linux/pci.h
index a46cabfd08c..7ac14961ba2 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -860,7 +860,8 @@ int pci_register_driver(struct pci_driver *);
 void pci_unregister_driver(struct pci_driver *);
 void pci_remove_behind_bridge(struct pci_dev *);
 struct pci_driver *pci_dev_driver(const struct pci_dev *);
-const struct pci_device_id *pci_match_device(const struct pci_device_id *ids, const struct pci_dev *dev);
+const struct pci_device_id *pci_match_device(struct pci_driver *drv, struct pci_dev *dev);
+const struct pci_device_id *pci_match_id(const struct pci_device_id *ids, struct pci_dev *dev);
 int pci_scan_bridge(struct pci_bus *bus, struct pci_dev * dev, int max, int pass);
 
 /* kmem_cache style wrapper around pci_alloc_consistent() */
diff --git a/sound/pci/bt87x.c b/sound/pci/bt87x.c
index defdc5a459f..909fef8903c 100644
--- a/sound/pci/bt87x.c
+++ b/sound/pci/bt87x.c
@@ -804,7 +804,7 @@ static int __devinit snd_bt87x_detect_card(struct pci_dev *pci)
 	int i;
 	const struct pci_device_id *supported;
 
-	supported = pci_match_device(snd_bt87x_ids, pci);
+	supported = pci_match_device(driver, pci);
 	if (supported)
 		return supported->driver_data;
 
-- 
cgit v1.2.3-70-g09d2


From 21e2c01dc3e38d466eda5871645878d2c3a33261 Mon Sep 17 00:00:00 2001
From: Rob Punkunus <rpunkunus@nvidia.com>
Date: Sun, 3 Jul 2005 17:37:18 +0200
Subject: [PATCH] amd74xx: support MCP55 device IDs

From: Rob Punkunus <rpunkunus@nvidia.com>

Rob Punkunus recently submitted a patch to enable support for MCP51/MCP55 in
the amd74xx driver. This patch was whitespace-corrupted and didn't apply to
2.6.12 since MCP51 support was merged in the 2.6.12-rc series.

Gentoo would like to support this hardware for our upcoming release media, so
I fixed the patch, and here it is :)

Signed-off-by: Daniel Drake <dsd@gentoo.org>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@elka.pw.edu.pl>
---
 drivers/ide/pci/amd74xx.c | 3 +++
 include/linux/pci_ids.h   | 1 +
 2 files changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/ide/pci/amd74xx.c b/drivers/ide/pci/amd74xx.c
index 65eab9b63a7..844a6c9fb94 100644
--- a/drivers/ide/pci/amd74xx.c
+++ b/drivers/ide/pci/amd74xx.c
@@ -73,6 +73,7 @@ static struct amd_ide_chip {
 	{ PCI_DEVICE_ID_NVIDIA_NFORCE_CK804_IDE,	0x50, AMD_UDMA_133 },
 	{ PCI_DEVICE_ID_NVIDIA_NFORCE_MCP04_IDE,	0x50, AMD_UDMA_133 },
 	{ PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_IDE,	0x50, AMD_UDMA_133 },
+	{ PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_IDE,	0x50, AMD_UDMA_133 },
 	{ 0 }
 };
 
@@ -489,6 +490,7 @@ static ide_pci_device_t amd74xx_chipsets[] __devinitdata = {
 	/* 13 */ DECLARE_NV_DEV("NFORCE-CK804"),
 	/* 14 */ DECLARE_NV_DEV("NFORCE-MCP04"),
 	/* 15 */ DECLARE_NV_DEV("NFORCE-MCP51"),
+	/* 16 */ DECLARE_NV_DEV("NFORCE-MCP55"),
 };
 
 static int __devinit amd74xx_probe(struct pci_dev *dev, const struct pci_device_id *id)
@@ -524,6 +526,7 @@ static struct pci_device_id amd74xx_pci_tbl[] = {
 	{ PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_CK804_IDE,	PCI_ANY_ID, PCI_ANY_ID, 0, 0, 13 },
 	{ PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP04_IDE,	PCI_ANY_ID, PCI_ANY_ID, 0, 0, 14 },
 	{ PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_IDE,	PCI_ANY_ID, PCI_ANY_ID, 0, 0, 15 },
+	{ PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_IDE,	PCI_ANY_ID, PCI_ANY_ID, 0, 0, 16 },
 	{ 0, },
 };
 MODULE_DEVICE_TABLE(pci, amd74xx_pci_tbl);
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index c3ee1ae4545..27348c22dac 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1238,6 +1238,7 @@
 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_IDE	0x0265
 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA	0x0266
 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA2	0x0267
+#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_IDE	0x036E
 #define PCI_DEVICE_ID_NVIDIA_NVENET_12		0x0268
 #define PCI_DEVICE_ID_NVIDIA_NVENET_13		0x0269
 #define PCI_DEVICE_ID_NVIDIA_MCP51_AUDIO	0x026B
-- 
cgit v1.2.3-70-g09d2


From e7270dec080002d8aa18256c756af6c32331ef48 Mon Sep 17 00:00:00 2001
From: Raphael Assenat <raph@raphnet.net>
Date: Mon, 4 Jul 2005 13:23:45 -0700
Subject: [SPARC64/COMPAT]: Add some compat ioctl for ppdev

The following patch adds some ioctls to include/linux/compat_ioctl.h
to allow using ppdev from the 32 bit user space on sparc64.

This patch also adds the PPDEV option in the sparc64 menu, near Parallel
printer support in the 'General machine setup' submenu.

All those ioctls seem to be compatible, since (correct me if I'm wrong)
they dont use the 'long' type. See include/linux/ppdev.h.

The application I used to test the new ioctls only used the following:
PPEXCL
PPCLAIM
PPNEGOT
PPGETMODES
PPRCONTROL
PPWCONTROL
PPDATADIR
PPWDATA
PPRDATA

But I beleive that the other ioctls will work fine.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc64/Kconfig         | 18 ++++++++++++++++++
 include/linux/compat_ioctl.h | 19 ++++++++++++++++++-
 2 files changed, 36 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig
index e2b050eb3b9..d78bc13ebbb 100644
--- a/arch/sparc64/Kconfig
+++ b/arch/sparc64/Kconfig
@@ -444,6 +444,24 @@ config PRINTER
 	  If you have more than 8 printers, you need to increase the LP_NO
 	  macro in lp.c and the PARPORT_MAX macro in parport.h.
 
+config PPDEV
+	tristate "Support for user-space parallel port device drivers"
+	depends on PARPORT
+	---help---
+	  Saying Y to this adds support for /dev/parport device nodes.  This
+	  is needed for programs that want portable access to the parallel
+	  port, for instance deviceid (which displays Plug-and-Play device
+	  IDs).
+
+	  This is the parallel port equivalent of SCSI generic support (sg).
+	  It is safe to say N to this -- it is not needed for normal printing
+	  or parallel port CD-ROM/disk support.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called ppdev.
+
+	  If unsure, say N.
+
 config ENVCTRL
 	tristate "SUNW, envctrl support"
 	depends on PCI
diff --git a/include/linux/compat_ioctl.h b/include/linux/compat_ioctl.h
index 70a4ebb5d96..ecb0d39c079 100644
--- a/include/linux/compat_ioctl.h
+++ b/include/linux/compat_ioctl.h
@@ -346,10 +346,27 @@ COMPATIBLE_IOCTL(PPPOEIOCDFWD)
 /* LP */
 COMPATIBLE_IOCTL(LPGETSTATUS)
 /* ppdev */
+COMPATIBLE_IOCTL(PPSETMODE)
+COMPATIBLE_IOCTL(PPRSTATUS)
+COMPATIBLE_IOCTL(PPRCONTROL)
+COMPATIBLE_IOCTL(PPWCONTROL)
+COMPATIBLE_IOCTL(PPFCONTROL)
+COMPATIBLE_IOCTL(PPRDATA)
+COMPATIBLE_IOCTL(PPWDATA)
 COMPATIBLE_IOCTL(PPCLAIM)
 COMPATIBLE_IOCTL(PPRELEASE)
-COMPATIBLE_IOCTL(PPEXCL)
 COMPATIBLE_IOCTL(PPYIELD)
+COMPATIBLE_IOCTL(PPEXCL)
+COMPATIBLE_IOCTL(PPDATADIR)
+COMPATIBLE_IOCTL(PPNEGOT)
+COMPATIBLE_IOCTL(PPWCTLONIRQ)
+COMPATIBLE_IOCTL(PPCLRIRQ)
+COMPATIBLE_IOCTL(PPSETPHASE)
+COMPATIBLE_IOCTL(PPGETMODES)
+COMPATIBLE_IOCTL(PPGETMODE)
+COMPATIBLE_IOCTL(PPGETPHASE)
+COMPATIBLE_IOCTL(PPGETFLAGS)
+COMPATIBLE_IOCTL(PPSETFLAGS)
 /* CDROM stuff */
 COMPATIBLE_IOCTL(CDROMPAUSE)
 COMPATIBLE_IOCTL(CDROMRESUME)
-- 
cgit v1.2.3-70-g09d2


From 55820ee2f8c767a2833b21bd365e5753f50bd8ce Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 5 Jul 2005 14:08:10 -0700
Subject: [NET]: Fix signedness issues in net/core/filter.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is the code to load packet data into a register:

                        k = fentry->k;
                        if (k < 0) {
...
                        } else {
                                u32 _tmp, *p;
                                p = skb_header_pointer(skb, k, 4, &_tmp);
                                if (p != NULL) {
                                        A = ntohl(*p);
                                        continue;
                                }
                        }

skb_header_pointer checks if the requested data is within the
linear area:

        int hlen = skb_headlen(skb);

        if (offset + len <= hlen)
                return skb->data + offset;

When offset is within [INT_MAX-len+1..INT_MAX] the addition will
result in a negative number which is <= hlen.

I couldn't trigger a crash on my AMD64 with 2GB of memory, but a
coworker tried on his x86 machine and it crashed immediately.

This patch fixes the check in skb_header_pointer to handle large
positive offsets similar to skb_copy_bits. Invalid data can still
be accessed using negative offsets (also similar to skb_copy_bits),
anyone using negative offsets needs to verify them himself.

Thanks to Thomas V�gtle <thomas.voegtle@coreworks.de> for verifying the
problem by crashing his machine and providing me with an Oops.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 416a2e4024b..fbcb1865197 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1211,7 +1211,7 @@ static inline void *skb_header_pointer(const struct sk_buff *skb, int offset,
 {
 	int hlen = skb_headlen(skb);
 
-	if (offset + len <= hlen)
+	if (hlen - offset >= len)
 		return skb->data + offset;
 
 	if (skb_copy_bits(skb, offset, buffer, len) < 0)
-- 
cgit v1.2.3-70-g09d2


From e176fe8954a5239c24afe79b1001ba3c29511963 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 5 Jul 2005 14:12:44 -0700
Subject: [NET]: Remove unused security member in sk_buff

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h               | 4 +---
 include/linux/tc_ematch/tc_em_meta.h | 2 +-
 net/core/skbuff.c                    | 2 --
 net/ipv4/ip_output.c                 | 1 -
 net/ipv6/ip6_output.c                | 1 -
 net/sched/em_meta.c                  | 6 ------
 6 files changed, 2 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index fbcb1865197..1e6290f4f81 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -183,7 +183,6 @@ struct skb_shared_info {
  *	@priority: Packet queueing priority
  *	@users: User count - see {datagram,tcp}.c
  *	@protocol: Packet protocol from driver
- *	@security: Security level of packet
  *	@truesize: Buffer size 
  *	@head: Head of buffer
  *	@data: Data head pointer
@@ -255,8 +254,7 @@ struct sk_buff {
 				pkt_type,
 				ip_summed;
 	__u32			priority;
-	unsigned short		protocol,
-				security;
+	unsigned short		protocol;
 
 	void			(*destructor)(struct sk_buff *skb);
 #ifdef CONFIG_NETFILTER
diff --git a/include/linux/tc_ematch/tc_em_meta.h b/include/linux/tc_ematch/tc_em_meta.h
index a6b2cc530af..bcb762d9312 100644
--- a/include/linux/tc_ematch/tc_em_meta.h
+++ b/include/linux/tc_ematch/tc_em_meta.h
@@ -45,7 +45,7 @@ enum
 	TCF_META_ID_REALDEV,
 	TCF_META_ID_PRIORITY,
 	TCF_META_ID_PROTOCOL,
-	TCF_META_ID_SECURITY,
+	TCF_META_ID_SECURITY, /* obsolete */
 	TCF_META_ID_PKTTYPE,
 	TCF_META_ID_PKTLEN,
 	TCF_META_ID_DATALEN,
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index bb73b2190ec..733deee24b9 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -357,7 +357,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
 	C(ip_summed);
 	C(priority);
 	C(protocol);
-	C(security);
 	n->destructor = NULL;
 #ifdef CONFIG_NETFILTER
 	C(nfmark);
@@ -422,7 +421,6 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	new->pkt_type	= old->pkt_type;
 	new->stamp	= old->stamp;
 	new->destructor = NULL;
-	new->security	= old->security;
 #ifdef CONFIG_NETFILTER
 	new->nfmark	= old->nfmark;
 	new->nfcache	= old->nfcache;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 6ce5c3292f9..1bfa49eda96 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -389,7 +389,6 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 	to->pkt_type = from->pkt_type;
 	to->priority = from->priority;
 	to->protocol = from->protocol;
-	to->security = from->security;
 	dst_release(to->dst);
 	to->dst = dst_clone(from->dst);
 	to->dev = from->dev;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 06e7cdaeedc..1f2c2f9e353 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -465,7 +465,6 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 	to->pkt_type = from->pkt_type;
 	to->priority = from->priority;
 	to->protocol = from->protocol;
-	to->security = from->security;
 	dst_release(to->dst);
 	to->dst = dst_clone(from->dst);
 	to->dev = from->dev;
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 48bb23c2a35..53d98f8d3d8 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -205,11 +205,6 @@ META_COLLECTOR(int_protocol)
 	dst->value = skb->protocol;
 }
 
-META_COLLECTOR(int_security)
-{
-	dst->value = skb->security;
-}
-
 META_COLLECTOR(int_pkttype)
 {
 	dst->value = skb->pkt_type;
@@ -524,7 +519,6 @@ static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = {
 		[META_ID(REALDEV)]		= META_FUNC(int_realdev),
 		[META_ID(PRIORITY)]		= META_FUNC(int_priority),
 		[META_ID(PROTOCOL)]		= META_FUNC(int_protocol),
-		[META_ID(SECURITY)]		= META_FUNC(int_security),
 		[META_ID(PKTTYPE)]		= META_FUNC(int_pkttype),
 		[META_ID(PKTLEN)]		= META_FUNC(int_pktlen),
 		[META_ID(DATALEN)]		= META_FUNC(int_datalen),
-- 
cgit v1.2.3-70-g09d2


From 1cbb3380ef683f742876f48e3739b3df4ea9e168 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 5 Jul 2005 14:13:41 -0700
Subject: [NET]: Reduce size of sk_buff by 4 bytes

Reduce local_df to a bit field and ip_summed to a 2 bits
field thus saving 13 bits. Move bit fields, packet type,
and protocol into the spare area between the priority
and the destructor. Saves 4 bytes on both, 32bit and
64bit architectures.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 1e6290f4f81..14b95041349 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -248,17 +248,18 @@ struct sk_buff {
 				data_len,
 				mac_len,
 				csum;
-	unsigned char		local_df,
-				cloned:1,
-				nohdr:1,
-				pkt_type,
-				ip_summed;
 	__u32			priority;
-	unsigned short		protocol;
+	__u8			local_df:1,
+				cloned:1,
+				ip_summed:2,
+				nohdr:1;
+				/* 3 bits spare */
+	__u8			pkt_type;
+	__u16			protocol;
 
 	void			(*destructor)(struct sk_buff *skb);
 #ifdef CONFIG_NETFILTER
-        unsigned long		nfmark;
+	unsigned long		nfmark;
 	__u32			nfcache;
 	__u32			nfctinfo;
 	struct nf_conntrack	*nfct;
-- 
cgit v1.2.3-70-g09d2


From bc971dee6ece1fd0d431948924becd9c50e7b778 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 5 Jul 2005 15:03:46 -0700
Subject: [SHAPER]: Switch to spinlocks.

Dave, you were right and the sleeping locks in shaper were
broken. Markus Kanet noticed this and also tested the patch below that
switches locking to spinlocks.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/shaper.c      | 42 ++++++++++++++++--------------------------
 include/linux/if_shaper.h |  2 +-
 2 files changed, 17 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/shaper.c b/drivers/net/shaper.c
index 20edeb34579..3ad0b6751f6 100644
--- a/drivers/net/shaper.c
+++ b/drivers/net/shaper.c
@@ -135,10 +135,8 @@ static int shaper_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct shaper *shaper = dev->priv;
  	struct sk_buff *ptr;
-   
-	if (down_trylock(&shaper->sem))
-		return -1;
-
+  
+	spin_lock(&shaper->lock);
  	ptr=shaper->sendq.prev;
  	
  	/*
@@ -232,7 +230,7 @@ static int shaper_start_xmit(struct sk_buff *skb, struct net_device *dev)
                 shaper->stats.collisions++;
  	}
 	shaper_kick(shaper);
-	up(&shaper->sem);
+	spin_unlock(&shaper->lock);
  	return 0;
 }
 
@@ -271,11 +269,9 @@ static void shaper_timer(unsigned long data)
 {
 	struct shaper *shaper = (struct shaper *)data;
 
-	if (!down_trylock(&shaper->sem)) {
-		shaper_kick(shaper);
-		up(&shaper->sem);
-	} else
-		mod_timer(&shaper->timer, jiffies);
+	spin_lock(&shaper->lock);
+	shaper_kick(shaper);
+	spin_unlock(&shaper->lock);
 }
 
 /*
@@ -331,21 +327,6 @@ static void shaper_kick(struct shaper *shaper)
 }
 
 
-/*
- *	Flush the shaper queues on a closedown
- */
- 
-static void shaper_flush(struct shaper *shaper)
-{
-	struct sk_buff *skb;
-
-	down(&shaper->sem);
-	while((skb=skb_dequeue(&shaper->sendq))!=NULL)
-		dev_kfree_skb(skb);
-	shaper_kick(shaper);
-	up(&shaper->sem);
-}
-
 /*
  *	Bring the interface up. We just disallow this until a 
  *	bind.
@@ -375,7 +356,15 @@ static int shaper_open(struct net_device *dev)
 static int shaper_close(struct net_device *dev)
 {
 	struct shaper *shaper=dev->priv;
-	shaper_flush(shaper);
+	struct sk_buff *skb;
+
+	while ((skb = skb_dequeue(&shaper->sendq)) != NULL)
+		dev_kfree_skb(skb);
+
+	spin_lock_bh(&shaper->lock);
+	shaper_kick(shaper);
+	spin_unlock_bh(&shaper->lock);
+
 	del_timer_sync(&shaper->timer);
 	return 0;
 }
@@ -576,6 +565,7 @@ static void shaper_init_priv(struct net_device *dev)
 	init_timer(&sh->timer);
 	sh->timer.function=shaper_timer;
 	sh->timer.data=(unsigned long)sh;
+	spin_lock_init(&sh->lock);
 }
 
 /*
diff --git a/include/linux/if_shaper.h b/include/linux/if_shaper.h
index 004e6f09a6e..68c896a36a3 100644
--- a/include/linux/if_shaper.h
+++ b/include/linux/if_shaper.h
@@ -23,7 +23,7 @@ struct shaper
 	__u32 shapeclock;
 	unsigned long recovery;	/* Time we can next clock a packet out on
 				   an empty queue */
-	struct semaphore sem;
+	spinlock_t lock;
         struct net_device_stats stats;
 	struct net_device *dev;
 	int  (*hard_start_xmit) (struct sk_buff *skb,
-- 
cgit v1.2.3-70-g09d2


From c1b4a7e69576d65efc31a8cea0714173c2841244 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 5 Jul 2005 15:24:38 -0700
Subject: [TCP]: Move to new TSO segmenting scheme.

Make TSO segment transmit size decisions at send time not earlier.

The basic scheme is that we try to build as large a TSO frame as
possible when pulling in the user data, but the size of the TSO frame
output to the card is determined at transmit time.

This is guided by tp->xmit_size_goal.  It is always set to a multiple
of MSS and tells sendmsg/sendpage how large an SKB to try and build.

Later, tcp_write_xmit() and tcp_push_one() chop up the packet if
necessary and conditions warrant.  These routines can also decide to
"defer" in order to wait for more ACKs to arrive and thus allow larger
TSO frames to be emitted.

A general observation is that TSO elongates the pipe, thus requiring a
larger congestion window and larger buffering especially at the sender
side.  Therefore, it is important that applications 1) get a large
enough socket send buffer (this is accomplished by our dynamic send
buffer expansion code) 2) do large enough writes.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h   |   2 +-
 include/net/tcp.h     |   4 +-
 net/ipv4/tcp.c        |  26 ++-
 net/ipv4/tcp_input.c  |  10 +-
 net/ipv4/tcp_ipv4.c   |   2 +-
 net/ipv4/tcp_output.c | 578 +++++++++++++++++++++++++++++++-------------------
 net/ipv6/tcp_ipv6.c   |   2 +-
 7 files changed, 384 insertions(+), 240 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index dfd93d03f5d..e4fd82e4210 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -286,7 +286,7 @@ struct tcp_sock {
 	__u32	max_window;	/* Maximal window ever seen from peer	*/
 	__u32	pmtu_cookie;	/* Last pmtu seen by socket		*/
 	__u32	mss_cache;	/* Cached effective mss, not including SACKS */
-	__u16	mss_cache_std;	/* Like mss_cache, but without TSO */
+	__u16	xmit_size_goal;	/* Goal for segmenting output packets	*/
 	__u16	ext_header_len;	/* Network protocol overhead (IP/IPv6 options) */
 	__u8	ca_state;	/* State of fast-retransmit machine 	*/
 	__u8	retransmits;	/* Number of unrecovered RTO timeouts.	*/
diff --git a/include/net/tcp.h b/include/net/tcp.h
index b19238027da..a166918ca56 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -862,7 +862,7 @@ extern int  tcp_write_wakeup(struct sock *);
 extern void tcp_send_fin(struct sock *sk);
 extern void tcp_send_active_reset(struct sock *sk, int priority);
 extern int  tcp_send_synack(struct sock *);
-extern void tcp_push_one(struct sock *, unsigned mss_now);
+extern void tcp_push_one(struct sock *, unsigned int mss_now);
 extern void tcp_send_ack(struct sock *sk);
 extern void tcp_send_delayed_ack(struct sock *sk);
 
@@ -968,7 +968,7 @@ static inline void tcp_reset_xmit_timer(struct sock *sk, int what, unsigned long
 static inline void tcp_initialize_rcv_mss(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	unsigned int hint = min(tp->advmss, tp->mss_cache_std);
+	unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);
 
 	hint = min(hint, tp->rcv_wnd/2);
 	hint = min(hint, TCP_MIN_RCVMSS);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 2ba73bf3a8f..29894c74916 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -615,7 +615,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse
 			 size_t psize, int flags)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	int mss_now;
+	int mss_now, size_goal;
 	int err;
 	ssize_t copied;
 	long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
@@ -628,6 +628,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse
 	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
 
 	mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
+	size_goal = tp->xmit_size_goal;
 	copied = 0;
 
 	err = -EPIPE;
@@ -641,7 +642,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse
 		int offset = poffset % PAGE_SIZE;
 		int size = min_t(size_t, psize, PAGE_SIZE - offset);
 
-		if (!sk->sk_send_head || (copy = mss_now - skb->len) <= 0) {
+		if (!sk->sk_send_head || (copy = size_goal - skb->len) <= 0) {
 new_segment:
 			if (!sk_stream_memory_free(sk))
 				goto wait_for_sndbuf;
@@ -652,7 +653,7 @@ new_segment:
 				goto wait_for_memory;
 
 			skb_entail(sk, tp, skb);
-			copy = mss_now;
+			copy = size_goal;
 		}
 
 		if (copy > size)
@@ -693,7 +694,7 @@ new_segment:
 		if (!(psize -= copy))
 			goto out;
 
-		if (skb->len != mss_now || (flags & MSG_OOB))
+		if (skb->len < mss_now || (flags & MSG_OOB))
 			continue;
 
 		if (forced_push(tp)) {
@@ -713,6 +714,7 @@ wait_for_memory:
 			goto do_error;
 
 		mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
+		size_goal = tp->xmit_size_goal;
 	}
 
 out:
@@ -754,7 +756,7 @@ ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset,
 
 static inline int select_size(struct sock *sk, struct tcp_sock *tp)
 {
-	int tmp = tp->mss_cache_std;
+	int tmp = tp->mss_cache;
 
 	if (sk->sk_route_caps & NETIF_F_SG) {
 		if (sk->sk_route_caps & NETIF_F_TSO)
@@ -778,7 +780,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
 	int iovlen, flags;
-	int mss_now;
+	int mss_now, size_goal;
 	int err, copied;
 	long timeo;
 
@@ -797,6 +799,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
 
 	mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
+	size_goal = tp->xmit_size_goal;
 
 	/* Ok commence sending. */
 	iovlen = msg->msg_iovlen;
@@ -819,7 +822,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 			skb = sk->sk_write_queue.prev;
 
 			if (!sk->sk_send_head ||
-			    (copy = mss_now - skb->len) <= 0) {
+			    (copy = size_goal - skb->len) <= 0) {
 
 new_segment:
 				/* Allocate new segment. If the interface is SG,
@@ -842,7 +845,7 @@ new_segment:
 					skb->ip_summed = CHECKSUM_HW;
 
 				skb_entail(sk, tp, skb);
-				copy = mss_now;
+				copy = size_goal;
 			}
 
 			/* Try to append data to the end of skb. */
@@ -937,7 +940,7 @@ new_segment:
 			if ((seglen -= copy) == 0 && iovlen == 0)
 				goto out;
 
-			if (skb->len != mss_now || (flags & MSG_OOB))
+			if (skb->len < mss_now || (flags & MSG_OOB))
 				continue;
 
 			if (forced_push(tp)) {
@@ -957,6 +960,7 @@ wait_for_memory:
 				goto do_error;
 
 			mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
+			size_goal = tp->xmit_size_goal;
 		}
 	}
 
@@ -2128,7 +2132,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 
 	info->tcpi_rto = jiffies_to_usecs(tp->rto);
 	info->tcpi_ato = jiffies_to_usecs(tp->ack.ato);
-	info->tcpi_snd_mss = tp->mss_cache_std;
+	info->tcpi_snd_mss = tp->mss_cache;
 	info->tcpi_rcv_mss = tp->ack.rcv_mss;
 
 	info->tcpi_unacked = tp->packets_out;
@@ -2178,7 +2182,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
 
 	switch (optname) {
 	case TCP_MAXSEG:
-		val = tp->mss_cache_std;
+		val = tp->mss_cache;
 		if (!val && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
 			val = tp->rx_opt.user_mss;
 		break;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2ef2f355b8b..8de2f1071c2 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -740,10 +740,10 @@ __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst)
 	__u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
 
 	if (!cwnd) {
-		if (tp->mss_cache_std > 1460)
+		if (tp->mss_cache > 1460)
 			cwnd = 2;
 		else
-			cwnd = (tp->mss_cache_std > 1095) ? 3 : 4;
+			cwnd = (tp->mss_cache > 1095) ? 3 : 4;
 	}
 	return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
 }
@@ -914,7 +914,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 	if (sk->sk_route_caps & NETIF_F_TSO) {
 		sk->sk_route_caps &= ~NETIF_F_TSO;
 		sock_set_flag(sk, SOCK_NO_LARGESEND);
-		tp->mss_cache = tp->mss_cache_std;
+		tp->mss_cache = tp->mss_cache;
 	}
 
 	if (!tp->sacked_out)
@@ -1077,7 +1077,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 			    (IsFack(tp) ||
 			     !before(lost_retrans,
 				     TCP_SKB_CB(skb)->ack_seq + tp->reordering *
-				     tp->mss_cache_std))) {
+				     tp->mss_cache))) {
 				TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
 				tp->retrans_out -= tcp_skb_pcount(skb);
 
@@ -3334,7 +3334,7 @@ static void tcp_new_space(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	if (tcp_should_expand_sndbuf(sk, tp)) {
- 		int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache_std) +
+ 		int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
 			MAX_TCP_HEADER + 16 + sizeof(struct sk_buff),
 		    demanded = max_t(unsigned int, tp->snd_cwnd,
 						   tp->reordering + 1);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index ebf112347a9..62f62bb05c2 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2045,7 +2045,7 @@ static int tcp_v4_init_sock(struct sock *sk)
 	 */
 	tp->snd_ssthresh = 0x7fffffff;	/* Infinity */
 	tp->snd_cwnd_clamp = ~0;
-	tp->mss_cache_std = tp->mss_cache = 536;
+	tp->mss_cache = 536;
 
 	tp->reordering = sysctl_tcp_reordering;
 	tp->ca_ops = &tcp_init_congestion_ops;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 0a4cd24b657..fd3ce38184a 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -49,7 +49,7 @@ int sysctl_tcp_retrans_collapse = 1;
  * will allow a single TSO frame to consume.  Building TSO frames
  * which are too large can cause TCP streams to be bursty.
  */
-int sysctl_tcp_tso_win_divisor = 8;
+int sysctl_tcp_tso_win_divisor = 3;
 
 static inline void update_send_head(struct sock *sk, struct tcp_sock *tp,
 				    struct sk_buff *skb)
@@ -403,21 +403,11 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
 		sk->sk_send_head = skb;
 }
 
-static inline void tcp_tso_set_push(struct sk_buff *skb)
-{
-	/* Force push to be on for any TSO frames to workaround
-	 * problems with busted implementations like Mac OS-X that
-	 * hold off socket receive wakeups until push is seen.
-	 */
-	if (tcp_skb_pcount(skb) > 1)
-		TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
-}
-
 static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	if (skb->len <= tp->mss_cache_std ||
+	if (skb->len <= tp->mss_cache ||
 	    !(sk->sk_route_caps & NETIF_F_TSO)) {
 		/* Avoid the costly divide in the normal
 		 * non-TSO case.
@@ -427,164 +417,10 @@ static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb)
 	} else {
 		unsigned int factor;
 
-		factor = skb->len + (tp->mss_cache_std - 1);
-		factor /= tp->mss_cache_std;
+		factor = skb->len + (tp->mss_cache - 1);
+		factor /= tp->mss_cache;
 		skb_shinfo(skb)->tso_segs = factor;
-		skb_shinfo(skb)->tso_size = tp->mss_cache_std;
-	}
-}
-
-/* Does SKB fit into the send window? */
-static inline int tcp_snd_wnd_test(struct tcp_sock *tp, struct sk_buff *skb, unsigned int cur_mss)
-{
-	u32 end_seq = TCP_SKB_CB(skb)->end_seq;
-
-	return !after(end_seq, tp->snd_una + tp->snd_wnd);
-}
-
-/* Can at least one segment of SKB be sent right now, according to the
- * congestion window rules?  If so, return how many segments are allowed.
- */
-static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, struct sk_buff *skb)
-{
-	u32 in_flight, cwnd;
-
-	/* Don't be strict about the congestion window for the final FIN.  */
-	if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)
-		return 1;
-
-	in_flight = tcp_packets_in_flight(tp);
-	cwnd = tp->snd_cwnd;
-	if (in_flight < cwnd)
-		return (cwnd - in_flight);
-
-	return 0;
-}
-
-static inline int tcp_minshall_check(const struct tcp_sock *tp)
-{
-	return after(tp->snd_sml,tp->snd_una) &&
-		!after(tp->snd_sml, tp->snd_nxt);
-}
-
-/* Return 0, if packet can be sent now without violation Nagle's rules:
- * 1. It is full sized.
- * 2. Or it contains FIN. (already checked by caller)
- * 3. Or TCP_NODELAY was set.
- * 4. Or TCP_CORK is not set, and all sent packets are ACKed.
- *    With Minshall's modification: all sent small packets are ACKed.
- */
-
-static inline int tcp_nagle_check(const struct tcp_sock *tp,
-				  const struct sk_buff *skb, 
-				  unsigned mss_now, int nonagle)
-{
-	return (skb->len < mss_now &&
-		((nonagle&TCP_NAGLE_CORK) ||
-		 (!nonagle &&
-		  tp->packets_out &&
-		  tcp_minshall_check(tp))));
-}
-
-/* Return non-zero if the Nagle test allows this packet to be
- * sent now.
- */
-static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb,
-				 unsigned int cur_mss, int nonagle)
-{
-	/* Nagle rule does not apply to frames, which sit in the middle of the
-	 * write_queue (they have no chances to get new data).
-	 *
-	 * This is implemented in the callers, where they modify the 'nonagle'
-	 * argument based upon the location of SKB in the send queue.
-	 */
-	if (nonagle & TCP_NAGLE_PUSH)
-		return 1;
-
-	/* Don't use the nagle rule for urgent data (or for the final FIN).  */
-	if (tp->urg_mode ||
-	    (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN))
-		return 1;
-
-	if (!tcp_nagle_check(tp, skb, cur_mss, nonagle))
-		return 1;
-
-	return 0;
-}
-
-/* This must be invoked the first time we consider transmitting
- * SKB onto the wire.
- */
-static inline int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb)
-{
-	int tso_segs = tcp_skb_pcount(skb);
-
-	if (!tso_segs) {
-		tcp_set_skb_tso_segs(sk, skb);
-		tso_segs = tcp_skb_pcount(skb);
-	}
-	return tso_segs;
-}
-
-/* This checks if the data bearing packet SKB (usually sk->sk_send_head)
- * should be put on the wire right now.  If so, it returns the number of
- * packets allowed by the congestion window.
- */
-static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb,
-				 unsigned int cur_mss, int nonagle)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-	unsigned int cwnd_quota;
-
-	tcp_init_tso_segs(sk, skb);
-
-	if (!tcp_nagle_test(tp, skb, cur_mss, nonagle))
-		return 0;
-
-	cwnd_quota = tcp_cwnd_test(tp, skb);
-	if (cwnd_quota &&
-	    !tcp_snd_wnd_test(tp, skb, cur_mss))
-		cwnd_quota = 0;
-
-	return cwnd_quota;
-}
-
-static inline int tcp_skb_is_last(const struct sock *sk, 
-				  const struct sk_buff *skb)
-{
-	return skb->next == (struct sk_buff *)&sk->sk_write_queue;
-}
-
-int tcp_may_send_now(struct sock *sk, struct tcp_sock *tp)
-{
-	struct sk_buff *skb = sk->sk_send_head;
-
-	return (skb &&
-		tcp_snd_test(sk, skb, tcp_current_mss(sk, 1),
-			     (tcp_skb_is_last(sk, skb) ?
-			      TCP_NAGLE_PUSH :
-			      tp->nonagle)));
-}
-
-
-/* Send _single_ skb sitting at the send head. This function requires
- * true push pending frames to setup probe timer etc.
- */
-void tcp_push_one(struct sock *sk, unsigned cur_mss)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-	struct sk_buff *skb = sk->sk_send_head;
-
-	if (tcp_snd_test(sk, skb, cur_mss, TCP_NAGLE_PUSH)) {
-		/* Send it out now. */
-		TCP_SKB_CB(skb)->when = tcp_time_stamp;
-		tcp_tso_set_push(skb);
-		if (!tcp_transmit_skb(sk, skb_clone(skb, sk->sk_allocation))) {
-			sk->sk_send_head = NULL;
-			tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
-			tcp_packets_out_inc(sk, tp, skb);
-			return;
-		}
+		skb_shinfo(skb)->tso_size = tp->mss_cache;
 	}
 }
 
@@ -791,7 +627,7 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
 
 	/* And store cached results */
 	tp->pmtu_cookie = pmtu;
-	tp->mss_cache = tp->mss_cache_std = mss_now;
+	tp->mss_cache = mss_now;
 
 	return mss_now;
 }
@@ -803,56 +639,47 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
  * cannot be large. However, taking into account rare use of URG, this
  * is not a big flaw.
  */
-
-unsigned int tcp_current_mss(struct sock *sk, int large)
+unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct dst_entry *dst = __sk_dst_get(sk);
-	unsigned int do_large, mss_now;
+	u32 mss_now;
+	u16 xmit_size_goal;
+	int doing_tso = 0;
+
+	mss_now = tp->mss_cache;
+
+	if (large_allowed &&
+	    (sk->sk_route_caps & NETIF_F_TSO) &&
+	    !tp->urg_mode)
+		doing_tso = 1;
 
-	mss_now = tp->mss_cache_std;
 	if (dst) {
 		u32 mtu = dst_mtu(dst);
 		if (mtu != tp->pmtu_cookie)
 			mss_now = tcp_sync_mss(sk, mtu);
 	}
 
-	do_large = (large &&
-		    (sk->sk_route_caps & NETIF_F_TSO) &&
-		    !tp->urg_mode);
+	if (tp->rx_opt.eff_sacks)
+		mss_now -= (TCPOLEN_SACK_BASE_ALIGNED +
+			    (tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK));
 
-	if (do_large) {
-		unsigned int large_mss, factor, limit;
+	xmit_size_goal = mss_now;
 
-		large_mss = 65535 - tp->af_specific->net_header_len -
+	if (doing_tso) {
+		xmit_size_goal = 65535 -
+			tp->af_specific->net_header_len -
 			tp->ext_header_len - tp->tcp_header_len;
 
-		if (tp->max_window && large_mss > (tp->max_window>>1))
-			large_mss = max((tp->max_window>>1),
-					68U - tp->tcp_header_len);
-
-		factor = large_mss / mss_now;
+		if (tp->max_window &&
+		    (xmit_size_goal > (tp->max_window >> 1)))
+			xmit_size_goal = max((tp->max_window >> 1),
+					     68U - tp->tcp_header_len);
 
-		/* Always keep large mss multiple of real mss, but
-		 * do not exceed 1/tso_win_divisor of the congestion window
-		 * so we can keep the ACK clock ticking and minimize
-		 * bursting.
-		 */
-		limit = tp->snd_cwnd;
-		if (sysctl_tcp_tso_win_divisor)
-			limit /= sysctl_tcp_tso_win_divisor;
-		limit = max(1U, limit);
-		if (factor > limit)
-			factor = limit;
-
-		tp->mss_cache = mss_now * factor;
-
-		mss_now = tp->mss_cache;
+		xmit_size_goal -= (xmit_size_goal % mss_now);
 	}
+	tp->xmit_size_goal = xmit_size_goal;
 
-	if (tp->rx_opt.eff_sacks)
-		mss_now -= (TCPOLEN_SACK_BASE_ALIGNED +
-			    (tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK));
 	return mss_now;
 }
 
@@ -876,6 +703,251 @@ static inline void tcp_cwnd_validate(struct sock *sk, struct tcp_sock *tp)
 	}
 }
 
+static unsigned int tcp_window_allows(struct tcp_sock *tp, struct sk_buff *skb, unsigned int mss_now, unsigned int cwnd)
+{
+	u32 window, cwnd_len;
+
+	window = (tp->snd_una + tp->snd_wnd - TCP_SKB_CB(skb)->seq);
+	cwnd_len = mss_now * cwnd;
+	return min(window, cwnd_len);
+}
+
+/* Can at least one segment of SKB be sent right now, according to the
+ * congestion window rules?  If so, return how many segments are allowed.
+ */
+static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, struct sk_buff *skb)
+{
+	u32 in_flight, cwnd;
+
+	/* Don't be strict about the congestion window for the final FIN.  */
+	if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)
+		return 1;
+
+	in_flight = tcp_packets_in_flight(tp);
+	cwnd = tp->snd_cwnd;
+	if (in_flight < cwnd)
+		return (cwnd - in_flight);
+
+	return 0;
+}
+
+/* This must be invoked the first time we consider transmitting
+ * SKB onto the wire.
+ */
+static inline int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb)
+{
+	int tso_segs = tcp_skb_pcount(skb);
+
+	if (!tso_segs) {
+		tcp_set_skb_tso_segs(sk, skb);
+		tso_segs = tcp_skb_pcount(skb);
+	}
+	return tso_segs;
+}
+
+static inline int tcp_minshall_check(const struct tcp_sock *tp)
+{
+	return after(tp->snd_sml,tp->snd_una) &&
+		!after(tp->snd_sml, tp->snd_nxt);
+}
+
+/* Return 0, if packet can be sent now without violation Nagle's rules:
+ * 1. It is full sized.
+ * 2. Or it contains FIN. (already checked by caller)
+ * 3. Or TCP_NODELAY was set.
+ * 4. Or TCP_CORK is not set, and all sent packets are ACKed.
+ *    With Minshall's modification: all sent small packets are ACKed.
+ */
+
+static inline int tcp_nagle_check(const struct tcp_sock *tp,
+				  const struct sk_buff *skb, 
+				  unsigned mss_now, int nonagle)
+{
+	return (skb->len < mss_now &&
+		((nonagle&TCP_NAGLE_CORK) ||
+		 (!nonagle &&
+		  tp->packets_out &&
+		  tcp_minshall_check(tp))));
+}
+
+/* Return non-zero if the Nagle test allows this packet to be
+ * sent now.
+ */
+static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb,
+				 unsigned int cur_mss, int nonagle)
+{
+	/* Nagle rule does not apply to frames, which sit in the middle of the
+	 * write_queue (they have no chances to get new data).
+	 *
+	 * This is implemented in the callers, where they modify the 'nonagle'
+	 * argument based upon the location of SKB in the send queue.
+	 */
+	if (nonagle & TCP_NAGLE_PUSH)
+		return 1;
+
+	/* Don't use the nagle rule for urgent data (or for the final FIN).  */
+	if (tp->urg_mode ||
+	    (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN))
+		return 1;
+
+	if (!tcp_nagle_check(tp, skb, cur_mss, nonagle))
+		return 1;
+
+	return 0;
+}
+
+/* Does at least the first segment of SKB fit into the send window? */
+static inline int tcp_snd_wnd_test(struct tcp_sock *tp, struct sk_buff *skb, unsigned int cur_mss)
+{
+	u32 end_seq = TCP_SKB_CB(skb)->end_seq;
+
+	if (skb->len > cur_mss)
+		end_seq = TCP_SKB_CB(skb)->seq + cur_mss;
+
+	return !after(end_seq, tp->snd_una + tp->snd_wnd);
+}
+
+/* This checks if the data bearing packet SKB (usually sk->sk_send_head)
+ * should be put on the wire right now.  If so, it returns the number of
+ * packets allowed by the congestion window.
+ */
+static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb,
+				 unsigned int cur_mss, int nonagle)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	unsigned int cwnd_quota;
+
+	tcp_init_tso_segs(sk, skb);
+
+	if (!tcp_nagle_test(tp, skb, cur_mss, nonagle))
+		return 0;
+
+	cwnd_quota = tcp_cwnd_test(tp, skb);
+	if (cwnd_quota &&
+	    !tcp_snd_wnd_test(tp, skb, cur_mss))
+		cwnd_quota = 0;
+
+	return cwnd_quota;
+}
+
+static inline int tcp_skb_is_last(const struct sock *sk, 
+				  const struct sk_buff *skb)
+{
+	return skb->next == (struct sk_buff *)&sk->sk_write_queue;
+}
+
+int tcp_may_send_now(struct sock *sk, struct tcp_sock *tp)
+{
+	struct sk_buff *skb = sk->sk_send_head;
+
+	return (skb &&
+		tcp_snd_test(sk, skb, tcp_current_mss(sk, 1),
+			     (tcp_skb_is_last(sk, skb) ?
+			      TCP_NAGLE_PUSH :
+			      tp->nonagle)));
+}
+
+/* Trim TSO SKB to LEN bytes, put the remaining data into a new packet
+ * which is put after SKB on the list.  It is very much like
+ * tcp_fragment() except that it may make several kinds of assumptions
+ * in order to speed up the splitting operation.  In particular, we
+ * know that all the data is in scatter-gather pages, and that the
+ * packet has never been sent out before (and thus is not cloned).
+ */
+static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len)
+{
+	struct sk_buff *buff;
+	int nlen = skb->len - len;
+	u16 flags;
+
+	/* All of a TSO frame must be composed of paged data.  */
+	BUG_ON(skb->len != skb->data_len);
+
+	buff = sk_stream_alloc_pskb(sk, 0, 0, GFP_ATOMIC);
+	if (unlikely(buff == NULL))
+		return -ENOMEM;
+
+	buff->truesize = nlen;
+	skb->truesize -= nlen;
+
+	/* Correct the sequence numbers. */
+	TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
+	TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
+	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
+
+	/* PSH and FIN should only be set in the second packet. */
+	flags = TCP_SKB_CB(skb)->flags;
+	TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH);
+	TCP_SKB_CB(buff)->flags = flags;
+
+	/* This packet was never sent out yet, so no SACK bits. */
+	TCP_SKB_CB(buff)->sacked = 0;
+
+	buff->ip_summed = skb->ip_summed = CHECKSUM_HW;
+	skb_split(skb, buff, len);
+
+	/* Fix up tso_factor for both original and new SKB.  */
+	tcp_set_skb_tso_segs(sk, skb);
+	tcp_set_skb_tso_segs(sk, buff);
+
+	/* Link BUFF into the send queue. */
+	skb_header_release(buff);
+	__skb_append(skb, buff);
+
+	return 0;
+}
+
+/* Try to defer sending, if possible, in order to minimize the amount
+ * of TSO splitting we do.  View it as a kind of TSO Nagle test.
+ *
+ * This algorithm is from John Heffner.
+ */
+static int tcp_tso_should_defer(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb)
+{
+	u32 send_win, cong_win, limit, in_flight;
+
+	if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)
+		return 0;
+
+	in_flight = tcp_packets_in_flight(tp);
+
+	BUG_ON(tcp_skb_pcount(skb) <= 1 ||
+	       (tp->snd_cwnd <= in_flight));
+
+	send_win = (tp->snd_una + tp->snd_wnd) - TCP_SKB_CB(skb)->seq;
+
+	/* From in_flight test above, we know that cwnd > in_flight.  */
+	cong_win = (tp->snd_cwnd - in_flight) * tp->mss_cache;
+
+	limit = min(send_win, cong_win);
+
+	/* If sk_send_head can be sent fully now, just do it.  */
+	if (skb->len <= limit)
+		return 0;
+
+	if (sysctl_tcp_tso_win_divisor) {
+		u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
+
+		/* If at least some fraction of a window is available,
+		 * just use it.
+		 */
+		chunk /= sysctl_tcp_tso_win_divisor;
+		if (limit >= chunk)
+			return 0;
+	} else {
+		/* Different approach, try not to defer past a single
+		 * ACK.  Receiver should ACK every other full sized
+		 * frame, so if we have space for more than 3 frames
+		 * then send now.
+		 */
+		if (limit > tcp_max_burst(tp) * tp->mss_cache)
+			return 0;
+	}
+
+	/* Ok, it looks like it is advisable to defer.  */
+	return 1;
+}
+
 /* This routine writes packets to the network.  It advances the
  * send_head.  This happens as incoming acks open up the remote
  * window for us.
@@ -887,8 +959,8 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
-	unsigned int tso_segs, cwnd_quota;
-	int sent_pkts;
+	unsigned int tso_segs, sent_pkts;
+	int cwnd_quota;
 
 	/* If we are closed, the bytes will have to remain here.
 	 * In time closedown will finish, we empty the write queue and all
@@ -903,24 +975,44 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
 
 	tso_segs = tcp_init_tso_segs(sk, skb);
 	cwnd_quota = tcp_cwnd_test(tp, skb);
+	if (unlikely(!cwnd_quota))
+		goto out;
+
 	sent_pkts = 0;
+	while (likely(tcp_snd_wnd_test(tp, skb, mss_now))) {
+		BUG_ON(!tso_segs);
 
-	while (cwnd_quota >= tso_segs) {
-		if (unlikely(!tcp_nagle_test(tp, skb, mss_now,
-					     (tcp_skb_is_last(sk, skb) ?
-					      nonagle : TCP_NAGLE_PUSH))))
-			break;
+		if (tso_segs == 1) {
+			if (unlikely(!tcp_nagle_test(tp, skb, mss_now,
+						     (tcp_skb_is_last(sk, skb) ?
+						      nonagle : TCP_NAGLE_PUSH))))
+				break;
+		} else {
+			if (tcp_tso_should_defer(sk, tp, skb))
+				break;
+		}
 
-		if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now)))
-			break;
+		if (tso_segs > 1) {
+			u32 limit = tcp_window_allows(tp, skb,
+						      mss_now, cwnd_quota);
+
+			if (skb->len < limit) {
+				unsigned int trim = skb->len % mss_now;
 
-		if (unlikely(skb->len > mss_now)) {
+				if (trim)
+					limit = skb->len - trim;
+			}
+			if (skb->len > limit) {
+				if (tso_fragment(sk, skb, limit))
+					break;
+			}
+		} else if (unlikely(skb->len > mss_now)) {
 			if (unlikely(tcp_fragment(sk, skb,  mss_now)))
 				break;
 		}
 
 		TCP_SKB_CB(skb)->when = tcp_time_stamp;
-		tcp_tso_set_push(skb);
+
 		if (unlikely(tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC))))
 			break;
 
@@ -936,6 +1028,11 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
 		 * the packet above, tso_segs will no longer be valid.
 		 */
 		cwnd_quota -= tcp_skb_pcount(skb);
+
+		BUG_ON(cwnd_quota < 0);
+		if (!cwnd_quota)
+			break;
+
 		skb = sk->sk_send_head;
 		if (!skb)
 			break;
@@ -946,7 +1043,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
 		tcp_cwnd_validate(sk, tp);
 		return 0;
 	}
-
+out:
 	return !tp->packets_out && sk->sk_send_head;
 }
 
@@ -965,6 +1062,53 @@ void __tcp_push_pending_frames(struct sock *sk, struct tcp_sock *tp,
 	}
 }
 
+/* Send _single_ skb sitting at the send head. This function requires
+ * true push pending frames to setup probe timer etc.
+ */
+void tcp_push_one(struct sock *sk, unsigned int mss_now)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *skb = sk->sk_send_head;
+	unsigned int tso_segs, cwnd_quota;
+
+	BUG_ON(!skb || skb->len < mss_now);
+
+	tso_segs = tcp_init_tso_segs(sk, skb);
+	cwnd_quota = tcp_snd_test(sk, skb, mss_now, TCP_NAGLE_PUSH);
+
+	if (likely(cwnd_quota)) {
+		BUG_ON(!tso_segs);
+
+		if (tso_segs > 1) {
+			u32 limit = tcp_window_allows(tp, skb,
+						      mss_now, cwnd_quota);
+
+			if (skb->len < limit) {
+				unsigned int trim = skb->len % mss_now;
+
+				if (trim)
+					limit = skb->len - trim;
+			}
+			if (skb->len > limit) {
+				if (unlikely(tso_fragment(sk, skb, limit)))
+					return;
+			}
+		} else if (unlikely(skb->len > mss_now)) {
+			if (unlikely(tcp_fragment(sk, skb, mss_now)))
+				return;
+		}
+
+		/* Send it out now. */
+		TCP_SKB_CB(skb)->when = tcp_time_stamp;
+
+		if (likely(!tcp_transmit_skb(sk, skb_clone(skb, sk->sk_allocation)))) {
+			update_send_head(sk, tp, skb);
+			tcp_cwnd_validate(sk, tp);
+			return;
+		}
+	}
+}
+
 /* This function returns the amount that we can raise the
  * usable window based on the following constraints
  *  
@@ -1222,7 +1366,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 		if (sk->sk_route_caps & NETIF_F_TSO) {
 			sk->sk_route_caps &= ~NETIF_F_TSO;
 			sock_set_flag(sk, SOCK_NO_LARGESEND);
-			tp->mss_cache = tp->mss_cache_std;
 		}
 
 		if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
@@ -1284,7 +1427,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	 * is still in somebody's hands, else make a clone.
 	 */
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
-	tcp_tso_set_push(skb);
 
 	err = tcp_transmit_skb(sk, (skb_cloned(skb) ?
 				    pskb_copy(skb, GFP_ATOMIC):
@@ -1853,14 +1995,12 @@ int tcp_write_wakeup(struct sock *sk)
 				if (sk->sk_route_caps & NETIF_F_TSO) {
 					sock_set_flag(sk, SOCK_NO_LARGESEND);
 					sk->sk_route_caps &= ~NETIF_F_TSO;
-					tp->mss_cache = tp->mss_cache_std;
 				}
 			} else if (!tcp_skb_pcount(skb))
 				tcp_set_skb_tso_segs(sk, skb);
 
 			TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
 			TCP_SKB_CB(skb)->when = tcp_time_stamp;
-			tcp_tso_set_push(skb);
 			err = tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC));
 			if (!err) {
 				update_send_head(sk, tp, skb);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 9dac7fdf472..f6e288dc116 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -2018,7 +2018,7 @@ static int tcp_v6_init_sock(struct sock *sk)
 	 */
 	tp->snd_ssthresh = 0x7fffffff;
 	tp->snd_cwnd_clamp = ~0;
-	tp->mss_cache_std = tp->mss_cache = 536;
+	tp->mss_cache = 536;
 
 	tp->reordering = sysctl_tcp_reordering;
 
-- 
cgit v1.2.3-70-g09d2


From 6772926bef3c9f0ec761b39e5702535471fff70b Mon Sep 17 00:00:00 2001
From: Rusty Lynch <rusty.lynch@intel.com>
Date: Tue, 5 Jul 2005 18:54:50 -0700
Subject: [PATCH] kprobes: fix namespace problem and sparc64 build

The following renames arch_init, a kprobes function for performing any
architecture specific initialization, to arch_init_kprobes in order to
cleanup the namespace.

Also, this patch adds arch_init_kprobes to sparc64 to fix the sparc64 kprobes
build from the last return probe patch.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/i386/kernel/kprobes.c    | 2 +-
 arch/ia64/kernel/kprobes.c    | 2 +-
 arch/ppc64/kernel/kprobes.c   | 2 +-
 arch/sparc64/kernel/kprobes.c | 5 +++++
 arch/x86_64/kernel/kprobes.c  | 2 +-
 include/linux/kprobes.h       | 2 +-
 kernel/kprobes.c              | 2 +-
 7 files changed, 11 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c
index fc8b1752176..a6d8c45961d 100644
--- a/arch/i386/kernel/kprobes.c
+++ b/arch/i386/kernel/kprobes.c
@@ -537,7 +537,7 @@ static struct kprobe trampoline_p = {
 	.pre_handler = trampoline_probe_handler
 };
 
-int __init arch_init(void)
+int __init arch_init_kprobes(void)
 {
 	return register_kprobe(&trampoline_p);
 }
diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
index 3aa3167edbe..884f5cd27d8 100644
--- a/arch/ia64/kernel/kprobes.c
+++ b/arch/ia64/kernel/kprobes.c
@@ -713,7 +713,7 @@ static struct kprobe trampoline_p = {
 	.pre_handler = trampoline_probe_handler
 };
 
-int __init arch_init(void)
+int __init arch_init_kprobes(void)
 {
 	trampoline_p.addr =
 		(kprobe_opcode_t *)((struct fnptr *)kretprobe_trampoline)->ip;
diff --git a/arch/ppc64/kernel/kprobes.c b/arch/ppc64/kernel/kprobes.c
index 1d2ff6d6b0b..a3d519518fb 100644
--- a/arch/ppc64/kernel/kprobes.c
+++ b/arch/ppc64/kernel/kprobes.c
@@ -444,7 +444,7 @@ static struct kprobe trampoline_p = {
 	.pre_handler = trampoline_probe_handler
 };
 
-int __init arch_init(void)
+int __init arch_init_kprobes(void)
 {
 	return register_kprobe(&trampoline_p);
 }
diff --git a/arch/sparc64/kernel/kprobes.c b/arch/sparc64/kernel/kprobes.c
index bdac631cf01..bbf11f85dab 100644
--- a/arch/sparc64/kernel/kprobes.c
+++ b/arch/sparc64/kernel/kprobes.c
@@ -433,3 +433,8 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 	return 0;
 }
 
+/* architecture specific initialization */
+int arch_init_kprobes(void)
+{
+	return 0;
+}
diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c
index acd2a778ebe..5c6dc705148 100644
--- a/arch/x86_64/kernel/kprobes.c
+++ b/arch/x86_64/kernel/kprobes.c
@@ -682,7 +682,7 @@ static struct kprobe trampoline_p = {
 	.pre_handler = trampoline_probe_handler
 };
 
-int __init arch_init(void)
+int __init arch_init_kprobes(void)
 {
 	return register_kprobe(&trampoline_p);
 }
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index b7a194c4362..e050fc2d4c2 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -155,7 +155,7 @@ extern void arch_copy_kprobe(struct kprobe *p);
 extern void arch_arm_kprobe(struct kprobe *p);
 extern void arch_disarm_kprobe(struct kprobe *p);
 extern void arch_remove_kprobe(struct kprobe *p);
-extern int arch_init(void);
+extern int arch_init_kprobes(void);
 extern void show_registers(struct pt_regs *regs);
 extern kprobe_opcode_t *get_insn_slot(void);
 extern void free_insn_slot(kprobe_opcode_t *slot);
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 90c0e82b650..b0237122b24 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -574,7 +574,7 @@ static int __init init_kprobes(void)
 		INIT_HLIST_HEAD(&kretprobe_inst_table[i]);
 	}
 
-	err = arch_init();
+	err = arch_init_kprobes();
 	if (!err)
 		err = register_die_notifier(&kprobe_exceptions_nb);
 
-- 
cgit v1.2.3-70-g09d2