From 024ac44c701d43f5e2d34bd6a35b2813a36e6010 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sun, 29 May 2005 02:26:31 -0500 Subject: Input: This patch implements compat_ioctl for joydev. I've tested it with a Logitech WingMan Rumblepad on an x86-64 machine, and on an ia32 machine to make sure I didn't break anything. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andrew Morton Signed-off-by: Vojtech Pavlik Signed-off-by: Dmitry Torokhov --- include/linux/joystick.h | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/joystick.h b/include/linux/joystick.h index b7e0ab622cd..06b9af77eb7 100644 --- a/include/linux/joystick.h +++ b/include/linux/joystick.h @@ -111,18 +111,35 @@ struct js_corr { #define JS_SET_ALL 8 struct JS_DATA_TYPE { - int buttons; - int x; - int y; + __s32 buttons; + __s32 x; + __s32 y; }; -struct JS_DATA_SAVE_TYPE { - int JS_TIMEOUT; - int BUSY; - long JS_EXPIRETIME; - long JS_TIMELIMIT; +struct JS_DATA_SAVE_TYPE_32 { + __s32 JS_TIMEOUT; + __s32 BUSY; + __s32 JS_EXPIRETIME; + __s32 JS_TIMELIMIT; struct JS_DATA_TYPE JS_SAVE; struct JS_DATA_TYPE JS_CORR; }; +struct JS_DATA_SAVE_TYPE_64 { + __s32 JS_TIMEOUT; + __s32 BUSY; + __s64 JS_EXPIRETIME; + __s64 JS_TIMELIMIT; + struct JS_DATA_TYPE JS_SAVE; + struct JS_DATA_TYPE JS_CORR; +}; + +#if BITS_PER_LONG == 64 +#define JS_DATA_SAVE_TYPE JS_DATA_SAVE_TYPE_64 +#elif BITS_PER_LONG == 32 +#define JS_DATA_SAVE_TYPE JS_DATA_SAVE_TYPE_32 +#else +#error Unexpected BITS_PER_LONG +#endif + #endif /* _LINUX_JOYSTICK_H */ -- cgit v1.2.3-70-g09d2 From 0fbf87caf70acec0c435233fbc39c7bd0aca3ca6 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sun, 29 May 2005 02:29:25 -0500 Subject: Input: add semaphore and user count to input_dev structure; serialize open and close calls and ensure that device's open and close methods are only called when first user opens it or last user closes it. 
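The core of the change is the classic refcounted open: the device's own open() runs only for the first user, close() only for the last, and a per-device semaphore keeps concurrent openers from racing on the count. A minimal stand-alone sketch of the same pattern in plain C, with a pthread mutex standing in for the kernel semaphore; fake_dev, hw_open() and hw_close() are illustrative names, not kernel API:

#include <pthread.h>

struct fake_dev {
	pthread_mutex_t sem;	/* plays the role of input_dev->sem */
	unsigned int users;	/* plays the role of input_dev->users */
	int (*open)(struct fake_dev *);
	void (*close)(struct fake_dev *);
};

static int fake_open_device(struct fake_dev *dev)
{
	int err = 0;

	if (pthread_mutex_lock(&dev->sem))
		return -1;	/* mirrors down_interruptible() failing */

	if (!dev->users++ && dev->open)
		err = dev->open(dev);
	if (err)
		dev->users--;	/* roll back so a later open retries */

	pthread_mutex_unlock(&dev->sem);
	return err;
}

static void fake_close_device(struct fake_dev *dev)
{
	pthread_mutex_lock(&dev->sem);

	if (!--dev->users && dev->close)
		dev->close(dev);

	pthread_mutex_unlock(&dev->sem);
}

static int hw_open(struct fake_dev *dev)   { (void)dev; return 0; }
static void hw_close(struct fake_dev *dev) { (void)dev; }

int main(void)
{
	struct fake_dev dev = { PTHREAD_MUTEX_INITIALIZER, 0, hw_open, hw_close };

	fake_open_device(&dev);		/* first user: hw_open() runs */
	fake_open_device(&dev);		/* second user: no hw_open() */
	fake_close_device(&dev);	/* one user left: no hw_close() */
	fake_close_device(&dev);	/* last user: hw_close() runs */
	return 0;
}

Taking the count and the hardware call under one lock is what makes the first-user/last-user decision atomic; the kernel version uses down_interruptible() on the open path so a blocked opener can still be signalled.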
Signed-off-by: Dmitry Torokhov --- drivers/input/input.c | 33 ++++++++++++++++++++++++++++----- include/linux/input.h | 4 ++++ 2 files changed, 32 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/input/input.c b/drivers/input/input.c index 3385dd03abf..1885f369e3e 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -219,10 +219,24 @@ void input_release_device(struct input_handle *handle) int input_open_device(struct input_handle *handle) { + struct input_dev *dev = handle->dev; + int err; + + err = down_interruptible(&dev->sem); + if (err) + return err; + handle->open++; - if (handle->dev->open) - return handle->dev->open(handle->dev); - return 0; + + if (!dev->users++ && dev->open) + err = dev->open(dev); + + if (err) + handle->open--; + + up(&dev->sem); + + return err; } int input_flush_device(struct input_handle* handle, struct file* file) @@ -235,10 +249,17 @@ int input_flush_device(struct input_handle* handle, struct file* file) void input_close_device(struct input_handle *handle) { + struct input_dev *dev = handle->dev; + input_release_device(handle); - if (handle->dev->close) - handle->dev->close(handle->dev); + + down(&dev->sem); + + if (!--dev->users && dev->close) + dev->close(dev); handle->open--; + + up(&dev->sem); } static void input_link_handle(struct input_handle *handle) @@ -415,6 +436,8 @@ void input_register_device(struct input_dev *dev) set_bit(EV_SYN, dev->evbit); + init_MUTEX(&dev->sem); + /* * If delay and period are pre-set by the driver, then autorepeating * is handled by the driver itself and we don't do it in input.c. diff --git a/include/linux/input.h b/include/linux/input.h index 72731d7d189..43e8ecec602 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -859,6 +859,10 @@ struct input_dev { int (*erase_effect)(struct input_dev *dev, int effect_id); struct input_handle *grab; + + struct semaphore sem; /* serializes open and close operations */ + unsigned int users; + struct device *dev; struct list_head h_list; -- cgit v1.2.3-70-g09d2 From c611763d048990de5cdf848d97af6392f8fa7430 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 1 Jun 2005 02:39:51 -0500 Subject: Input: add ps2_drain() to libps2 to allow reading and discarding given number of bytes from device. Change ps2_command to allow using 0 as command ID and actually pass it to the device instead of working as a drain. Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/alps.c | 3 +-- drivers/input/serio/libps2.c | 46 ++++++++++++++++++++++++++++++++++++-------- include/linux/libps2.h | 1 + 3 files changed, 40 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c index 2679a165d39..ffdc8231319 100644 --- a/drivers/input/mouse/alps.c +++ b/drivers/input/mouse/alps.c @@ -270,7 +270,6 @@ static struct alps_model_info *alps_get_model(struct psmouse *psmouse, int *vers static int alps_passthrough_mode(struct psmouse *psmouse, int enable) { struct ps2dev *ps2dev = &psmouse->ps2dev; - unsigned char param[3]; int cmd = enable ? 
PSMOUSE_CMD_SETSCALE21 : PSMOUSE_CMD_SETSCALE11; if (ps2_command(ps2dev, NULL, cmd) || @@ -280,7 +279,7 @@ static int alps_passthrough_mode(struct psmouse *psmouse, int enable) return -1; /* we may get 3 more bytes, just ignore them */ - ps2_command(ps2dev, param, 0x0300); + ps2_drain(ps2dev, 3, 100); return 0; } diff --git a/drivers/input/serio/libps2.c b/drivers/input/serio/libps2.c index c978657068c..92b92ee0379 100644 --- a/drivers/input/serio/libps2.c +++ b/drivers/input/serio/libps2.c @@ -29,6 +29,7 @@ MODULE_LICENSE("GPL"); EXPORT_SYMBOL(ps2_init); EXPORT_SYMBOL(ps2_sendbyte); +EXPORT_SYMBOL(ps2_drain); EXPORT_SYMBOL(ps2_command); EXPORT_SYMBOL(ps2_schedule_command); EXPORT_SYMBOL(ps2_handle_ack); @@ -45,11 +46,11 @@ struct ps2work { /* - * ps2_sendbyte() sends a byte to the mouse, and waits for acknowledge. - * It doesn't handle retransmission, though it could - because when there would - * be need for retransmissions, the mouse has to be replaced anyway. + * ps2_sendbyte() sends a byte to the device and waits for acknowledge. + * It doesn't handle retransmission, though it could - because if there + * is a need for retransmissions device has to be replaced anyway. * - * ps2_sendbyte() can only be called from a process context + * ps2_sendbyte() can only be called from a process context. */ int ps2_sendbyte(struct ps2dev *ps2dev, unsigned char byte, int timeout) @@ -71,6 +72,31 @@ int ps2_sendbyte(struct ps2dev *ps2dev, unsigned char byte, int timeout) return -ps2dev->nak; } +/* + * ps2_drain() waits for device to transmit requested number of bytes + * and discards them. + */ + +void ps2_drain(struct ps2dev *ps2dev, int maxbytes, int timeout) +{ + if (maxbytes > sizeof(ps2dev->cmdbuf)) { + WARN_ON(1); + maxbytes = sizeof(ps2dev->cmdbuf); + } + + down(&ps2dev->cmd_sem); + + serio_pause_rx(ps2dev->serio); + ps2dev->flags = PS2_FLAG_CMD; + ps2dev->cmdcnt = maxbytes; + serio_continue_rx(ps2dev->serio); + + wait_event_timeout(ps2dev->wait, + !(ps2dev->flags & PS2_FLAG_CMD), + msecs_to_jiffies(timeout)); + up(&ps2dev->cmd_sem); +} + /* * ps2_command() sends a command and its parameters to the mouse, * then waits for the response and puts it in the param array. @@ -86,6 +112,11 @@ int ps2_command(struct ps2dev *ps2dev, unsigned char *param, int command) int rc = -1; int i; + if (receive > sizeof(ps2dev->cmdbuf)) { + WARN_ON(1); + return -1; + } + down(&ps2dev->cmd_sem); serio_pause_rx(ps2dev->serio); @@ -101,10 +132,9 @@ int ps2_command(struct ps2dev *ps2dev, unsigned char *param, int command) * ACKing the reset command, and so it can take a long * time before the ACK arrrives. */ - if (command & 0xff) - if (ps2_sendbyte(ps2dev, command & 0xff, - command == PS2_CMD_RESET_BAT ? 1000 : 200)) - goto out; + if (ps2_sendbyte(ps2dev, command & 0xff, + command == PS2_CMD_RESET_BAT ? 
1000 : 200)) + goto out; for (i = 0; i < send; i++) if (ps2_sendbyte(ps2dev, param[i], 200)) diff --git a/include/linux/libps2.h b/include/linux/libps2.h index 923bdbc6d9e..a710bddda4e 100644 --- a/include/linux/libps2.h +++ b/include/linux/libps2.h @@ -41,6 +41,7 @@ struct ps2dev { void ps2_init(struct ps2dev *ps2dev, struct serio *serio); int ps2_sendbyte(struct ps2dev *ps2dev, unsigned char byte, int timeout); +void ps2_drain(struct ps2dev *ps2dev, int maxbytes, int timeout); int ps2_command(struct ps2dev *ps2dev, unsigned char *param, int command); int ps2_schedule_command(struct ps2dev *ps2dev, unsigned char *param, int command); int ps2_handle_ack(struct ps2dev *ps2dev, unsigned char data); -- cgit v1.2.3-70-g09d2 From dbf4ccd6043e58ed32fbf253fb3f0a9991e4c13a Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 1 Jun 2005 02:40:01 -0500 Subject: Input: psmouse - export protocol as a sysfs per-device attribute to allow easy switching at run-time. Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/psmouse-base.c | 291 +++++++++++++++++++++++++++++++------ drivers/input/mouse/psmouse.h | 1 + drivers/input/serio/serio.c | 18 ++- include/linux/serio.h | 6 + 4 files changed, 266 insertions(+), 50 deletions(-) (limited to 'include/linux') diff --git a/drivers/input/mouse/psmouse-base.c b/drivers/input/mouse/psmouse-base.c index 259e6b70544..19785a6c5ab 100644 --- a/drivers/input/mouse/psmouse-base.c +++ b/drivers/input/mouse/psmouse-base.c @@ -32,15 +32,14 @@ MODULE_AUTHOR("Vojtech Pavlik "); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL"); -static unsigned int psmouse_max_proto = -1U; +static unsigned int psmouse_max_proto = PSMOUSE_AUTO; static int psmouse_set_maxproto(const char *val, struct kernel_param *kp); static int psmouse_get_maxproto(char *buffer, struct kernel_param *kp); -static char *psmouse_proto_abbrev[] = { NULL, "bare", NULL, NULL, NULL, "imps", "exps", NULL, NULL, "lifebook" }; #define param_check_proto_abbrev(name, p) __param_check(name, p, unsigned int) #define param_set_proto_abbrev psmouse_set_maxproto #define param_get_proto_abbrev psmouse_get_maxproto module_param_named(proto, psmouse_max_proto, proto_abbrev, 0644); -MODULE_PARM_DESC(proto, "Highest protocol extension to probe (bare, imps, exps, lifebook, any). Useful for KVM switches."); +MODULE_PARM_DESC(proto, "Highest protocol extension to probe (bare, imps, exps, any). 
Useful for KVM switches."); static unsigned int psmouse_resolution = 200; module_param_named(resolution, psmouse_resolution, uint, 0644); @@ -58,6 +57,7 @@ static unsigned int psmouse_resetafter; module_param_named(resetafter, psmouse_resetafter, uint, 0644); MODULE_PARM_DESC(resetafter, "Reset device after so many bad packets (0 = never)."); +PSMOUSE_DEFINE_ATTR(protocol); PSMOUSE_DEFINE_ATTR(rate); PSMOUSE_DEFINE_ATTR(resolution); PSMOUSE_DEFINE_ATTR(resetafter); @@ -77,7 +77,14 @@ __obsolete_setup("psmouse_rate="); */ static DECLARE_MUTEX(psmouse_sem); -static char *psmouse_protocols[] = { "None", "PS/2", "PS2++", "ThinkPS/2", "GenPS/2", "ImPS/2", "ImExPS/2", "SynPS/2", "AlpsPS/2", "LBPS/2" }; +struct psmouse_protocol { + enum psmouse_type type; + char *name; + char *alias; + int maxproto; + int (*detect)(struct psmouse *, int); + int (*init)(struct psmouse *); +}; /* * psmouse_process_byte() analyzes the PS/2 data stream and reports @@ -417,12 +424,15 @@ static int thinking_detect(struct psmouse *psmouse, int set_properties) */ static int ps2bare_detect(struct psmouse *psmouse, int set_properties) { - if (!psmouse->vendor) psmouse->vendor = "Generic"; - if (!psmouse->name) psmouse->name = "Mouse"; + if (set_properties) { + if (!psmouse->vendor) psmouse->vendor = "Generic"; + if (!psmouse->name) psmouse->name = "Mouse"; + } return 0; } + /* * psmouse_extensions() probes for any extensions to the basic PS/2 protocol * the mouse may have. @@ -437,9 +447,7 @@ static int psmouse_extensions(struct psmouse *psmouse, * We always check for lifebook because it does not disturb mouse * (it only checks DMI information). */ - if (lifebook_detect(psmouse, set_properties) == 0 || - max_proto == PSMOUSE_LIFEBOOK) { - + if (lifebook_detect(psmouse, set_properties) == 0) { if (max_proto > PSMOUSE_IMEX) { if (!set_properties || lifebook_init(psmouse) == 0) return PSMOUSE_LIFEBOOK; @@ -529,6 +537,103 @@ static int psmouse_extensions(struct psmouse *psmouse, return PSMOUSE_PS2; } +static struct psmouse_protocol psmouse_protocols[] = { + { + .type = PSMOUSE_PS2, + .name = "PS/2", + .alias = "bare", + .maxproto = 1, + .detect = ps2bare_detect, + }, + { + .type = PSMOUSE_PS2PP, + .name = "PS2++", + .alias = "logitech", + .detect = ps2pp_init, + }, + { + .type = PSMOUSE_THINKPS, + .name = "ThinkPS/2", + .alias = "thinkps", + .detect = thinking_detect, + }, + { + .type = PSMOUSE_GENPS, + .name = "GenPS/2", + .alias = "genius", + .detect = genius_detect, + }, + { + .type = PSMOUSE_IMPS, + .name = "ImPS/2", + .alias = "imps", + .maxproto = 1, + .detect = intellimouse_detect, + }, + { + .type = PSMOUSE_IMEX, + .name = "ImExPS/2", + .alias = "exps", + .maxproto = 1, + .detect = im_explorer_detect, + }, + { + .type = PSMOUSE_SYNAPTICS, + .name = "SynPS/2", + .alias = "synaptics", + .detect = synaptics_detect, + .init = synaptics_init, + }, + { + .type = PSMOUSE_ALPS, + .name = "AlpsPS/2", + .alias = "alps", + .detect = alps_detect, + .init = alps_init, + }, + { + .type = PSMOUSE_LIFEBOOK, + .name = "LBPS/2", + .alias = "lifebook", + .init = lifebook_init, + }, + { + .type = PSMOUSE_AUTO, + .name = "auto", + .alias = "any", + .maxproto = 1, + }, +}; + +static struct psmouse_protocol *psmouse_protocol_by_type(enum psmouse_type type) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(psmouse_protocols); i++) + if (psmouse_protocols[i].type == type) + return &psmouse_protocols[i]; + + WARN_ON(1); + return &psmouse_protocols[0]; +} + +static struct psmouse_protocol *psmouse_protocol_by_name(const char *name, size_t len) +{ + 
struct psmouse_protocol *p; + int i; + + for (i = 0; i < ARRAY_SIZE(psmouse_protocols); i++) { + p = &psmouse_protocols[i]; + + if ((strlen(p->name) == len && !strncmp(p->name, name, len)) || + (strlen(p->alias) == len && !strncmp(p->alias, name, len))) + return &psmouse_protocols[i]; + } + + return NULL; +} + + /* * psmouse_probe() probes for a PS/2 mouse. */ @@ -680,6 +785,7 @@ static void psmouse_disconnect(struct serio *serio) psmouse = serio_get_drvdata(serio); + device_remove_file(&serio->dev, &psmouse_attr_protocol); device_remove_file(&serio->dev, &psmouse_attr_rate); device_remove_file(&serio->dev, &psmouse_attr_resolution); device_remove_file(&serio->dev, &psmouse_attr_resetafter); @@ -712,6 +818,49 @@ static void psmouse_disconnect(struct serio *serio) up(&psmouse_sem); } +static int psmouse_switch_protocol(struct psmouse *psmouse, struct psmouse_protocol *proto) +{ + memset(&psmouse->dev, 0, sizeof(struct input_dev)); + + init_input_dev(&psmouse->dev); + + psmouse->dev.private = psmouse; + psmouse->dev.dev = &psmouse->ps2dev.serio->dev; + + psmouse->dev.evbit[0] = BIT(EV_KEY) | BIT(EV_REL); + psmouse->dev.keybit[LONG(BTN_MOUSE)] = BIT(BTN_LEFT) | BIT(BTN_MIDDLE) | BIT(BTN_RIGHT); + psmouse->dev.relbit[0] = BIT(REL_X) | BIT(REL_Y); + + psmouse->set_rate = psmouse_set_rate; + psmouse->set_resolution = psmouse_set_resolution; + psmouse->protocol_handler = psmouse_process_byte; + psmouse->pktsize = 3; + + if (proto && (proto->detect || proto->init)) { + if (proto->detect && proto->detect(psmouse, 1) < 0) + return -1; + + if (proto->init && proto->init(psmouse) < 0) + return -1; + + psmouse->type = proto->type; + } + else + psmouse->type = psmouse_extensions(psmouse, psmouse_max_proto, 1); + + sprintf(psmouse->devname, "%s %s %s", + psmouse_protocol_by_type(psmouse->type)->name, psmouse->vendor, psmouse->name); + + psmouse->dev.name = psmouse->devname; + psmouse->dev.phys = psmouse->phys; + psmouse->dev.id.bustype = BUS_I8042; + psmouse->dev.id.vendor = 0x0002; + psmouse->dev.id.product = psmouse->type; + psmouse->dev.id.version = psmouse->model; + + return 0; +} + /* * psmouse_connect() is a callback from the serio module when * an unhandled serio port is found. 
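The name/alias lookup above is deliberately simple table scanning, using length-limited compares because the buffer handed in need not be 0-terminated. A stand-alone sketch of the same idea, with the table trimmed to three entries and struct proto/find_proto() as illustrative names:

#include <stdio.h>
#include <string.h>

struct proto {
	const char *name;	/* canonical form, reported via sysfs */
	const char *alias;	/* short form the module parameter takes */
};

static const struct proto protos[] = {
	{ "PS/2",     "bare" },
	{ "ImPS/2",   "imps" },
	{ "ImExPS/2", "exps" },
};

static const struct proto *find_proto(const char *name, size_t len)
{
	size_t i;

	for (i = 0; i < sizeof(protos) / sizeof(protos[0]); i++) {
		const struct proto *p = &protos[i];

		/* match either spelling, without assuming 0-termination */
		if ((strlen(p->name) == len && !strncmp(p->name, name, len)) ||
		    (strlen(p->alias) == len && !strncmp(p->alias, name, len)))
			return p;
	}
	return NULL;
}

int main(void)
{
	const struct proto *p = find_proto("imps", 4);

	printf("%s\n", p ? p->name : "unknown");
	return 0;
}

With the attribute registered, switching at run time then amounts to writing one of these strings into the device's protocol file, for example echo imps > /sys/devices/platform/i8042/serioN/protocol (the exact path varies by machine).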
@@ -739,11 +888,7 @@ static int psmouse_connect(struct serio *serio, struct serio_driver *drv) ps2_init(&psmouse->ps2dev, serio); sprintf(psmouse->phys, "%s/input0", serio->phys); - psmouse->dev.evbit[0] = BIT(EV_KEY) | BIT(EV_REL); - psmouse->dev.keybit[LONG(BTN_MOUSE)] = BIT(BTN_LEFT) | BIT(BTN_MIDDLE) | BIT(BTN_RIGHT); - psmouse->dev.relbit[0] = BIT(REL_X) | BIT(REL_Y); - psmouse->dev.private = psmouse; - psmouse->dev.dev = &serio->dev; + psmouse_set_state(psmouse, PSMOUSE_INITIALIZING); serio_set_drvdata(serio, psmouse); @@ -767,25 +912,10 @@ static int psmouse_connect(struct serio *serio, struct serio_driver *drv) psmouse->resolution = psmouse_resolution; psmouse->resetafter = psmouse_resetafter; psmouse->smartscroll = psmouse_smartscroll; - psmouse->set_rate = psmouse_set_rate; - psmouse->set_resolution = psmouse_set_resolution; - psmouse->protocol_handler = psmouse_process_byte; - psmouse->pktsize = 3; - - psmouse->type = psmouse_extensions(psmouse, psmouse_max_proto, 1); - - sprintf(psmouse->devname, "%s %s %s", - psmouse_protocols[psmouse->type], psmouse->vendor, psmouse->name); - psmouse->dev.name = psmouse->devname; - psmouse->dev.phys = psmouse->phys; - psmouse->dev.id.bustype = BUS_I8042; - psmouse->dev.id.vendor = 0x0002; - psmouse->dev.id.product = psmouse->type; - psmouse->dev.id.version = psmouse->model; + psmouse_switch_protocol(psmouse, NULL); input_register_device(&psmouse->dev); - printk(KERN_INFO "input: %s on %s\n", psmouse->devname, serio->phys); psmouse_set_state(psmouse, PSMOUSE_CMD_MODE); @@ -795,6 +925,7 @@ static int psmouse_connect(struct serio *serio, struct serio_driver *drv) if (parent && parent->pt_activate) parent->pt_activate(parent); + device_create_file(&serio->dev, &psmouse_attr_protocol); device_create_file(&serio->dev, &psmouse_attr_rate); device_create_file(&serio->dev, &psmouse_attr_resolution); device_create_file(&serio->dev, &psmouse_attr_resetafter); @@ -946,11 +1077,14 @@ ssize_t psmouse_attr_set_helper(struct device *dev, const char *buf, size_t coun parent = serio_get_drvdata(serio->parent); psmouse_deactivate(parent); } + psmouse_deactivate(psmouse); retval = handler(psmouse, buf, count); - psmouse_activate(psmouse); + if (retval != -ENODEV) + psmouse_activate(psmouse); + if (parent) psmouse_activate(parent); @@ -961,6 +1095,75 @@ ssize_t psmouse_attr_set_helper(struct device *dev, const char *buf, size_t coun return retval; } +static ssize_t psmouse_attr_show_protocol(struct psmouse *psmouse, char *buf) +{ + return sprintf(buf, "%s\n", psmouse_protocol_by_type(psmouse->type)->name); +} + +static ssize_t psmouse_attr_set_protocol(struct psmouse *psmouse, const char *buf, size_t count) +{ + struct serio *serio = psmouse->ps2dev.serio; + struct psmouse *parent = NULL; + struct psmouse_protocol *proto; + int retry = 0; + + if (!(proto = psmouse_protocol_by_name(buf, count))) + return -EINVAL; + + if (psmouse->type == proto->type) + return count; + + while (serio->child) { + if (++retry > 3) { + printk(KERN_WARNING "psmouse: failed to destroy child port, protocol change aborted.\n"); + return -EIO; + } + + up(&psmouse_sem); + serio_unpin_driver(serio); + serio_unregister_child_port(serio); + serio_pin_driver_uninterruptible(serio); + down(&psmouse_sem); + + if (serio->drv != &psmouse_drv) + return -ENODEV; + + if (psmouse->type == proto->type) + return count; /* switched by other thread */ + } + + if (serio->parent && serio->id.type == SERIO_PS_PSTHRU) { + parent = serio_get_drvdata(serio->parent); + if (parent->pt_deactivate) + 
parent->pt_deactivate(parent); + } + + if (psmouse->disconnect) + psmouse->disconnect(psmouse); + + psmouse_set_state(psmouse, PSMOUSE_IGNORE); + input_unregister_device(&psmouse->dev); + + psmouse_set_state(psmouse, PSMOUSE_INITIALIZING); + + if (psmouse_switch_protocol(psmouse, proto) < 0) { + psmouse_reset(psmouse); + /* default to PSMOUSE_PS2 */ + psmouse_switch_protocol(psmouse, &psmouse_protocols[0]); + } + + psmouse_initialize(psmouse); + psmouse_set_state(psmouse, PSMOUSE_CMD_MODE); + + input_register_device(&psmouse->dev); + printk(KERN_INFO "input: %s on %s\n", psmouse->devname, serio->phys); + + if (parent && parent->pt_activate) + parent->pt_activate(parent); + + return count; +} + static ssize_t psmouse_attr_show_rate(struct psmouse *psmouse, char *buf) { return sprintf(buf, "%d\n", psmouse->rate); @@ -1017,34 +1220,26 @@ static ssize_t psmouse_attr_set_resetafter(struct psmouse *psmouse, const char * static int psmouse_set_maxproto(const char *val, struct kernel_param *kp) { - int i; + struct psmouse_protocol *proto; if (!val) return -EINVAL; - if (!strncmp(val, "any", 3)) { - *((unsigned int *)kp->arg) = -1U; - return 0; - } + proto = psmouse_protocol_by_name(val, strlen(val)); - for (i = 0; i < ARRAY_SIZE(psmouse_proto_abbrev); i++) { - if (!psmouse_proto_abbrev[i]) - continue; + if (!proto || !proto->maxproto) + return -EINVAL; - if (!strncmp(val, psmouse_proto_abbrev[i], strlen(psmouse_proto_abbrev[i]))) { - *((unsigned int *)kp->arg) = i; - return 0; - } - } + *((unsigned int *)kp->arg) = proto->type; - return -EINVAL; \ + return 0; \ } static int psmouse_get_maxproto(char *buffer, struct kernel_param *kp) { - return sprintf(buffer, "%s\n", - psmouse_max_proto < ARRAY_SIZE(psmouse_proto_abbrev) ? - psmouse_proto_abbrev[psmouse_max_proto] : "any"); + int type = *((unsigned int *)kp->arg); + + return sprintf(buffer, "%s\n", psmouse_protocol_by_type(type)->name); } static int __init psmouse_init(void) diff --git a/drivers/input/mouse/psmouse.h b/drivers/input/mouse/psmouse.h index 4848be627a6..dc8e9ae07f3 100644 --- a/drivers/input/mouse/psmouse.h +++ b/drivers/input/mouse/psmouse.h @@ -78,6 +78,7 @@ enum psmouse_type { PSMOUSE_SYNAPTICS, PSMOUSE_ALPS, PSMOUSE_LIFEBOOK, + PSMOUSE_AUTO /* This one should always be last */ }; int psmouse_sliced_command(struct psmouse *psmouse, unsigned char command); diff --git a/drivers/input/serio/serio.c b/drivers/input/serio/serio.c index b82815a0b65..615bf62ad46 100644 --- a/drivers/input/serio/serio.c +++ b/drivers/input/serio/serio.c @@ -42,6 +42,7 @@ MODULE_LICENSE("GPL"); EXPORT_SYMBOL(serio_interrupt); EXPORT_SYMBOL(__serio_register_port); EXPORT_SYMBOL(serio_unregister_port); +EXPORT_SYMBOL(serio_unregister_child_port); EXPORT_SYMBOL(__serio_unregister_port_delayed); EXPORT_SYMBOL(__serio_register_driver); EXPORT_SYMBOL(serio_unregister_driver); @@ -179,12 +180,12 @@ static void serio_queue_event(void *object, struct module *owner, spin_lock_irqsave(&serio_event_lock, flags); /* - * Scan event list for the other events for the same serio port, + * Scan event list for the other events for the same serio port, * starting with the most recent one. If event is the same we * do not need add new one. If event is of different type we * need to add this event and should not look further because * we need to preseve sequence of distinct events. 
- */ + */ list_for_each_entry_reverse(event, &serio_event_list, node) { if (event->object == object) { if (event->type == event_type) @@ -653,6 +654,19 @@ void serio_unregister_port(struct serio *serio) up(&serio_sem); } +/* + * Safely unregisters child port if one is present. + */ +void serio_unregister_child_port(struct serio *serio) +{ + down(&serio_sem); + if (serio->child) { + serio_disconnect_port(serio->child); + serio_destroy_port(serio->child); + } + up(&serio_sem); +} + /* * Submits register request to kseriod for subsequent execution. * Can be used when it is not obvious whether the serio_sem is diff --git a/include/linux/serio.h b/include/linux/serio.h index a2d3b9ae06f..aa4d6493a03 100644 --- a/include/linux/serio.h +++ b/include/linux/serio.h @@ -83,6 +83,7 @@ static inline void serio_register_port(struct serio *serio) } void serio_unregister_port(struct serio *serio); +void serio_unregister_child_port(struct serio *serio); void __serio_unregister_port_delayed(struct serio *serio, struct module *owner); static inline void serio_unregister_port_delayed(struct serio *serio) { @@ -153,6 +154,11 @@ static inline int serio_pin_driver(struct serio *serio) return down_interruptible(&serio->drv_sem); } +static inline void serio_pin_driver_uninterruptible(struct serio *serio) +{ + down(&serio->drv_sem); +} + static inline void serio_unpin_driver(struct serio *serio) { up(&serio->drv_sem); -- cgit v1.2.3-70-g09d2 From f49d16ef2d6f008119d4ee2c895781fb229bad68 Mon Sep 17 00:00:00 2001 From: Manfred Spraul Date: Sun, 26 Jun 2005 11:36:52 +0200 Subject: [PATCH] forcedeth: Add support for new device id This is a multi-part message in MIME format. --- drivers/net/forcedeth.c | 17 ++++++++++++++++- include/linux/pci_ids.h | 2 ++ 2 files changed, 18 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c index b471d1a8ffd..64f0f697c95 100644 --- a/drivers/net/forcedeth.c +++ b/drivers/net/forcedeth.c @@ -84,6 +84,7 @@ * 0.32: 16 Apr 2005: RX_ERROR4 handling added. * 0.33: 16 May 2005: Support for MCP51 added. * 0.34: 18 Jun 2005: Add DEV_NEED_LINKTIMER to all nForce nics. + * 0.35: 26 Jun 2005: Support for MCP55 added. * * Known bugs: * We suspect that on some hardware no TX done interrupts are generated. @@ -95,7 +96,7 @@ * DEV_NEED_TIMERIRQ will not harm you on sane hardware, only generating a few * superfluous timer interrupts from the nic. 
*/ -#define FORCEDETH_VERSION "0.34" +#define FORCEDETH_VERSION "0.35" #define DRV_NAME "forcedeth" #include @@ -2284,6 +2285,20 @@ static struct pci_device_id pci_tbl[] = { .subdevice = PCI_ANY_ID, .driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER, }, + { /* MCP55 Ethernet Controller */ + .vendor = PCI_VENDOR_ID_NVIDIA, + .device = PCI_DEVICE_ID_NVIDIA_NVENET_14, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER, + }, + { /* MCP55 Ethernet Controller */ + .vendor = PCI_VENDOR_ID_NVIDIA, + .device = PCI_DEVICE_ID_NVIDIA_NVENET_15, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .driver_data = DEV_NEED_LASTPACKET1|DEV_IRQMASK_2|DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER, + }, {0,}, }; diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index bf608808a60..3af7450278b 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1284,6 +1284,8 @@ #define PCI_DEVICE_ID_NVIDIA_GEFORCE_FX_GO5700_2 0x0348 #define PCI_DEVICE_ID_NVIDIA_QUADRO_FX_GO1000 0x034C #define PCI_DEVICE_ID_NVIDIA_QUADRO_FX_1100 0x034E +#define PCI_DEVICE_ID_NVIDIA_NVENET_14 0x0372 +#define PCI_DEVICE_ID_NVIDIA_NVENET_15 0x0373 #define PCI_VENDOR_ID_IMS 0x10e0 #define PCI_DEVICE_ID_IMS_8849 0x8849 -- cgit v1.2.3-70-g09d2 From ec9f47cd6a14ca069bb7552a984c0a338fc7262b Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 27 Jun 2005 11:12:54 +0100 Subject: [PATCH] Serial: Split 8250 port table Add separate files for the different 8250 ISA-based serial boards. Looking across all the various architectures, it seems reasonable that we can key the availability of the configuration options for these beasts to the bus-related symbols (iow, CONFIG_ISA). We also standardise the base baud/uart clock rate for these boards - I'm sure that isn't architecture specific, but is solely dependent on the crystal fitted on the board (which should be the same no matter what type of machine its fitted into.) Signed-off-by: Russell King --- drivers/serial/8250.c | 25 +++++--------- drivers/serial/8250_accent.c | 47 ++++++++++++++++++++++++++ drivers/serial/8250_boca.c | 61 ++++++++++++++++++++++++++++++++++ drivers/serial/8250_fourport.c | 53 +++++++++++++++++++++++++++++ drivers/serial/8250_hub6.c | 58 ++++++++++++++++++++++++++++++++ drivers/serial/8250_mca.c | 64 +++++++++++++++++++++++++++++++++++ drivers/serial/Kconfig | 75 +++++++++++++++++++++++++++++++++--------- drivers/serial/Makefile | 5 +++ include/linux/serial_8250.h | 1 + 9 files changed, 357 insertions(+), 32 deletions(-) create mode 100644 drivers/serial/8250_accent.c create mode 100644 drivers/serial/8250_boca.c create mode 100644 drivers/serial/8250_fourport.c create mode 100644 drivers/serial/8250_hub6.c create mode 100644 drivers/serial/8250_mca.c (limited to 'include/linux') diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c index d8b9d2b8c20..34e75bc8f4c 100644 --- a/drivers/serial/8250.c +++ b/drivers/serial/8250.c @@ -77,23 +77,9 @@ static unsigned int share_irqs = SERIAL8250_SHARE_IRQS; */ #define is_real_interrupt(irq) ((irq) != 0) -/* - * This converts from our new CONFIG_ symbols to the symbols - * that asm/serial.h expects. You _NEED_ to comment out the - * linux/config.h include contained inside asm/serial.h for - * this to work. 
- */ -#undef CONFIG_SERIAL_MANY_PORTS -#undef CONFIG_SERIAL_DETECT_IRQ -#undef CONFIG_SERIAL_MULTIPORT -#undef CONFIG_HUB6 - #ifdef CONFIG_SERIAL_8250_DETECT_IRQ #define CONFIG_SERIAL_DETECT_IRQ 1 #endif -#ifdef CONFIG_SERIAL_8250_MULTIPORT -#define CONFIG_SERIAL_MULTIPORT 1 -#endif #ifdef CONFIG_SERIAL_8250_MANY_PORTS #define CONFIG_SERIAL_MANY_PORTS 1 #endif @@ -2323,10 +2309,11 @@ static int __devinit serial8250_probe(struct device *dev) { struct plat_serial8250_port *p = dev->platform_data; struct uart_port port; + int ret, i; memset(&port, 0, sizeof(struct uart_port)); - for (; p && p->flags != 0; p++) { + for (i = 0; p && p->flags != 0; p++, i++) { port.iobase = p->iobase; port.membase = p->membase; port.irq = p->irq; @@ -2335,10 +2322,16 @@ static int __devinit serial8250_probe(struct device *dev) port.iotype = p->iotype; port.flags = p->flags; port.mapbase = p->mapbase; + port.hub6 = p->hub6; port.dev = dev; if (share_irqs) port.flags |= UPF_SHARE_IRQ; - serial8250_register_port(&port); + ret = serial8250_register_port(&port); + if (ret < 0) { + dev_err(dev, "unable to register port at index %d " + "(IO%lx MEM%lx IRQ%d): %d\n", i, + p->iobase, p->mapbase, p->irq, ret); + } } return 0; } diff --git a/drivers/serial/8250_accent.c b/drivers/serial/8250_accent.c new file mode 100644 index 00000000000..1f2c276063e --- /dev/null +++ b/drivers/serial/8250_accent.c @@ -0,0 +1,47 @@ +/* + * linux/drivers/serial/8250_accent.c + * + * Copyright (C) 2005 Russell King. + * Data taken from include/asm-i386/serial.h + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include + +#define PORT(_base,_irq) \ + { \ + .iobase = _base, \ + .irq = _irq, \ + .uartclk = 1843200, \ + .iotype = UPIO_PORT, \ + .flags = UPF_BOOT_AUTOCONF, \ + } + +static struct plat_serial8250_port accent_data[] = { + PORT(0x330, 4), + PORT(0x338, 4), + { }, +}; + +static struct platform_device accent_device = { + .name = "serial8250", + .id = 2, + .dev = { + .platform_data = accent_data, + }, +}; + +static int __init accent_init(void) +{ + return platform_device_register(&accent_device); +} + +module_init(accent_init); + +MODULE_AUTHOR("Russell King"); +MODULE_DESCRIPTION("8250 serial probe module for Accent Async cards"); +MODULE_LICENSE("GPL"); diff --git a/drivers/serial/8250_boca.c b/drivers/serial/8250_boca.c new file mode 100644 index 00000000000..465c9ea1e7a --- /dev/null +++ b/drivers/serial/8250_boca.c @@ -0,0 +1,61 @@ +/* + * linux/drivers/serial/8250_boca.c + * + * Copyright (C) 2005 Russell King. + * Data taken from include/asm-i386/serial.h + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include +#include +#include + +#define PORT(_base,_irq) \ + { \ + .iobase = _base, \ + .irq = _irq, \ + .uartclk = 1843200, \ + .iotype = UPIO_PORT, \ + .flags = UPF_BOOT_AUTOCONF, \ + } + +static struct plat_serial8250_port boca_data[] = { + PORT(0x100, 12), + PORT(0x108, 12), + PORT(0x110, 12), + PORT(0x118, 12), + PORT(0x120, 12), + PORT(0x128, 12), + PORT(0x130, 12), + PORT(0x138, 12), + PORT(0x140, 12), + PORT(0x148, 12), + PORT(0x150, 12), + PORT(0x158, 12), + PORT(0x160, 12), + PORT(0x168, 12), + PORT(0x170, 12), + PORT(0x178, 12), + { }, +}; + +static struct platform_device boca_device = { + .name = "serial8250", + .id = 3, + .dev = { + .platform_data = boca_data, + }, +}; + +static int __init boca_init(void) +{ + return platform_device_register(&boca_device); +} + +module_init(boca_init); + +MODULE_AUTHOR("Russell King"); +MODULE_DESCRIPTION("8250 serial probe module for Boca cards"); +MODULE_LICENSE("GPL"); diff --git a/drivers/serial/8250_fourport.c b/drivers/serial/8250_fourport.c new file mode 100644 index 00000000000..e9b4d908ef4 --- /dev/null +++ b/drivers/serial/8250_fourport.c @@ -0,0 +1,53 @@ +/* + * linux/drivers/serial/8250_fourport.c + * + * Copyright (C) 2005 Russell King. + * Data taken from include/asm-i386/serial.h + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include + +#define PORT(_base,_irq) \ + { \ + .iobase = _base, \ + .irq = _irq, \ + .uartclk = 1843200, \ + .iotype = UPIO_PORT, \ + .flags = UPF_BOOT_AUTOCONF | UPF_FOURPORT, \ + } + +static struct plat_serial8250_port fourport_data[] = { + PORT(0x1a0, 9), + PORT(0x1a8, 9), + PORT(0x1b0, 9), + PORT(0x1b8, 9), + PORT(0x2a0, 5), + PORT(0x2a8, 5), + PORT(0x2b0, 5), + PORT(0x2b8, 5), + { }, +}; + +static struct platform_device fourport_device = { + .name = "serial8250", + .id = 1, + .dev = { + .platform_data = fourport_data, + }, +}; + +static int __init fourport_init(void) +{ + return platform_device_register(&fourport_device); +} + +module_init(fourport_init); + +MODULE_AUTHOR("Russell King"); +MODULE_DESCRIPTION("8250 serial probe module for AST Fourport cards"); +MODULE_LICENSE("GPL"); diff --git a/drivers/serial/8250_hub6.c b/drivers/serial/8250_hub6.c new file mode 100644 index 00000000000..77f396f84b4 --- /dev/null +++ b/drivers/serial/8250_hub6.c @@ -0,0 +1,58 @@ +/* + * linux/drivers/serial/8250_hub6.c + * + * Copyright (C) 2005 Russell King. + * Data taken from include/asm-i386/serial.h + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include +#include +#include + +#define HUB6(card,port) \ + { \ + .iobase = 0x302, \ + .irq = 3, \ + .uartclk = 1843200, \ + .iotype = UPIO_HUB6, \ + .flags = UPF_BOOT_AUTOCONF, \ + .hub6 = (card) << 6 | (port) << 3 | 1, \ + } + +static struct plat_serial8250_port hub6_data[] = { + HUB6(0,0), + HUB6(0,1), + HUB6(0,2), + HUB6(0,3), + HUB6(0,4), + HUB6(0,5), + HUB6(1,0), + HUB6(1,1), + HUB6(1,2), + HUB6(1,3), + HUB6(1,4), + HUB6(1,5), + { }, +}; + +static struct platform_device hub6_device = { + .name = "serial8250", + .id = 4, + .dev = { + .platform_data = hub6_data, + }, +}; + +static int __init hub6_init(void) +{ + return platform_device_register(&hub6_device); +} + +module_init(hub6_init); + +MODULE_AUTHOR("Russell King"); +MODULE_DESCRIPTION("8250 serial probe module for Hub6 cards"); +MODULE_LICENSE("GPL"); diff --git a/drivers/serial/8250_mca.c b/drivers/serial/8250_mca.c new file mode 100644 index 00000000000..f0c40d68b8c --- /dev/null +++ b/drivers/serial/8250_mca.c @@ -0,0 +1,64 @@ +/* + * linux/drivers/serial/8250_mca.c + * + * Copyright (C) 2005 Russell King. + * Data taken from include/asm-i386/serial.h + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include + +/* + * FIXME: Should we be doing AUTO_IRQ here? + */ +#ifdef CONFIG_SERIAL_8250_DETECT_IRQ +#define MCA_FLAGS UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | UPF_AUTO_IRQ +#else +#define MCA_FLAGS UPF_BOOT_AUTOCONF | UPF_SKIP_TEST +#endif + +#define PORT(_base,_irq) \ + { \ + .iobase = _base, \ + .irq = _irq, \ + .uartclk = 1843200, \ + .iotype = UPIO_PORT, \ + .flags = MCA_FLAGS, \ + } + +static struct plat_serial8250_port mca_data[] = { + PORT(0x3220, 3), + PORT(0x3228, 3), + PORT(0x4220, 3), + PORT(0x4228, 3), + PORT(0x5220, 3), + PORT(0x5228, 3), + { }, +}; + +static struct platform_device mca_device = { + .name = "serial8250", + .id = 5, + .dev = { + .platform_data = mca_data, + }, +}; + +static int __init mca_init(void) +{ + if (!MCA_bus) + return -ENODEV; + return platform_device_register(&mca_device); +} + +module_init(mca_init); + +MODULE_AUTHOR("Russell King"); +MODULE_DESCRIPTION("8250 serial probe module for MCA ports"); +MODULE_LICENSE("GPL"); diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig index 25fcef2c42d..e879bce160d 100644 --- a/drivers/serial/Kconfig +++ b/drivers/serial/Kconfig @@ -89,11 +89,11 @@ config SERIAL_8250_NR_UARTS int "Maximum number of non-legacy 8250/16550 serial ports" depends on SERIAL_8250 default "4" - ---help--- - Set this to the number of non-legacy serial ports you want - the driver to support. This includes any ports discovered - via ACPI or PCI enumeration and any ports that may be added - at run-time via hot-plug. + help + Set this to the number of serial ports you want the driver + to support. This includes any ports discovered via ACPI or + PCI enumeration and any ports that may be added at run-time + via hot-plug, or any ISA multi-port serial cards. config SERIAL_8250_EXTENDED bool "Extended 8250/16550 serial driver options" @@ -141,31 +141,74 @@ config SERIAL_8250_DETECT_IRQ If unsure, say N. -config SERIAL_8250_MULTIPORT - bool "Support special multiport boards" - depends on SERIAL_8250_EXTENDED - help - Some multiport serial ports have special ports which are used to - signal when there are any serial ports on the board which need - servicing. 
Say Y here to enable the serial driver to take advantage - of those special I/O ports. - config SERIAL_8250_RSA bool "Support RSA serial ports" depends on SERIAL_8250_EXTENDED help ::: To be written ::: -comment "Non-8250 serial port support" +# +# Multi-port serial cards +# + +config SERIAL_8250_FOURPORT + tristate "Support Fourport cards" + depends on SERIAL_8250 != n && ISA && SERIAL_8250_MANY_PORTS + help + Say Y here if you have an AST FourPort serial board. + + To compile this driver as a module, choose M here: the module + will be called 8250_fourport. + +config SERIAL_8250_ACCENT + tristate "Support Accent cards" + depends on SERIAL_8250 != n && ISA && SERIAL_8250_MANY_PORTS + help + Say Y here if you have an Accent Async serial board. + + To compile this driver as a module, choose M here: the module + will be called 8250_accent. + + +config SERIAL_8250_BOCA + tristate "Support Boca cards" + depends on SERIAL_8250 != n && ISA && SERIAL_8250_MANY_PORTS + help + Say Y here if you have a Boca serial board. Please read the Boca + mini-HOWTO, avaialble from + + To compile this driver as a module, choose M here: the module + will be called 8250_boca. + + +config SERIAL_8250_HUB6 + tristate "Support Hub6 cards" + depends on SERIAL_8250 != n && ISA && SERIAL_8250_MANY_PORTS + help + Say Y here if you have a HUB6 serial board. + + To compile this driver as a module, choose M here: the module + will be called 8250_hub6. + +config SERIAL_8250_MCA + tristate "Support 8250-type ports on MCA buses" + depends on SERIAL_8250 != n && MCA + help + Say Y here if you have a MCA serial ports. + + To compile this driver as a module, choose M here: the module + will be called 8250_mca. config SERIAL_8250_ACORN tristate "Acorn expansion card serial port support" - depends on ARM && ARCH_ACORN && SERIAL_8250 + depends on ARCH_ACORN && SERIAL_8250 help If you have an Atomwide Serial card or Serial Port card for an Acorn system, say Y to this option. The driver can handle 1, 2, or 3 port cards. If unsure, say N. 
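Each of the new entries above pairs with one small board file built on the same pattern: a plat_serial8250_port array handed to a "serial8250" platform device. A sketch of what one more board would look like; myboard, the 0x2f0/0x2f8 addresses and IRQ 5 are invented, and .id = 6 assumes ids 1 through 5 stay claimed by the five boards added here:

#include <linux/module.h>
#include <linux/init.h>
#include <linux/device.h>
#include <linux/serial_8250.h>

#define PORT(_base, _irq)			\
	{					\
		.iobase  = _base,		\
		.irq     = _irq,		\
		.uartclk = 1843200,		\
		.iotype  = UPIO_PORT,		\
		.flags   = UPF_BOOT_AUTOCONF,	\
	}

static struct plat_serial8250_port myboard_data[] = {
	PORT(0x2f0, 5),
	PORT(0x2f8, 5),
	{ },	/* a zeroed .flags terminates the array in serial8250_probe() */
};

static struct platform_device myboard_device = {
	.name	= "serial8250",
	.id	= 6,
	.dev	= {
		.platform_data	= myboard_data,
	},
};

static int __init myboard_init(void)
{
	return platform_device_register(&myboard_device);
}

module_init(myboard_init);

MODULE_LICENSE("GPL");

Fixing .uartclk at 1843200 for all of these boards follows the point made in the commit message: the base baud comes from the crystal fitted on the board, not from the architecture the board is plugged into.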
+comment "Non-8250 serial port support" + config SERIAL_AMBA_PL010 tristate "ARM AMBA PL010 serial port support" depends on ARM_AMBA diff --git a/drivers/serial/Makefile b/drivers/serial/Makefile index 8f1cdde7dbe..65bd4381685 100644 --- a/drivers/serial/Makefile +++ b/drivers/serial/Makefile @@ -17,6 +17,11 @@ obj-$(CONFIG_SERIAL_8250) += 8250.o $(serial-8250-y) obj-$(CONFIG_SERIAL_8250_CS) += serial_cs.o obj-$(CONFIG_SERIAL_8250_ACORN) += 8250_acorn.o obj-$(CONFIG_SERIAL_8250_CONSOLE) += 8250_early.o +obj-$(CONFIG_SERIAL_8250_FOURPORT) += 8250_fourport.o +obj-$(CONFIG_SERIAL_8250_ACCENT) += 8250_accent.o +obj-$(CONFIG_SERIAL_8250_BOCA) += 8250_boca.o +obj-$(CONFIG_SERIAL_8250_HUB6) += 8250_hub6.o +obj-$(CONFIG_SERIAL_8250_MCA) += 8250_mca.o obj-$(CONFIG_SERIAL_AMBA_PL010) += amba-pl010.o obj-$(CONFIG_SERIAL_AMBA_PL011) += amba-pl011.o obj-$(CONFIG_SERIAL_CLPS711X) += clps711x.o diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index 823181af6dd..3e3c1fa35b0 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -22,6 +22,7 @@ struct plat_serial8250_port { unsigned int uartclk; /* UART clock rate */ unsigned char regshift; /* register shift */ unsigned char iotype; /* UPIO_* */ + unsigned char hub6; unsigned int flags; /* UPF_* flags */ }; -- cgit v1.2.3-70-g09d2 From 22e2c507c301c3dbbcf91b4948b88f78842ee6c9 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 27 Jun 2005 10:55:12 +0200 Subject: [PATCH] Update cfq io scheduler to time sliced design This updates the CFQ io scheduler to the new time sliced design (cfq v3). It provides full process fairness, while giving excellent aggregate system throughput even for many competing processes. It supports io priorities, either inherited from the cpu nice value or set directly with the ioprio_get/set syscalls. The latter closely mimic set/getpriority. This import is based on my latest from -mm. 
Signed-off-by: Jens Axboe Signed-off-by: Linus Torvalds --- arch/i386/kernel/syscall_table.S | 2 + arch/ia64/kernel/entry.S | 4 +- arch/ppc/kernel/misc.S | 2 + drivers/block/as-iosched.c | 5 +- drivers/block/cfq-iosched.c | 1910 +++++++++++++++++++++++++------------- drivers/block/deadline-iosched.c | 3 +- drivers/block/elevator.c | 9 +- drivers/block/ll_rw_blk.c | 59 +- fs/Makefile | 1 + fs/ioprio.c | 172 ++++ fs/reiserfs/journal.c | 12 + include/asm-i386/unistd.h | 4 +- include/asm-ia64/unistd.h | 2 + include/asm-ppc/unistd.h | 4 +- include/asm-x86_64/unistd.h | 6 +- include/linux/bio.h | 14 + include/linux/blkdev.h | 25 +- include/linux/elevator.h | 8 +- include/linux/fs.h | 19 + include/linux/init_task.h | 2 + include/linux/ioprio.h | 87 ++ include/linux/sched.h | 6 +- include/linux/writeback.h | 6 +- kernel/exit.c | 2 + kernel/fork.c | 5 + kernel/sched.c | 8 - 26 files changed, 1685 insertions(+), 692 deletions(-) create mode 100644 fs/ioprio.c create mode 100644 include/linux/ioprio.h (limited to 'include/linux') diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S index 442a6e937b1..3db9a04aec6 100644 --- a/arch/i386/kernel/syscall_table.S +++ b/arch/i386/kernel/syscall_table.S @@ -289,3 +289,5 @@ ENTRY(sys_call_table) .long sys_add_key .long sys_request_key .long sys_keyctl + .long sys_ioprio_set + .long sys_ioprio_get /* 290 */ diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index b1d5d3d5276..785a51b0ad8 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -1577,8 +1577,8 @@ sys_call_table: data8 sys_add_key data8 sys_request_key data8 sys_keyctl - data8 sys_ni_syscall - data8 sys_ni_syscall // 1275 + data8 sys_ioprio_set + data8 sys_ioprio_get // 1275 data8 sys_set_zone_reclaim data8 sys_ni_syscall data8 sys_ni_syscall diff --git a/arch/ppc/kernel/misc.S b/arch/ppc/kernel/misc.S index b6a63a49a23..191a8def3bd 100644 --- a/arch/ppc/kernel/misc.S +++ b/arch/ppc/kernel/misc.S @@ -1449,3 +1449,5 @@ _GLOBAL(sys_call_table) .long sys_request_key /* 270 */ .long sys_keyctl .long sys_waitid + .long sys_ioprio_set + .long sys_ioprio_get diff --git a/drivers/block/as-iosched.c b/drivers/block/as-iosched.c index 3410b4d294b..91aeb678135 100644 --- a/drivers/block/as-iosched.c +++ b/drivers/block/as-iosched.c @@ -1806,7 +1806,8 @@ static void as_put_request(request_queue_t *q, struct request *rq) rq->elevator_private = NULL; } -static int as_set_request(request_queue_t *q, struct request *rq, int gfp_mask) +static int as_set_request(request_queue_t *q, struct request *rq, + struct bio *bio, int gfp_mask) { struct as_data *ad = q->elevator->elevator_data; struct as_rq *arq = mempool_alloc(ad->arq_pool, gfp_mask); @@ -1827,7 +1828,7 @@ static int as_set_request(request_queue_t *q, struct request *rq, int gfp_mask) return 1; } -static int as_may_queue(request_queue_t *q, int rw) +static int as_may_queue(request_queue_t *q, int rw, struct bio *bio) { int ret = ELV_MQUEUE_MAY; struct as_data *ad = q->elevator->elevator_data; diff --git a/drivers/block/cfq-iosched.c b/drivers/block/cfq-iosched.c index 3ac47dde64d..35f6e569d5e 100644 --- a/drivers/block/cfq-iosched.c +++ b/drivers/block/cfq-iosched.c @@ -21,22 +21,33 @@ #include #include #include - -static unsigned long max_elapsed_crq; -static unsigned long max_elapsed_dispatch; +#include +#include /* * tunables */ static int cfq_quantum = 4; /* max queue in one round of service */ static int cfq_queued = 8; /* minimum rq allocate limit per-queue*/ -static int cfq_service = HZ; /* 
period over which service is avg */ -static int cfq_fifo_expire_r = HZ / 2; /* fifo timeout for sync requests */ -static int cfq_fifo_expire_w = 5 * HZ; /* fifo timeout for async requests */ -static int cfq_fifo_rate = HZ / 8; /* fifo expiry rate */ +static int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 }; static int cfq_back_max = 16 * 1024; /* maximum backwards seek, in KiB */ static int cfq_back_penalty = 2; /* penalty of a backwards seek */ +static int cfq_slice_sync = HZ / 10; +static int cfq_slice_async = HZ / 50; +static int cfq_slice_async_rq = 2; +static int cfq_slice_idle = HZ / 50; + +#define CFQ_IDLE_GRACE (HZ / 10) +#define CFQ_SLICE_SCALE (5) + +#define CFQ_KEY_ASYNC (0) + +/* + * disable queueing at the driver/hardware level + */ +static int cfq_max_depth = 1; + /* * for the hash of cfqq inside the cfqd */ @@ -55,6 +66,7 @@ static int cfq_back_penalty = 2; /* penalty of a backwards seek */ #define list_entry_hash(ptr) hlist_entry((ptr), struct cfq_rq, hash) #define list_entry_cfqq(ptr) list_entry((ptr), struct cfq_queue, cfq_list) +#define list_entry_fifo(ptr) list_entry((ptr), struct request, queuelist) #define RQ_DATA(rq) (rq)->elevator_private @@ -75,78 +87,101 @@ static int cfq_back_penalty = 2; /* penalty of a backwards seek */ #define rb_entry_crq(node) rb_entry((node), struct cfq_rq, rb_node) #define rq_rb_key(rq) (rq)->sector -/* - * threshold for switching off non-tag accounting - */ -#define CFQ_MAX_TAG (4) - -/* - * sort key types and names - */ -enum { - CFQ_KEY_PGID, - CFQ_KEY_TGID, - CFQ_KEY_UID, - CFQ_KEY_GID, - CFQ_KEY_LAST, -}; - -static char *cfq_key_types[] = { "pgid", "tgid", "uid", "gid", NULL }; - static kmem_cache_t *crq_pool; static kmem_cache_t *cfq_pool; static kmem_cache_t *cfq_ioc_pool; +#define CFQ_PRIO_LISTS IOPRIO_BE_NR +#define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE) +#define cfq_class_be(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_BE) +#define cfq_class_rt(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_RT) + +#define cfq_cfqq_sync(cfqq) ((cfqq)->key != CFQ_KEY_ASYNC) + +/* + * Per block device queue structure + */ struct cfq_data { - struct list_head rr_list; + atomic_t ref; + request_queue_t *queue; + + /* + * rr list of queues with requests and the count of them + */ + struct list_head rr_list[CFQ_PRIO_LISTS]; + struct list_head busy_rr; + struct list_head cur_rr; + struct list_head idle_rr; + unsigned int busy_queues; + + /* + * non-ordered list of empty cfqq's + */ struct list_head empty_list; + /* + * cfqq lookup hash + */ struct hlist_head *cfq_hash; - struct hlist_head *crq_hash; - /* queues on rr_list (ie they have pending requests */ - unsigned int busy_queues; + /* + * global crq hash for all queues + */ + struct hlist_head *crq_hash; unsigned int max_queued; - atomic_t ref; + mempool_t *crq_pool; - int key_type; + int rq_in_driver; - mempool_t *crq_pool; + /* + * schedule slice state info + */ + /* + * idle window management + */ + struct timer_list idle_slice_timer; + struct work_struct unplug_work; - request_queue_t *queue; + struct cfq_queue *active_queue; + struct cfq_io_context *active_cic; + int cur_prio, cur_end_prio; + unsigned int dispatch_slice; + + struct timer_list idle_class_timer; sector_t last_sector; + unsigned long last_end_request; - int rq_in_driver; + unsigned int rq_starved; /* * tunables, see top of file */ unsigned int cfq_quantum; unsigned int cfq_queued; - unsigned int cfq_fifo_expire_r; - unsigned int cfq_fifo_expire_w; - unsigned int cfq_fifo_batch_expire; + unsigned int 
cfq_fifo_expire[2]; unsigned int cfq_back_penalty; unsigned int cfq_back_max; - unsigned int find_best_crq; - - unsigned int cfq_tagged; + unsigned int cfq_slice[2]; + unsigned int cfq_slice_async_rq; + unsigned int cfq_slice_idle; + unsigned int cfq_max_depth; }; +/* + * Per process-grouping structure + */ struct cfq_queue { /* reference count */ atomic_t ref; /* parent cfq_data */ struct cfq_data *cfqd; - /* hash of mergeable requests */ + /* cfqq lookup hash */ struct hlist_node cfq_hash; /* hash key */ - unsigned long key; - /* whether queue is on rr (or empty) list */ - int on_rr; + unsigned int key; /* on either rr or empty list of cfqd */ struct list_head cfq_list; /* sorted list of pending requests */ @@ -158,21 +193,35 @@ struct cfq_queue { /* currently allocated requests */ int allocated[2]; /* fifo list of requests in sort_list */ - struct list_head fifo[2]; - /* last time fifo expired */ - unsigned long last_fifo_expire; - - int key_type; - - unsigned long service_start; - unsigned long service_used; + struct list_head fifo; - unsigned int max_rate; + unsigned long slice_start; + unsigned long slice_end; + unsigned long slice_left; + unsigned long service_last; /* number of requests that have been handed to the driver */ int in_flight; - /* number of currently allocated requests */ - int alloc_limit[2]; + + /* io prio of this group */ + unsigned short ioprio, org_ioprio; + unsigned short ioprio_class, org_ioprio_class; + + /* whether queue is on rr (or empty) list */ + unsigned on_rr : 1; + /* idle slice, waiting for new request submission */ + unsigned wait_request : 1; + /* set when wait_request gets set, reset on first rq alloc */ + unsigned must_alloc : 1; + /* only gets one must_alloc per slice */ + unsigned must_alloc_slice : 1; + /* idle slice, request added, now waiting to dispatch it */ + unsigned must_dispatch : 1; + /* fifo expire per-slice */ + unsigned fifo_expire : 1; + + unsigned idle_window : 1; + unsigned prio_changed : 1; }; struct cfq_rq { @@ -184,42 +233,17 @@ struct cfq_rq { struct cfq_queue *cfq_queue; struct cfq_io_context *io_context; - unsigned long service_start; - unsigned long queue_start; - - unsigned int in_flight : 1; - unsigned int accounted : 1; - unsigned int is_sync : 1; - unsigned int is_write : 1; + unsigned in_flight : 1; + unsigned accounted : 1; + unsigned is_sync : 1; + unsigned requeued : 1; }; -static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned long); +static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned int); static void cfq_dispatch_sort(request_queue_t *, struct cfq_rq *); -static void cfq_update_next_crq(struct cfq_rq *); static void cfq_put_cfqd(struct cfq_data *cfqd); -/* - * what the fairness is based on (ie how processes are grouped and - * differentiated) - */ -static inline unsigned long -cfq_hash_key(struct cfq_data *cfqd, struct task_struct *tsk) -{ - /* - * optimize this so that ->key_type is the offset into the struct - */ - switch (cfqd->key_type) { - case CFQ_KEY_PGID: - return process_group(tsk); - default: - case CFQ_KEY_TGID: - return tsk->tgid; - case CFQ_KEY_UID: - return tsk->uid; - case CFQ_KEY_GID: - return tsk->gid; - } -} +#define process_sync(tsk) ((tsk)->flags & PF_SYNCWRITE) /* * lots of deadline iosched dupes, can be abstracted later... 
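One of those deadline dupes is the merge hash the following hunks trim: every queued request is hashed on rq_hash_key(), the sector just past its last one, so cfq_merge() can find a back-merge candidate for a new bio in O(1). A toy singly-linked version in plain C; toy_rq and the mask-based MHASH_FN() are simplified stand-ins, where the kernel chains hlist nodes and hashes with hash_long():

#include <stdio.h>

#define MHASH_BITS	6
#define MHASH_SIZE	(1UL << MHASH_BITS)
#define MHASH_FN(sec)	((sec) & (MHASH_SIZE - 1))

struct toy_rq {
	unsigned long sector;		/* first sector of the request */
	unsigned long nr_sectors;	/* length in sectors */
	struct toy_rq *next;		/* collision chain */
};

static struct toy_rq *mhash[MHASH_SIZE];

/* key the request by the sector just past its end, i.e. rq_hash_key() */
static void hash_rq(struct toy_rq *rq)
{
	unsigned long idx = MHASH_FN(rq->sector + rq->nr_sectors);

	rq->next = mhash[idx];
	mhash[idx] = rq;
}

/* a bio starting exactly where a request ends can be back merged */
static struct toy_rq *find_back_merge(unsigned long bio_sector)
{
	struct toy_rq *rq;

	for (rq = mhash[MHASH_FN(bio_sector)]; rq; rq = rq->next)
		if (rq->sector + rq->nr_sectors == bio_sector)
			return rq;
	return NULL;
}

int main(void)
{
	struct toy_rq rq = { 1000, 8, NULL };

	hash_rq(&rq);
	printf("%s\n", find_back_merge(1008) ? "back merge" : "no merge");
	return 0;
}

Front merges take the other path: cfq_merge() probes this hash first and falls back to the per-queue sort_list rbtree keyed on the start sector, which is the two-step sequence visible a little further down.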
@@ -235,16 +259,12 @@ static void cfq_remove_merge_hints(request_queue_t *q, struct cfq_rq *crq) if (q->last_merge == crq->request) q->last_merge = NULL; - - cfq_update_next_crq(crq); } static inline void cfq_add_crq_hash(struct cfq_data *cfqd, struct cfq_rq *crq) { const int hash_idx = CFQ_MHASH_FN(rq_hash_key(crq->request)); - BUG_ON(!hlist_unhashed(&crq->hash)); - hlist_add_head(&crq->hash, &cfqd->crq_hash[hash_idx]); } @@ -257,8 +277,6 @@ static struct request *cfq_find_rq_hash(struct cfq_data *cfqd, sector_t offset) struct cfq_rq *crq = list_entry_hash(entry); struct request *__rq = crq->request; - BUG_ON(hlist_unhashed(&crq->hash)); - if (!rq_mergeable(__rq)) { cfq_del_crq_hash(crq); continue; @@ -287,36 +305,16 @@ cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2) return crq2; if (crq2 == NULL) return crq1; + if (crq1->requeued) + return crq1; + if (crq2->requeued) + return crq2; s1 = crq1->request->sector; s2 = crq2->request->sector; last = cfqd->last_sector; -#if 0 - if (!list_empty(&cfqd->queue->queue_head)) { - struct list_head *entry = &cfqd->queue->queue_head; - unsigned long distance = ~0UL; - struct request *rq; - - while ((entry = entry->prev) != &cfqd->queue->queue_head) { - rq = list_entry_rq(entry); - - if (blk_barrier_rq(rq)) - break; - - if (distance < abs(s1 - rq->sector + rq->nr_sectors)) { - distance = abs(s1 - rq->sector +rq->nr_sectors); - last = rq->sector + rq->nr_sectors; - } - if (distance < abs(s2 - rq->sector + rq->nr_sectors)) { - distance = abs(s2 - rq->sector +rq->nr_sectors); - last = rq->sector + rq->nr_sectors; - } - } - } -#endif - /* * by definition, 1KiB is 2 sectors */ @@ -377,11 +375,13 @@ cfq_find_next_crq(struct cfq_data *cfqd, struct cfq_queue *cfqq, struct cfq_rq *crq_next = NULL, *crq_prev = NULL; struct rb_node *rbnext, *rbprev; - if (!ON_RB(&last->rb_node)) - return NULL; - - if ((rbnext = rb_next(&last->rb_node)) == NULL) + if (ON_RB(&last->rb_node)) + rbnext = rb_next(&last->rb_node); + else { rbnext = rb_first(&cfqq->sort_list); + if (rbnext == &last->rb_node) + rbnext = NULL; + } rbprev = rb_prev(&last->rb_node); @@ -401,67 +401,53 @@ static void cfq_update_next_crq(struct cfq_rq *crq) cfqq->next_crq = cfq_find_next_crq(cfqq->cfqd, cfqq, crq); } -static int cfq_check_sort_rr_list(struct cfq_queue *cfqq) +static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted) { - struct list_head *head = &cfqq->cfqd->rr_list; - struct list_head *next, *prev; - - /* - * list might still be ordered - */ - next = cfqq->cfq_list.next; - if (next != head) { - struct cfq_queue *cnext = list_entry_cfqq(next); + struct cfq_data *cfqd = cfqq->cfqd; + struct list_head *list, *entry; - if (cfqq->service_used > cnext->service_used) - return 1; - } + BUG_ON(!cfqq->on_rr); - prev = cfqq->cfq_list.prev; - if (prev != head) { - struct cfq_queue *cprev = list_entry_cfqq(prev); + list_del(&cfqq->cfq_list); - if (cfqq->service_used < cprev->service_used) - return 1; + if (cfq_class_rt(cfqq)) + list = &cfqd->cur_rr; + else if (cfq_class_idle(cfqq)) + list = &cfqd->idle_rr; + else { + /* + * if cfqq has requests in flight, don't allow it to be + * found in cfq_set_active_queue before it has finished them. 
+ * this is done to increase fairness between a process that + * has lots of io pending vs one that only generates one + * sporadically or synchronously + */ + if (cfqq->in_flight) + list = &cfqd->busy_rr; + else + list = &cfqd->rr_list[cfqq->ioprio]; } - return 0; -} - -static void cfq_sort_rr_list(struct cfq_queue *cfqq, int new_queue) -{ - struct list_head *entry = &cfqq->cfqd->rr_list; - - if (!cfqq->on_rr) - return; - if (!new_queue && !cfq_check_sort_rr_list(cfqq)) + /* + * if queue was preempted, just add to front to be fair. busy_rr + * isn't sorted. + */ + if (preempted || list == &cfqd->busy_rr) { + list_add(&cfqq->cfq_list, list); return; - - list_del(&cfqq->cfq_list); + } /* - * sort by our mean service_used, sub-sort by in-flight requests + * sort by when queue was last serviced */ - while ((entry = entry->prev) != &cfqq->cfqd->rr_list) { + entry = list; + while ((entry = entry->prev) != list) { struct cfq_queue *__cfqq = list_entry_cfqq(entry); - if (cfqq->service_used > __cfqq->service_used) + if (!__cfqq->service_last) + break; + if (time_before(__cfqq->service_last, cfqq->service_last)) break; - else if (cfqq->service_used == __cfqq->service_used) { - struct list_head *prv; - - while ((prv = entry->prev) != &cfqq->cfqd->rr_list) { - __cfqq = list_entry_cfqq(prv); - - WARN_ON(__cfqq->service_used > cfqq->service_used); - if (cfqq->service_used != __cfqq->service_used) - break; - if (cfqq->in_flight > __cfqq->in_flight) - break; - - entry = prv; - } - } } list_add(&cfqq->cfq_list, entry); @@ -469,28 +455,24 @@ static void cfq_sort_rr_list(struct cfq_queue *cfqq, int new_queue) /* * add to busy list of queues for service, trying to be fair in ordering - * the pending list according to requests serviced + * the pending list according to last request service */ static inline void -cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) +cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq, int requeue) { - /* - * it's currently on the empty list - */ + BUG_ON(cfqq->on_rr); cfqq->on_rr = 1; cfqd->busy_queues++; - if (time_after(jiffies, cfqq->service_start + cfq_service)) - cfqq->service_used >>= 3; - - cfq_sort_rr_list(cfqq, 1); + cfq_resort_rr_list(cfqq, requeue); } static inline void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) { - list_move(&cfqq->cfq_list, &cfqd->empty_list); + BUG_ON(!cfqq->on_rr); cfqq->on_rr = 0; + list_move(&cfqq->cfq_list, &cfqd->empty_list); BUG_ON(!cfqd->busy_queues); cfqd->busy_queues--; @@ -505,16 +487,17 @@ static inline void cfq_del_crq_rb(struct cfq_rq *crq) if (ON_RB(&crq->rb_node)) { struct cfq_data *cfqd = cfqq->cfqd; + const int sync = crq->is_sync; - BUG_ON(!cfqq->queued[crq->is_sync]); + BUG_ON(!cfqq->queued[sync]); + cfqq->queued[sync]--; cfq_update_next_crq(crq); - cfqq->queued[crq->is_sync]--; rb_erase(&crq->rb_node, &cfqq->sort_list); RB_CLEAR_COLOR(&crq->rb_node); - if (RB_EMPTY(&cfqq->sort_list) && cfqq->on_rr) + if (cfqq->on_rr && RB_EMPTY(&cfqq->sort_list)) cfq_del_cfqq_rr(cfqd, cfqq); } } @@ -562,7 +545,7 @@ static void cfq_add_crq_rb(struct cfq_rq *crq) rb_insert_color(&crq->rb_node, &cfqq->sort_list); if (!cfqq->on_rr) - cfq_add_cfqq_rr(cfqd, cfqq); + cfq_add_cfqq_rr(cfqd, cfqq, crq->requeued); /* * check if this request is a better next-serve candidate @@ -581,11 +564,10 @@ cfq_reposition_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq) cfq_add_crq_rb(crq); } -static struct request * -cfq_find_rq_rb(struct cfq_data *cfqd, sector_t sector) +static struct request *cfq_find_rq_rb(struct cfq_data 
*cfqd, sector_t sector) + { - const unsigned long key = cfq_hash_key(cfqd, current); - struct cfq_queue *cfqq = cfq_find_cfq_hash(cfqd, key); + struct cfq_queue *cfqq = cfq_find_cfq_hash(cfqd, current->pid); struct rb_node *n; if (!cfqq) @@ -609,20 +591,23 @@ out: static void cfq_deactivate_request(request_queue_t *q, struct request *rq) { + struct cfq_data *cfqd = q->elevator->elevator_data; struct cfq_rq *crq = RQ_DATA(rq); if (crq) { struct cfq_queue *cfqq = crq->cfq_queue; - if (cfqq->cfqd->cfq_tagged) { - cfqq->service_used--; - cfq_sort_rr_list(cfqq, 0); - } - if (crq->accounted) { crq->accounted = 0; - cfqq->cfqd->rq_in_driver--; + WARN_ON(!cfqd->rq_in_driver); + cfqd->rq_in_driver--; } + if (crq->in_flight) { + crq->in_flight = 0; + WARN_ON(!cfqq->in_flight); + cfqq->in_flight--; + } + crq->requeued = 1; } } @@ -640,11 +625,10 @@ static void cfq_remove_request(request_queue_t *q, struct request *rq) struct cfq_rq *crq = RQ_DATA(rq); if (crq) { - cfq_remove_merge_hints(q, crq); list_del_init(&rq->queuelist); + cfq_del_crq_rb(crq); + cfq_remove_merge_hints(q, crq); - if (crq->cfq_queue) - cfq_del_crq_rb(crq); } } @@ -662,21 +646,15 @@ cfq_merge(request_queue_t *q, struct request **req, struct bio *bio) } __rq = cfq_find_rq_hash(cfqd, bio->bi_sector); - if (__rq) { - BUG_ON(__rq->sector + __rq->nr_sectors != bio->bi_sector); - - if (elv_rq_merge_ok(__rq, bio)) { - ret = ELEVATOR_BACK_MERGE; - goto out; - } + if (__rq && elv_rq_merge_ok(__rq, bio)) { + ret = ELEVATOR_BACK_MERGE; + goto out; } __rq = cfq_find_rq_rb(cfqd, bio->bi_sector + bio_sectors(bio)); - if (__rq) { - if (elv_rq_merge_ok(__rq, bio)) { - ret = ELEVATOR_FRONT_MERGE; - goto out; - } + if (__rq && elv_rq_merge_ok(__rq, bio)) { + ret = ELEVATOR_FRONT_MERGE; + goto out; } return ELEVATOR_NO_MERGE; @@ -709,20 +687,194 @@ static void cfq_merged_requests(request_queue_t *q, struct request *rq, struct request *next) { - struct cfq_rq *crq = RQ_DATA(rq); - struct cfq_rq *cnext = RQ_DATA(next); - cfq_merged_request(q, rq); - if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist)) { - if (time_before(cnext->queue_start, crq->queue_start)) { - list_move(&rq->queuelist, &next->queuelist); - crq->queue_start = cnext->queue_start; + /* + * reposition in fifo if next is older than rq + */ + if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) && + time_before(next->start_time, rq->start_time)) + list_move(&rq->queuelist, &next->queuelist); + + cfq_remove_request(q, next); +} + +static inline void +__cfq_set_active_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) +{ + if (cfqq) { + /* + * stop potential idle class queues waiting service + */ + del_timer(&cfqd->idle_class_timer); + + cfqq->slice_start = jiffies; + cfqq->slice_end = 0; + cfqq->slice_left = 0; + cfqq->must_alloc_slice = 0; + cfqq->fifo_expire = 0; + } + + cfqd->active_queue = cfqq; +} + +/* + * 0 + * 0,1 + * 0,1,2 + * 0,1,2,3 + * 0,1,2,3,4 + * 0,1,2,3,4,5 + * 0,1,2,3,4,5,6 + * 0,1,2,3,4,5,6,7 + */ +static int cfq_get_next_prio_level(struct cfq_data *cfqd) +{ + int prio, wrap; + + prio = -1; + wrap = 0; + do { + int p; + + for (p = cfqd->cur_prio; p <= cfqd->cur_end_prio; p++) { + if (!list_empty(&cfqd->rr_list[p])) { + prio = p; + break; + } + } + + if (prio != -1) + break; + cfqd->cur_prio = 0; + if (++cfqd->cur_end_prio == CFQ_PRIO_LISTS) { + cfqd->cur_end_prio = 0; + if (wrap) + break; + wrap = 1; } + } while (1); + + if (unlikely(prio == -1)) + return -1; + + BUG_ON(prio >= CFQ_PRIO_LISTS); + + list_splice_init(&cfqd->rr_list[prio], 
&cfqd->cur_rr); + + cfqd->cur_prio = prio + 1; + if (cfqd->cur_prio > cfqd->cur_end_prio) { + cfqd->cur_end_prio = cfqd->cur_prio; + cfqd->cur_prio = 0; + } + if (cfqd->cur_end_prio == CFQ_PRIO_LISTS) { + cfqd->cur_prio = 0; + cfqd->cur_end_prio = 0; } - cfq_update_next_crq(cnext); - cfq_remove_request(q, next); + return prio; +} + +static void cfq_set_active_queue(struct cfq_data *cfqd) +{ + struct cfq_queue *cfqq = NULL; + + /* + * if current list is non-empty, grab first entry. if it is empty, + * get next prio level and grab first entry then if any are spliced + */ + if (!list_empty(&cfqd->cur_rr) || cfq_get_next_prio_level(cfqd) != -1) + cfqq = list_entry_cfqq(cfqd->cur_rr.next); + + /* + * if we have idle queues and no rt or be queues had pending + * requests, either allow immediate service if the grace period + * has passed or arm the idle grace timer + */ + if (!cfqq && !list_empty(&cfqd->idle_rr)) { + unsigned long end = cfqd->last_end_request + CFQ_IDLE_GRACE; + + if (time_after_eq(jiffies, end)) + cfqq = list_entry_cfqq(cfqd->idle_rr.next); + else + mod_timer(&cfqd->idle_class_timer, end); + } + + __cfq_set_active_queue(cfqd, cfqq); +} + +/* + * current cfqq expired its slice (or was too idle), select new one + */ +static inline void cfq_slice_expired(struct cfq_data *cfqd, int preempted) +{ + struct cfq_queue *cfqq = cfqd->active_queue; + + if (cfqq) { + unsigned long now = jiffies; + + if (cfqq->wait_request) + del_timer(&cfqd->idle_slice_timer); + + if (!preempted && !cfqq->in_flight) + cfqq->service_last = now; + + cfqq->must_dispatch = 0; + cfqq->wait_request = 0; + + /* + * store what was left of this slice, if the queue idled out + * or was preempted + */ + if (time_after(now, cfqq->slice_end)) + cfqq->slice_left = now - cfqq->slice_end; + else + cfqq->slice_left = 0; + + if (cfqq->on_rr) + cfq_resort_rr_list(cfqq, preempted); + + cfqd->active_queue = NULL; + + if (cfqd->active_cic) { + put_io_context(cfqd->active_cic->ioc); + cfqd->active_cic = NULL; + } + } + + cfqd->dispatch_slice = 0; +} + +static int cfq_arm_slice_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq) + +{ + WARN_ON(!RB_EMPTY(&cfqq->sort_list)); + WARN_ON(cfqq != cfqd->active_queue); + + /* + * idle is disabled, either manually or by past process history + */ + if (!cfqd->cfq_slice_idle) + return 0; + if (!cfqq->idle_window) + return 0; + /* + * task has exited, don't wait + */ + if (cfqd->active_cic && !cfqd->active_cic->ioc->task) + return 0; + + cfqq->wait_request = 1; + cfqq->must_alloc = 1; + + if (!timer_pending(&cfqd->idle_slice_timer)) { + unsigned long slice_left = cfqq->slice_end - 1; + + cfqd->idle_slice_timer.expires = min(jiffies + cfqd->cfq_slice_idle, slice_left); + add_timer(&cfqd->idle_slice_timer); + } + + return 1; } /* @@ -738,31 +890,39 @@ static void cfq_dispatch_sort(request_queue_t *q, struct cfq_rq *crq) struct request *__rq; sector_t last; - cfq_del_crq_rb(crq); - cfq_remove_merge_hints(q, crq); list_del(&crq->request->queuelist); last = cfqd->last_sector; - while ((entry = entry->prev) != head) { - __rq = list_entry_rq(entry); + list_for_each_entry_reverse(__rq, head, queuelist) { + struct cfq_rq *__crq = RQ_DATA(__rq); - if (blk_barrier_rq(crq->request)) + if (blk_barrier_rq(__rq)) + break; + if (!blk_fs_request(__rq)) break; - if (!blk_fs_request(crq->request)) + if (__crq->requeued) break; - if (crq->request->sector > __rq->sector) + if (__rq->sector <= crq->request->sector) break; if (__rq->sector > last && crq->request->sector < last) { - last = crq->request->sector; + 
last = crq->request->sector + crq->request->nr_sectors; break; } + entry = &__rq->queuelist; } cfqd->last_sector = last; + + cfqq->next_crq = cfq_find_next_crq(cfqd, cfqq, crq); + + cfq_del_crq_rb(crq); + cfq_remove_merge_hints(q, crq); + crq->in_flight = 1; + crq->requeued = 0; cfqq->in_flight++; - list_add(&crq->request->queuelist, entry); + list_add_tail(&crq->request->queuelist, entry); } /* @@ -771,105 +931,176 @@ static void cfq_dispatch_sort(request_queue_t *q, struct cfq_rq *crq) static inline struct cfq_rq *cfq_check_fifo(struct cfq_queue *cfqq) { struct cfq_data *cfqd = cfqq->cfqd; - const int reads = !list_empty(&cfqq->fifo[0]); - const int writes = !list_empty(&cfqq->fifo[1]); - unsigned long now = jiffies; + struct request *rq; struct cfq_rq *crq; - if (time_before(now, cfqq->last_fifo_expire + cfqd->cfq_fifo_batch_expire)) + if (cfqq->fifo_expire) return NULL; - crq = RQ_DATA(list_entry(cfqq->fifo[0].next, struct request, queuelist)); - if (reads && time_after(now, crq->queue_start + cfqd->cfq_fifo_expire_r)) { - cfqq->last_fifo_expire = now; - return crq; - } + if (!list_empty(&cfqq->fifo)) { + int fifo = cfq_cfqq_sync(cfqq); - crq = RQ_DATA(list_entry(cfqq->fifo[1].next, struct request, queuelist)); - if (writes && time_after(now, crq->queue_start + cfqd->cfq_fifo_expire_w)) { - cfqq->last_fifo_expire = now; - return crq; + crq = RQ_DATA(list_entry_fifo(cfqq->fifo.next)); + rq = crq->request; + if (time_after(jiffies, rq->start_time + cfqd->cfq_fifo_expire[fifo])) { + cfqq->fifo_expire = 1; + return crq; + } } return NULL; } /* - * dispatch a single request from given queue + * Scale schedule slice based on io priority */ +static inline int +cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) +{ + const int base_slice = cfqd->cfq_slice[cfq_cfqq_sync(cfqq)]; + + WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR); + + return base_slice + (base_slice/CFQ_SLICE_SCALE * (4 - cfqq->ioprio)); +} + static inline void -cfq_dispatch_request(request_queue_t *q, struct cfq_data *cfqd, - struct cfq_queue *cfqq) +cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) { - struct cfq_rq *crq; + cfqq->slice_end = cfq_prio_to_slice(cfqd, cfqq) + jiffies; +} - /* - * follow expired path, else get first next available - */ - if ((crq = cfq_check_fifo(cfqq)) == NULL) { - if (cfqd->find_best_crq) - crq = cfqq->next_crq; - else - crq = rb_entry_crq(rb_first(&cfqq->sort_list)); - } +static inline int +cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq) +{ + const int base_rq = cfqd->cfq_slice_async_rq; - cfqd->last_sector = crq->request->sector + crq->request->nr_sectors; + WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR); - /* - * finally, insert request into driver list - */ - cfq_dispatch_sort(q, crq); + return 2 * (base_rq + base_rq * (CFQ_PRIO_LISTS - 1 - cfqq->ioprio)); } -static int cfq_dispatch_requests(request_queue_t *q, int max_dispatch) +/* + * get next queue for service + */ +static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd, int force) { - struct cfq_data *cfqd = q->elevator->elevator_data; + unsigned long now = jiffies; struct cfq_queue *cfqq; - struct list_head *entry, *tmp; - int queued, busy_queues, first_round; - if (list_empty(&cfqd->rr_list)) - return 0; + cfqq = cfqd->active_queue; + if (!cfqq) + goto new_queue; - queued = 0; - first_round = 1; -restart: - busy_queues = 0; - list_for_each_safe(entry, tmp, &cfqd->rr_list) { - cfqq = list_entry_cfqq(entry); + /* + * slice has expired + */ + if (!cfqq->must_dispatch && time_after(jiffies, 
cfqq->slice_end)) + goto new_queue; - BUG_ON(RB_EMPTY(&cfqq->sort_list)); + /* + * if queue has requests, dispatch one. if not, check if + * enough slice is left to wait for one + */ + if (!RB_EMPTY(&cfqq->sort_list)) + goto keep_queue; + else if (!force && cfq_cfqq_sync(cfqq) && + time_before(now, cfqq->slice_end)) { + if (cfq_arm_slice_timer(cfqd, cfqq)) + return NULL; + } + +new_queue: + cfq_slice_expired(cfqd, 0); + cfq_set_active_queue(cfqd); +keep_queue: + return cfqd->active_queue; +} + +static int +__cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq, + int max_dispatch) +{ + int dispatched = 0; + + BUG_ON(RB_EMPTY(&cfqq->sort_list)); + + do { + struct cfq_rq *crq; /* - * first round of queueing, only select from queues that - * don't already have io in-flight + * follow expired path, else get first next available */ - if (first_round && cfqq->in_flight) - continue; + if ((crq = cfq_check_fifo(cfqq)) == NULL) + crq = cfqq->next_crq; + + /* + * finally, insert request into driver dispatch list + */ + cfq_dispatch_sort(cfqd->queue, crq); - cfq_dispatch_request(q, cfqd, cfqq); + cfqd->dispatch_slice++; + dispatched++; - if (!RB_EMPTY(&cfqq->sort_list)) - busy_queues++; + if (!cfqd->active_cic) { + atomic_inc(&crq->io_context->ioc->refcount); + cfqd->active_cic = crq->io_context; + } - queued++; - } + if (RB_EMPTY(&cfqq->sort_list)) + break; + + } while (dispatched < max_dispatch); + + /* + * if slice end isn't set yet, set it. if at least one request was + * sync, use the sync time slice value + */ + if (!cfqq->slice_end) + cfq_set_prio_slice(cfqd, cfqq); + + /* + * expire an async queue immediately if it has used up its slice. idle + * queue always expire after 1 dispatch round. + */ + if ((!cfq_cfqq_sync(cfqq) && + cfqd->dispatch_slice >= cfq_prio_to_maxrq(cfqd, cfqq)) || + cfq_class_idle(cfqq)) + cfq_slice_expired(cfqd, 0); + + return dispatched; +} + +static int +cfq_dispatch_requests(request_queue_t *q, int max_dispatch, int force) +{ + struct cfq_data *cfqd = q->elevator->elevator_data; + struct cfq_queue *cfqq; + + if (!cfqd->busy_queues) + return 0; + + cfqq = cfq_select_queue(cfqd, force); + if (cfqq) { + cfqq->wait_request = 0; + cfqq->must_dispatch = 0; + del_timer(&cfqd->idle_slice_timer); + + if (cfq_class_idle(cfqq)) + max_dispatch = 1; - if ((queued < max_dispatch) && (busy_queues || first_round)) { - first_round = 0; - goto restart; + return __cfq_dispatch_requests(cfqd, cfqq, max_dispatch); } - return queued; + return 0; } static inline void cfq_account_dispatch(struct cfq_rq *crq) { struct cfq_queue *cfqq = crq->cfq_queue; struct cfq_data *cfqd = cfqq->cfqd; - unsigned long now, elapsed; - if (!blk_fs_request(crq->request)) + if (unlikely(!blk_fs_request(crq->request))) return; /* @@ -879,65 +1110,34 @@ static inline void cfq_account_dispatch(struct cfq_rq *crq) if (crq->accounted) return; - now = jiffies; - if (cfqq->service_start == ~0UL) - cfqq->service_start = now; - - /* - * on drives with tagged command queueing, command turn-around time - * doesn't necessarily reflect the time spent processing this very - * command inside the drive. 
so do the accounting differently there, - * by just sorting on the number of requests - */ - if (cfqd->cfq_tagged) { - if (time_after(now, cfqq->service_start + cfq_service)) { - cfqq->service_start = now; - cfqq->service_used /= 10; - } - - cfqq->service_used++; - cfq_sort_rr_list(cfqq, 0); - } - - elapsed = now - crq->queue_start; - if (elapsed > max_elapsed_dispatch) - max_elapsed_dispatch = elapsed; - crq->accounted = 1; - crq->service_start = now; - - if (++cfqd->rq_in_driver >= CFQ_MAX_TAG && !cfqd->cfq_tagged) { - cfqq->cfqd->cfq_tagged = 1; - printk("cfq: depth %d reached, tagging now on\n", CFQ_MAX_TAG); - } + cfqd->rq_in_driver++; } static inline void cfq_account_completion(struct cfq_queue *cfqq, struct cfq_rq *crq) { struct cfq_data *cfqd = cfqq->cfqd; + unsigned long now; if (!crq->accounted) return; + now = jiffies; + WARN_ON(!cfqd->rq_in_driver); cfqd->rq_in_driver--; - if (!cfqd->cfq_tagged) { - unsigned long now = jiffies; - unsigned long duration = now - crq->service_start; + if (!cfq_class_idle(cfqq)) + cfqd->last_end_request = now; - if (time_after(now, cfqq->service_start + cfq_service)) { - cfqq->service_start = now; - cfqq->service_used >>= 3; - } - - cfqq->service_used += duration; - cfq_sort_rr_list(cfqq, 0); - - if (duration > max_elapsed_crq) - max_elapsed_crq = duration; + if (!cfqq->in_flight && cfqq->on_rr) { + cfqq->service_last = now; + cfq_resort_rr_list(cfqq, 0); } + + if (crq->is_sync) + crq->io_context->last_end_request = now; } static struct request *cfq_next_request(request_queue_t *q) @@ -950,7 +1150,15 @@ static struct request *cfq_next_request(request_queue_t *q) dispatch: rq = list_entry_rq(q->queue_head.next); - if ((crq = RQ_DATA(rq)) != NULL) { + crq = RQ_DATA(rq); + if (crq) { + /* + * if idle window is disabled, allow queue buildup + */ + if (!crq->in_flight && !crq->cfq_queue->idle_window && + cfqd->rq_in_driver >= cfqd->cfq_max_depth) + return NULL; + cfq_remove_merge_hints(q, crq); cfq_account_dispatch(crq); } @@ -958,7 +1166,7 @@ dispatch: return rq; } - if (cfq_dispatch_requests(q, cfqd->cfq_quantum)) + if (cfq_dispatch_requests(q, cfqd->cfq_quantum, 0)) goto dispatch; return NULL; @@ -972,14 +1180,22 @@ dispatch: */ static void cfq_put_queue(struct cfq_queue *cfqq) { - BUG_ON(!atomic_read(&cfqq->ref)); + struct cfq_data *cfqd = cfqq->cfqd; + + BUG_ON(atomic_read(&cfqq->ref) <= 0); if (!atomic_dec_and_test(&cfqq->ref)) return; BUG_ON(rb_first(&cfqq->sort_list)); + BUG_ON(cfqq->allocated[READ] + cfqq->allocated[WRITE]); BUG_ON(cfqq->on_rr); + if (unlikely(cfqd->active_queue == cfqq)) { + cfq_slice_expired(cfqd, 0); + kblockd_schedule_work(&cfqd->unplug_work); + } + cfq_put_cfqd(cfqq->cfqd); /* @@ -991,7 +1207,7 @@ static void cfq_put_queue(struct cfq_queue *cfqq) } static inline struct cfq_queue * -__cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned long key, const int hashval) +__cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key, const int hashval) { struct hlist_head *hash_list = &cfqd->cfq_hash[hashval]; struct hlist_node *entry, *next; @@ -1007,94 +1223,220 @@ __cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned long key, const int hashval) } static struct cfq_queue * -cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned long key) +cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key) { return __cfq_find_cfq_hash(cfqd, key, hash_long(key, CFQ_QHASH_SHIFT)); } -static inline void -cfq_rehash_cfqq(struct cfq_data *cfqd, struct cfq_queue **cfqq, - struct cfq_io_context *cic) +static void cfq_free_io_context(struct cfq_io_context 
*cic) { - unsigned long hashkey = cfq_hash_key(cfqd, current); - unsigned long hashval = hash_long(hashkey, CFQ_QHASH_SHIFT); - struct cfq_queue *__cfqq; - unsigned long flags; + struct cfq_io_context *__cic; + struct list_head *entry, *next; - spin_lock_irqsave(cfqd->queue->queue_lock, flags); - - hlist_del(&(*cfqq)->cfq_hash); - - __cfqq = __cfq_find_cfq_hash(cfqd, hashkey, hashval); - if (!__cfqq || __cfqq == *cfqq) { - __cfqq = *cfqq; - hlist_add_head(&__cfqq->cfq_hash, &cfqd->cfq_hash[hashval]); - __cfqq->key_type = cfqd->key_type; - } else { - atomic_inc(&__cfqq->ref); - cic->cfqq = __cfqq; - cfq_put_queue(*cfqq); - *cfqq = __cfqq; + list_for_each_safe(entry, next, &cic->list) { + __cic = list_entry(entry, struct cfq_io_context, list); + kmem_cache_free(cfq_ioc_pool, __cic); } - cic->cfqq = __cfqq; - spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); + kmem_cache_free(cfq_ioc_pool, cic); } -static void cfq_free_io_context(struct cfq_io_context *cic) +/* + * Called with interrupts disabled + */ +static void cfq_exit_single_io_context(struct cfq_io_context *cic) { - kmem_cache_free(cfq_ioc_pool, cic); + struct cfq_data *cfqd = cic->cfqq->cfqd; + request_queue_t *q = cfqd->queue; + + WARN_ON(!irqs_disabled()); + + spin_lock(q->queue_lock); + + if (unlikely(cic->cfqq == cfqd->active_queue)) { + cfq_slice_expired(cfqd, 0); + kblockd_schedule_work(&cfqd->unplug_work); + } + + cfq_put_queue(cic->cfqq); + cic->cfqq = NULL; + spin_unlock(q->queue_lock); } /* - * locking hierarchy is: io_context lock -> queue locks + * Another task may update the task cic list, if it is doing a queue lookup + * on its behalf. cfq_cic_lock excludes such concurrent updates */ static void cfq_exit_io_context(struct cfq_io_context *cic) { - struct cfq_queue *cfqq = cic->cfqq; - struct list_head *entry = &cic->list; - request_queue_t *q; + struct cfq_io_context *__cic; + struct list_head *entry; unsigned long flags; + local_irq_save(flags); + /* * put the reference this task is holding to the various queues */ - spin_lock_irqsave(&cic->ioc->lock, flags); - while ((entry = cic->list.next) != &cic->list) { - struct cfq_io_context *__cic; - + list_for_each(entry, &cic->list) { __cic = list_entry(entry, struct cfq_io_context, list); - list_del(entry); - - q = __cic->cfqq->cfqd->queue; - spin_lock(q->queue_lock); - cfq_put_queue(__cic->cfqq); - spin_unlock(q->queue_lock); + cfq_exit_single_io_context(__cic); } - q = cfqq->cfqd->queue; - spin_lock(q->queue_lock); - cfq_put_queue(cfqq); - spin_unlock(q->queue_lock); - - cic->cfqq = NULL; - spin_unlock_irqrestore(&cic->ioc->lock, flags); + cfq_exit_single_io_context(cic); + local_irq_restore(flags); } -static struct cfq_io_context *cfq_alloc_io_context(int gfp_flags) +static struct cfq_io_context * +cfq_alloc_io_context(struct cfq_data *cfqd, int gfp_mask) { - struct cfq_io_context *cic = kmem_cache_alloc(cfq_ioc_pool, gfp_flags); + struct cfq_io_context *cic = kmem_cache_alloc(cfq_ioc_pool, gfp_mask); if (cic) { - cic->dtor = cfq_free_io_context; - cic->exit = cfq_exit_io_context; INIT_LIST_HEAD(&cic->list); cic->cfqq = NULL; + cic->key = NULL; + cic->last_end_request = jiffies; + cic->ttime_total = 0; + cic->ttime_samples = 0; + cic->ttime_mean = 0; + cic->dtor = cfq_free_io_context; + cic->exit = cfq_exit_io_context; } return cic; } +static void cfq_init_prio_data(struct cfq_queue *cfqq) +{ + struct task_struct *tsk = current; + int ioprio_class; + + if (!cfqq->prio_changed) + return; + + ioprio_class = IOPRIO_PRIO_CLASS(tsk->ioprio); + switch (ioprio_class) { + 
default: + printk(KERN_ERR "cfq: bad prio %x\n", ioprio_class); + case IOPRIO_CLASS_NONE: + /* + * no prio set, place us in the middle of the BE classes + */ + cfqq->ioprio = task_nice_ioprio(tsk); + cfqq->ioprio_class = IOPRIO_CLASS_BE; + break; + case IOPRIO_CLASS_RT: + cfqq->ioprio = task_ioprio(tsk); + cfqq->ioprio_class = IOPRIO_CLASS_RT; + break; + case IOPRIO_CLASS_BE: + cfqq->ioprio = task_ioprio(tsk); + cfqq->ioprio_class = IOPRIO_CLASS_BE; + break; + case IOPRIO_CLASS_IDLE: + cfqq->ioprio_class = IOPRIO_CLASS_IDLE; + cfqq->ioprio = 7; + cfqq->idle_window = 0; + break; + } + + /* + * keep track of original prio settings in case we have to temporarily + * elevate the priority of this queue + */ + cfqq->org_ioprio = cfqq->ioprio; + cfqq->org_ioprio_class = cfqq->ioprio_class; + + if (cfqq->on_rr) + cfq_resort_rr_list(cfqq, 0); + + cfqq->prio_changed = 0; +} + +static inline void changed_ioprio(struct cfq_queue *cfqq) +{ + if (cfqq) { + struct cfq_data *cfqd = cfqq->cfqd; + + spin_lock(cfqd->queue->queue_lock); + cfqq->prio_changed = 1; + cfq_init_prio_data(cfqq); + spin_unlock(cfqd->queue->queue_lock); + } +} + +/* + * callback from sys_ioprio_set, irqs are disabled + */ +static int cfq_ioc_set_ioprio(struct io_context *ioc, unsigned int ioprio) +{ + struct cfq_io_context *cic = ioc->cic; + + changed_ioprio(cic->cfqq); + + list_for_each_entry(cic, &cic->list, list) + changed_ioprio(cic->cfqq); + + return 0; +} + +static struct cfq_queue * +cfq_get_queue(struct cfq_data *cfqd, unsigned int key, int gfp_mask) +{ + const int hashval = hash_long(key, CFQ_QHASH_SHIFT); + struct cfq_queue *cfqq, *new_cfqq = NULL; + +retry: + cfqq = __cfq_find_cfq_hash(cfqd, key, hashval); + + if (!cfqq) { + if (new_cfqq) { + cfqq = new_cfqq; + new_cfqq = NULL; + } else if (gfp_mask & __GFP_WAIT) { + spin_unlock_irq(cfqd->queue->queue_lock); + new_cfqq = kmem_cache_alloc(cfq_pool, gfp_mask); + spin_lock_irq(cfqd->queue->queue_lock); + goto retry; + } else { + cfqq = kmem_cache_alloc(cfq_pool, gfp_mask); + if (!cfqq) + goto out; + } + + memset(cfqq, 0, sizeof(*cfqq)); + + INIT_HLIST_NODE(&cfqq->cfq_hash); + INIT_LIST_HEAD(&cfqq->cfq_list); + RB_CLEAR_ROOT(&cfqq->sort_list); + INIT_LIST_HEAD(&cfqq->fifo); + + cfqq->key = key; + hlist_add_head(&cfqq->cfq_hash, &cfqd->cfq_hash[hashval]); + atomic_set(&cfqq->ref, 0); + cfqq->cfqd = cfqd; + atomic_inc(&cfqd->ref); + cfqq->service_last = 0; + /* + * set ->slice_left to allow preemption for a new process + */ + cfqq->slice_left = 2 * cfqd->cfq_slice_idle; + cfqq->idle_window = 1; + cfqq->ioprio = -1; + cfqq->ioprio_class = -1; + cfqq->prio_changed = 1; + } + + if (new_cfqq) + kmem_cache_free(cfq_pool, new_cfqq); + + atomic_inc(&cfqq->ref); +out: + WARN_ON((gfp_mask & __GFP_WAIT) && !cfqq); + return cfqq; +} + /* * Setup general io context and cfq io context. 
There can be several cfq
 * io contexts per general io context, if this process is doing io to more
@@ -1102,39 +1444,39 @@ static struct cfq_io_context *cfq_alloc_io_context(int gfp_flags)
 * cfqq, so we don't need to worry about it disappearing
 */
 static struct cfq_io_context *
-cfq_get_io_context(struct cfq_queue **cfqq, int gfp_flags)
+cfq_get_io_context(struct cfq_data *cfqd, pid_t pid, int gfp_mask)
 {
-	struct cfq_data *cfqd = (*cfqq)->cfqd;
-	struct cfq_queue *__cfqq = *cfqq;
+	struct io_context *ioc = NULL;
 	struct cfq_io_context *cic;
-	struct io_context *ioc;

-	might_sleep_if(gfp_flags & __GFP_WAIT);
+	might_sleep_if(gfp_mask & __GFP_WAIT);

-	ioc = get_io_context(gfp_flags);
+	ioc = get_io_context(gfp_mask);
 	if (!ioc)
 		return NULL;

 	if ((cic = ioc->cic) == NULL) {
-		cic = cfq_alloc_io_context(gfp_flags);
+		cic = cfq_alloc_io_context(cfqd, gfp_mask);

 		if (cic == NULL)
 			goto err;

+		/*
+		 * manually increment generic io_context usage count, it
+		 * cannot go away since we are already holding one ref to it
+		 */
 		ioc->cic = cic;
+		ioc->set_ioprio = cfq_ioc_set_ioprio;
 		cic->ioc = ioc;
-		cic->cfqq = __cfqq;
-		atomic_inc(&__cfqq->ref);
+		cic->key = cfqd;
+		atomic_inc(&cfqd->ref);
 	} else {
 		struct cfq_io_context *__cic;
-		unsigned long flags;

 		/*
-		 * since the first cic on the list is actually the head
-		 * itself, need to check this here or we'll duplicate an
-		 * cic per ioc for no reason
+		 * the first cic on the list is actually the head itself
 		 */
-		if (cic->cfqq == __cfqq)
+		if (cic->key == cfqd)
 			goto out;

 		/*
@@ -1142,152 +1484,259 @@ cfq_get_io_context(struct cfq_queue **cfqq, int gfp_flags)
 		 * should be ok here, the list will usually not be more than
 		 * 1 or a few entries long
 		 */
-		spin_lock_irqsave(&ioc->lock, flags);
 		list_for_each_entry(__cic, &cic->list, list) {
 			/*
 			 * this process is already holding a reference to
 			 * this queue, so no need to get one more
 			 */
-			if (__cic->cfqq == __cfqq) {
+			if (__cic->key == cfqd) {
 				cic = __cic;
-				spin_unlock_irqrestore(&ioc->lock, flags);
 				goto out;
 			}
 		}
-		spin_unlock_irqrestore(&ioc->lock, flags);

 		/*
 		 * nope, process doesn't have a cic associated with this
 		 * cfqq yet. get a new one and add to list
 		 */
-		__cic = cfq_alloc_io_context(gfp_flags);
+		__cic = cfq_alloc_io_context(cfqd, gfp_mask);
 		if (__cic == NULL)
 			goto err;

 		__cic->ioc = ioc;
-		__cic->cfqq = __cfqq;
-		atomic_inc(&__cfqq->ref);
-		spin_lock_irqsave(&ioc->lock, flags);
+		__cic->key = cfqd;
+		atomic_inc(&cfqd->ref);
 		list_add(&__cic->list, &cic->list);
-		spin_unlock_irqrestore(&ioc->lock, flags);
-
 		cic = __cic;
-		*cfqq = __cfqq;
 	}

out:
-	/*
-	 * if key_type has been changed on the fly, we lazily rehash
-	 * each queue at lookup time
-	 */
-	if ((*cfqq)->key_type != cfqd->key_type)
-		cfq_rehash_cfqq(cfqd, cfqq, cic);
-
 	return cic;
err:
 	put_io_context(ioc);
 	return NULL;
}
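The rewritten cfq_get_io_context() above keeps the first per-scheduler context embedded in the generic io_context (ioc->cic) and chains any further ones off its list, keyed by the cfq_data pointer, so the common one-disk case needs no extra allocation and no list walk. A minimal user-space sketch of that head-plus-list pattern; the io_ctx, sched_ctx and lookup_sched_ctx names are stand-ins for illustration, not the kernel API:

/* Sketch of the head-plus-list context lookup; hypothetical names. */
#include <stdlib.h>

struct sched_ctx {
	void *key;              /* which scheduler instance this belongs to */
	struct sched_ctx *next; /* chain hanging off the head context */
};

struct io_ctx {
	struct sched_ctx head;  /* first context is embedded, no extra alloc */
	int have_head;
};

static struct sched_ctx *lookup_sched_ctx(struct io_ctx *ioc, void *key)
{
	struct sched_ctx *c;

	if (!ioc->have_head) {          /* common case: first scheduler seen */
		ioc->head.key = key;
		ioc->head.next = NULL;
		ioc->have_head = 1;
		return &ioc->head;
	}
	if (ioc->head.key == key)       /* the head is itself a real entry */
		return &ioc->head;

	for (c = ioc->head.next; c; c = c->next)
		if (c->key == key)      /* already have one for this key */
			return c;

	c = malloc(sizeof(*c));         /* rare case: io to a second disk */
	if (c) {
		c->key = key;
		c->next = ioc->head.next;
		ioc->head.next = c;
	}
	return c;
}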
-static struct cfq_queue *
-__cfq_get_queue(struct cfq_data *cfqd, unsigned long key, int gfp_mask)
+static void
+cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_io_context *cic)
 {
-	const int hashval = hash_long(key, CFQ_QHASH_SHIFT);
-	struct cfq_queue *cfqq, *new_cfqq = NULL;
+	unsigned long elapsed, ttime;

-retry:
-	cfqq = __cfq_find_cfq_hash(cfqd, key, hashval);
+	/*
+	 * if this context already has stuff queued, thinktime is from
+	 * last queue not last end
+	 */
+#if 0
+	if (time_after(cic->last_end_request, cic->last_queue))
+		elapsed = jiffies - cic->last_end_request;
+	else
+		elapsed = jiffies - cic->last_queue;
+#else
+	elapsed = jiffies - cic->last_end_request;
+#endif

-	if (!cfqq) {
-		if (new_cfqq) {
-			cfqq = new_cfqq;
-			new_cfqq = NULL;
-		} else {
-			spin_unlock_irq(cfqd->queue->queue_lock);
-			new_cfqq = kmem_cache_alloc(cfq_pool, gfp_mask);
-			spin_lock_irq(cfqd->queue->queue_lock);
+	ttime = min(elapsed, 2UL * cfqd->cfq_slice_idle);

-			if (!new_cfqq && !(gfp_mask & __GFP_WAIT))
-				goto out;
-
-			goto retry;
-		}
+	cic->ttime_samples = (7*cic->ttime_samples + 256) / 8;
+	cic->ttime_total = (7*cic->ttime_total + 256*ttime) / 8;
+	cic->ttime_mean = (cic->ttime_total + 128) / cic->ttime_samples;
+}

-		memset(cfqq, 0, sizeof(*cfqq));
+#define sample_valid(samples)	((samples) > 80)

-		INIT_HLIST_NODE(&cfqq->cfq_hash);
-		INIT_LIST_HEAD(&cfqq->cfq_list);
-		RB_CLEAR_ROOT(&cfqq->sort_list);
-		INIT_LIST_HEAD(&cfqq->fifo[0]);
-		INIT_LIST_HEAD(&cfqq->fifo[1]);
+/*
+ * Disable idle window if the process thinks too long or seeks so much that
+ * it doesn't matter
+ */
+static void
+cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
+		       struct cfq_io_context *cic)
+{
+	int enable_idle = cfqq->idle_window;

-		cfqq->key = key;
-		hlist_add_head(&cfqq->cfq_hash, &cfqd->cfq_hash[hashval]);
-		atomic_set(&cfqq->ref, 0);
-		cfqq->cfqd = cfqd;
-		atomic_inc(&cfqd->ref);
-		cfqq->key_type = cfqd->key_type;
-		cfqq->service_start = ~0UL;
+	if (!cic->ioc->task || !cfqd->cfq_slice_idle)
+		enable_idle = 0;
+	else if (sample_valid(cic->ttime_samples)) {
+		if (cic->ttime_mean > cfqd->cfq_slice_idle)
+			enable_idle = 0;
+		else
+			enable_idle = 1;
 	}

-	if (new_cfqq)
-		kmem_cache_free(cfq_pool, new_cfqq);
+	cfqq->idle_window = enable_idle;
+}

-	atomic_inc(&cfqq->ref);
-out:
-	WARN_ON((gfp_mask & __GFP_WAIT) && !cfqq);
-	return cfqq;
+
+/*
+ * Check if new_cfqq should preempt the currently active queue. Return 0 for
+ * no or if we aren't sure, a 1 will cause a preempt. 
+ */ +static int +cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, + struct cfq_rq *crq) +{ + struct cfq_queue *cfqq = cfqd->active_queue; + + if (cfq_class_idle(new_cfqq)) + return 0; + + if (!cfqq) + return 1; + + if (cfq_class_idle(cfqq)) + return 1; + if (!new_cfqq->wait_request) + return 0; + /* + * if it doesn't have slice left, forget it + */ + if (new_cfqq->slice_left < cfqd->cfq_slice_idle) + return 0; + if (crq->is_sync && !cfq_cfqq_sync(cfqq)) + return 1; + + return 0; +} + +/* + * cfqq preempts the active queue. if we allowed preempt with no slice left, + * let it have half of its nominal slice. + */ +static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) +{ + struct cfq_queue *__cfqq, *next; + + list_for_each_entry_safe(__cfqq, next, &cfqd->cur_rr, cfq_list) + cfq_resort_rr_list(__cfqq, 1); + + if (!cfqq->slice_left) + cfqq->slice_left = cfq_prio_to_slice(cfqd, cfqq) / 2; + + cfqq->slice_end = cfqq->slice_left + jiffies; + cfq_slice_expired(cfqd, 1); + __cfq_set_active_queue(cfqd, cfqq); +} + +/* + * should really be a ll_rw_blk.c helper + */ +static void cfq_start_queueing(struct cfq_data *cfqd, struct cfq_queue *cfqq) +{ + request_queue_t *q = cfqd->queue; + + if (!blk_queue_plugged(q)) + q->request_fn(q); + else + __generic_unplug_device(q); +} + +/* + * Called when a new fs request (crq) is added (to cfqq). Check if there's + * something we should do about it + */ +static void +cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, + struct cfq_rq *crq) +{ + const int sync = crq->is_sync; + + cfqq->next_crq = cfq_choose_req(cfqd, cfqq->next_crq, crq); + + if (sync) { + struct cfq_io_context *cic = crq->io_context; + + cfq_update_io_thinktime(cfqd, cic); + cfq_update_idle_window(cfqd, cfqq, cic); + + cic->last_queue = jiffies; + } + + if (cfqq == cfqd->active_queue) { + /* + * if we are waiting for a request for this queue, let it rip + * immediately and flag that we must not expire this queue + * just now + */ + if (cfqq->wait_request) { + cfqq->must_dispatch = 1; + del_timer(&cfqd->idle_slice_timer); + cfq_start_queueing(cfqd, cfqq); + } + } else if (cfq_should_preempt(cfqd, cfqq, crq)) { + /* + * not the active queue - expire current slice if it is + * idle and has expired it's mean thinktime or this new queue + * has some old slice time left and is of higher priority + */ + cfq_preempt_queue(cfqd, cfqq); + cfqq->must_dispatch = 1; + cfq_start_queueing(cfqd, cfqq); + } } -static void cfq_enqueue(struct cfq_data *cfqd, struct cfq_rq *crq) +static void cfq_enqueue(struct cfq_data *cfqd, struct request *rq) { - crq->is_sync = 0; - if (rq_data_dir(crq->request) == READ || current->flags & PF_SYNCWRITE) - crq->is_sync = 1; + struct cfq_rq *crq = RQ_DATA(rq); + struct cfq_queue *cfqq = crq->cfq_queue; + + cfq_init_prio_data(cfqq); cfq_add_crq_rb(crq); - crq->queue_start = jiffies; - list_add_tail(&crq->request->queuelist, &crq->cfq_queue->fifo[crq->is_sync]); + list_add_tail(&rq->queuelist, &cfqq->fifo); + + if (rq_mergeable(rq)) { + cfq_add_crq_hash(cfqd, crq); + + if (!cfqd->queue->last_merge) + cfqd->queue->last_merge = rq; + } + + cfq_crq_enqueued(cfqd, cfqq, crq); } static void cfq_insert_request(request_queue_t *q, struct request *rq, int where) { struct cfq_data *cfqd = q->elevator->elevator_data; - struct cfq_rq *crq = RQ_DATA(rq); switch (where) { case ELEVATOR_INSERT_BACK: - while (cfq_dispatch_requests(q, cfqd->cfq_quantum)) + while (cfq_dispatch_requests(q, INT_MAX, 1)) ; list_add_tail(&rq->queuelist, 
&q->queue_head); + /* + * If we were idling with pending requests on + * inactive cfqqs, force dispatching will + * remove the idle timer and the queue won't + * be kicked by __make_request() afterward. + * Kick it here. + */ + kblockd_schedule_work(&cfqd->unplug_work); break; case ELEVATOR_INSERT_FRONT: list_add(&rq->queuelist, &q->queue_head); break; case ELEVATOR_INSERT_SORT: BUG_ON(!blk_fs_request(rq)); - cfq_enqueue(cfqd, crq); + cfq_enqueue(cfqd, rq); break; default: printk("%s: bad insert point %d\n", __FUNCTION__,where); return; } +} - if (rq_mergeable(rq)) { - cfq_add_crq_hash(cfqd, crq); - - if (!q->last_merge) - q->last_merge = rq; - } +static inline int cfq_pending_requests(struct cfq_data *cfqd) +{ + return !list_empty(&cfqd->queue->queue_head) || cfqd->busy_queues; } static int cfq_queue_empty(request_queue_t *q) { struct cfq_data *cfqd = q->elevator->elevator_data; - return list_empty(&q->queue_head) && list_empty(&cfqd->rr_list); + return !cfq_pending_requests(cfqd); } static void cfq_completed_request(request_queue_t *q, struct request *rq) @@ -1332,51 +1781,132 @@ cfq_latter_request(request_queue_t *q, struct request *rq) return NULL; } -static int cfq_may_queue(request_queue_t *q, int rw) +/* + * we temporarily boost lower priority queues if they are holding fs exclusive + * resources. they are boosted to normal prio (CLASS_BE/4) + */ +static void cfq_prio_boost(struct cfq_queue *cfqq) { - struct cfq_data *cfqd = q->elevator->elevator_data; - struct cfq_queue *cfqq; - int ret = ELV_MQUEUE_MAY; + const int ioprio_class = cfqq->ioprio_class; + const int ioprio = cfqq->ioprio; - if (current->flags & PF_MEMALLOC) - return ELV_MQUEUE_MAY; + if (has_fs_excl()) { + /* + * boost idle prio on transactions that would lock out other + * users of the filesystem + */ + if (cfq_class_idle(cfqq)) + cfqq->ioprio_class = IOPRIO_CLASS_BE; + if (cfqq->ioprio > IOPRIO_NORM) + cfqq->ioprio = IOPRIO_NORM; + } else { + /* + * check if we need to unboost the queue + */ + if (cfqq->ioprio_class != cfqq->org_ioprio_class) + cfqq->ioprio_class = cfqq->org_ioprio_class; + if (cfqq->ioprio != cfqq->org_ioprio) + cfqq->ioprio = cfqq->org_ioprio; + } - cfqq = cfq_find_cfq_hash(cfqd, cfq_hash_key(cfqd, current)); - if (cfqq) { - int limit = cfqd->max_queued; + /* + * refile between round-robin lists if we moved the priority class + */ + if ((ioprio_class != cfqq->ioprio_class || ioprio != cfqq->ioprio) && + cfqq->on_rr) + cfq_resort_rr_list(cfqq, 0); +} - if (cfqq->allocated[rw] < cfqd->cfq_queued) - return ELV_MQUEUE_MUST; +static inline pid_t cfq_queue_pid(struct task_struct *task, int rw) +{ + if (rw == READ || process_sync(task)) + return task->pid; - if (cfqd->busy_queues) - limit = q->nr_requests / cfqd->busy_queues; + return CFQ_KEY_ASYNC; +} - if (limit < cfqd->cfq_queued) - limit = cfqd->cfq_queued; - else if (limit > cfqd->max_queued) - limit = cfqd->max_queued; +static inline int +__cfq_may_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq, + struct task_struct *task, int rw) +{ + if (cfqq->wait_request && cfqq->must_alloc) + return ELV_MQUEUE_MUST; - if (cfqq->allocated[rw] >= limit) { - if (limit > cfqq->alloc_limit[rw]) - cfqq->alloc_limit[rw] = limit; + return ELV_MQUEUE_MAY; +#if 0 + if (!cfqq || task->flags & PF_MEMALLOC) + return ELV_MQUEUE_MAY; + if (!cfqq->allocated[rw] || cfqq->must_alloc) { + if (cfqq->wait_request) + return ELV_MQUEUE_MUST; - ret = ELV_MQUEUE_NO; + /* + * only allow 1 ELV_MQUEUE_MUST per slice, otherwise we + * can quickly flood the queue with writes from a 
single task + */ + if (rw == READ || !cfqq->must_alloc_slice) { + cfqq->must_alloc_slice = 1; + return ELV_MQUEUE_MUST; } + + return ELV_MQUEUE_MAY; } + if (cfq_class_idle(cfqq)) + return ELV_MQUEUE_NO; + if (cfqq->allocated[rw] >= cfqd->max_queued) { + struct io_context *ioc = get_io_context(GFP_ATOMIC); + int ret = ELV_MQUEUE_NO; - return ret; + if (ioc && ioc->nr_batch_requests) + ret = ELV_MQUEUE_MAY; + + put_io_context(ioc); + return ret; + } + + return ELV_MQUEUE_MAY; +#endif +} + +static int cfq_may_queue(request_queue_t *q, int rw, struct bio *bio) +{ + struct cfq_data *cfqd = q->elevator->elevator_data; + struct task_struct *tsk = current; + struct cfq_queue *cfqq; + + /* + * don't force setup of a queue from here, as a call to may_queue + * does not necessarily imply that a request actually will be queued. + * so just lookup a possibly existing queue, or return 'may queue' + * if that fails + */ + cfqq = cfq_find_cfq_hash(cfqd, cfq_queue_pid(tsk, rw)); + if (cfqq) { + cfq_init_prio_data(cfqq); + cfq_prio_boost(cfqq); + + return __cfq_may_queue(cfqd, cfqq, tsk, rw); + } + + return ELV_MQUEUE_MAY; } static void cfq_check_waiters(request_queue_t *q, struct cfq_queue *cfqq) { + struct cfq_data *cfqd = q->elevator->elevator_data; struct request_list *rl = &q->rq; - const int write = waitqueue_active(&rl->wait[WRITE]); - const int read = waitqueue_active(&rl->wait[READ]); - if (read && cfqq->allocated[READ] < cfqq->alloc_limit[READ]) - wake_up(&rl->wait[READ]); - if (write && cfqq->allocated[WRITE] < cfqq->alloc_limit[WRITE]) - wake_up(&rl->wait[WRITE]); + if (cfqq->allocated[READ] <= cfqd->max_queued || cfqd->rq_starved) { + smp_mb(); + if (waitqueue_active(&rl->wait[READ])) + wake_up(&rl->wait[READ]); + } + + if (cfqq->allocated[WRITE] <= cfqd->max_queued || cfqd->rq_starved) { + smp_mb(); + if (waitqueue_active(&rl->wait[WRITE])) + wake_up(&rl->wait[WRITE]); + } } /* @@ -1389,69 +1919,59 @@ static void cfq_put_request(request_queue_t *q, struct request *rq) if (crq) { struct cfq_queue *cfqq = crq->cfq_queue; + const int rw = rq_data_dir(rq); - BUG_ON(q->last_merge == rq); - BUG_ON(!hlist_unhashed(&crq->hash)); - - if (crq->io_context) - put_io_context(crq->io_context->ioc); + BUG_ON(!cfqq->allocated[rw]); + cfqq->allocated[rw]--; - BUG_ON(!cfqq->allocated[crq->is_write]); - cfqq->allocated[crq->is_write]--; + put_io_context(crq->io_context->ioc); mempool_free(crq, cfqd->crq_pool); rq->elevator_private = NULL; - smp_mb(); cfq_check_waiters(q, cfqq); cfq_put_queue(cfqq); } } /* - * Allocate cfq data structures associated with this request. A queue and + * Allocate cfq data structures associated with this request. 
*/ -static int cfq_set_request(request_queue_t *q, struct request *rq, int gfp_mask) +static int +cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio, + int gfp_mask) { struct cfq_data *cfqd = q->elevator->elevator_data; struct cfq_io_context *cic; const int rw = rq_data_dir(rq); - struct cfq_queue *cfqq, *saved_cfqq; + struct cfq_queue *cfqq; struct cfq_rq *crq; unsigned long flags; might_sleep_if(gfp_mask & __GFP_WAIT); + cic = cfq_get_io_context(cfqd, cfq_queue_pid(current, rw), gfp_mask); + spin_lock_irqsave(q->queue_lock, flags); - cfqq = __cfq_get_queue(cfqd, cfq_hash_key(cfqd, current), gfp_mask); - if (!cfqq) - goto out_lock; + if (!cic) + goto queue_fail; + + if (!cic->cfqq) { + cfqq = cfq_get_queue(cfqd, current->pid, gfp_mask); + if (!cfqq) + goto queue_fail; -repeat: - if (cfqq->allocated[rw] >= cfqd->max_queued) - goto out_lock; + cic->cfqq = cfqq; + } else + cfqq = cic->cfqq; cfqq->allocated[rw]++; + cfqq->must_alloc = 0; + cfqd->rq_starved = 0; + atomic_inc(&cfqq->ref); spin_unlock_irqrestore(q->queue_lock, flags); - /* - * if hashing type has changed, the cfq_queue might change here. - */ - saved_cfqq = cfqq; - cic = cfq_get_io_context(&cfqq, gfp_mask); - if (!cic) - goto err; - - /* - * repeat allocation checks on queue change - */ - if (unlikely(saved_cfqq != cfqq)) { - spin_lock_irqsave(q->queue_lock, flags); - saved_cfqq->allocated[rw]--; - goto repeat; - } - crq = mempool_alloc(cfqd->crq_pool, gfp_mask); if (crq) { RB_CLEAR(&crq->rb_node); @@ -1460,24 +1980,130 @@ repeat: INIT_HLIST_NODE(&crq->hash); crq->cfq_queue = cfqq; crq->io_context = cic; - crq->service_start = crq->queue_start = 0; - crq->in_flight = crq->accounted = crq->is_sync = 0; - crq->is_write = rw; + crq->in_flight = crq->accounted = 0; + crq->is_sync = (rw == READ || process_sync(current)); + crq->requeued = 0; rq->elevator_private = crq; - cfqq->alloc_limit[rw] = 0; return 0; } - put_io_context(cic->ioc); -err: spin_lock_irqsave(q->queue_lock, flags); cfqq->allocated[rw]--; + if (!(cfqq->allocated[0] + cfqq->allocated[1])) + cfqq->must_alloc = 1; cfq_put_queue(cfqq); -out_lock: +queue_fail: + if (cic) + put_io_context(cic->ioc); + /* + * mark us rq allocation starved. we need to kickstart the process + * ourselves if there are no pending requests that can do it for us. 
+ * that would be an extremely rare OOM situation + */ + cfqd->rq_starved = 1; + kblockd_schedule_work(&cfqd->unplug_work); spin_unlock_irqrestore(q->queue_lock, flags); return 1; } +static void cfq_kick_queue(void *data) +{ + request_queue_t *q = data; + struct cfq_data *cfqd = q->elevator->elevator_data; + unsigned long flags; + + spin_lock_irqsave(q->queue_lock, flags); + + if (cfqd->rq_starved) { + struct request_list *rl = &q->rq; + + /* + * we aren't guaranteed to get a request after this, but we + * have to be opportunistic + */ + smp_mb(); + if (waitqueue_active(&rl->wait[READ])) + wake_up(&rl->wait[READ]); + if (waitqueue_active(&rl->wait[WRITE])) + wake_up(&rl->wait[WRITE]); + } + + blk_remove_plug(q); + q->request_fn(q); + spin_unlock_irqrestore(q->queue_lock, flags); +} + +/* + * Timer running if the active_queue is currently idling inside its time slice + */ +static void cfq_idle_slice_timer(unsigned long data) +{ + struct cfq_data *cfqd = (struct cfq_data *) data; + struct cfq_queue *cfqq; + unsigned long flags; + + spin_lock_irqsave(cfqd->queue->queue_lock, flags); + + if ((cfqq = cfqd->active_queue) != NULL) { + unsigned long now = jiffies; + + /* + * expired + */ + if (time_after(now, cfqq->slice_end)) + goto expire; + + /* + * only expire and reinvoke request handler, if there are + * other queues with pending requests + */ + if (!cfq_pending_requests(cfqd)) { + cfqd->idle_slice_timer.expires = min(now + cfqd->cfq_slice_idle, cfqq->slice_end); + add_timer(&cfqd->idle_slice_timer); + goto out_cont; + } + + /* + * not expired and it has a request pending, let it dispatch + */ + if (!RB_EMPTY(&cfqq->sort_list)) { + cfqq->must_dispatch = 1; + goto out_kick; + } + } +expire: + cfq_slice_expired(cfqd, 0); +out_kick: + if (cfq_pending_requests(cfqd)) + kblockd_schedule_work(&cfqd->unplug_work); +out_cont: + spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); +} + +/* + * Timer running if an idle class queue is waiting for service + */ +static void cfq_idle_class_timer(unsigned long data) +{ + struct cfq_data *cfqd = (struct cfq_data *) data; + unsigned long flags, end; + + spin_lock_irqsave(cfqd->queue->queue_lock, flags); + + /* + * race with a non-idle queue, reset timer + */ + end = cfqd->last_end_request + CFQ_IDLE_GRACE; + if (!time_after_eq(jiffies, end)) { + cfqd->idle_class_timer.expires = end; + add_timer(&cfqd->idle_class_timer); + } else + kblockd_schedule_work(&cfqd->unplug_work); + + spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); +} + + static void cfq_put_cfqd(struct cfq_data *cfqd) { request_queue_t *q = cfqd->queue; @@ -1485,6 +2111,8 @@ static void cfq_put_cfqd(struct cfq_data *cfqd) if (!atomic_dec_and_test(&cfqd->ref)) return; + blk_sync_queue(q); + blk_put_queue(q); mempool_destroy(cfqd->crq_pool); @@ -1495,7 +2123,11 @@ static void cfq_put_cfqd(struct cfq_data *cfqd) static void cfq_exit_queue(elevator_t *e) { - cfq_put_cfqd(e->elevator_data); + struct cfq_data *cfqd = e->elevator_data; + + del_timer_sync(&cfqd->idle_slice_timer); + del_timer_sync(&cfqd->idle_class_timer); + cfq_put_cfqd(cfqd); } static int cfq_init_queue(request_queue_t *q, elevator_t *e) @@ -1508,7 +2140,13 @@ static int cfq_init_queue(request_queue_t *q, elevator_t *e) return -ENOMEM; memset(cfqd, 0, sizeof(*cfqd)); - INIT_LIST_HEAD(&cfqd->rr_list); + + for (i = 0; i < CFQ_PRIO_LISTS; i++) + INIT_LIST_HEAD(&cfqd->rr_list[i]); + + INIT_LIST_HEAD(&cfqd->busy_rr); + INIT_LIST_HEAD(&cfqd->cur_rr); + INIT_LIST_HEAD(&cfqd->idle_rr); INIT_LIST_HEAD(&cfqd->empty_list); cfqd->crq_hash 
= kmalloc(sizeof(struct hlist_head) * CFQ_MHASH_ENTRIES, GFP_KERNEL); @@ -1533,25 +2171,32 @@ static int cfq_init_queue(request_queue_t *q, elevator_t *e) cfqd->queue = q; atomic_inc(&q->refcnt); - /* - * just set it to some high value, we want anyone to be able to queue - * some requests. fairness is handled differently - */ - q->nr_requests = 1024; - cfqd->max_queued = q->nr_requests / 16; + cfqd->max_queued = q->nr_requests / 4; q->nr_batching = cfq_queued; - cfqd->key_type = CFQ_KEY_TGID; - cfqd->find_best_crq = 1; + + init_timer(&cfqd->idle_slice_timer); + cfqd->idle_slice_timer.function = cfq_idle_slice_timer; + cfqd->idle_slice_timer.data = (unsigned long) cfqd; + + init_timer(&cfqd->idle_class_timer); + cfqd->idle_class_timer.function = cfq_idle_class_timer; + cfqd->idle_class_timer.data = (unsigned long) cfqd; + + INIT_WORK(&cfqd->unplug_work, cfq_kick_queue, q); + atomic_set(&cfqd->ref, 1); cfqd->cfq_queued = cfq_queued; cfqd->cfq_quantum = cfq_quantum; - cfqd->cfq_fifo_expire_r = cfq_fifo_expire_r; - cfqd->cfq_fifo_expire_w = cfq_fifo_expire_w; - cfqd->cfq_fifo_batch_expire = cfq_fifo_rate; + cfqd->cfq_fifo_expire[0] = cfq_fifo_expire[0]; + cfqd->cfq_fifo_expire[1] = cfq_fifo_expire[1]; cfqd->cfq_back_max = cfq_back_max; cfqd->cfq_back_penalty = cfq_back_penalty; - + cfqd->cfq_slice[0] = cfq_slice_async; + cfqd->cfq_slice[1] = cfq_slice_sync; + cfqd->cfq_slice_async_rq = cfq_slice_async_rq; + cfqd->cfq_slice_idle = cfq_slice_idle; + cfqd->cfq_max_depth = cfq_max_depth; return 0; out_crqpool: kfree(cfqd->cfq_hash); @@ -1595,7 +2240,6 @@ fail: return -ENOMEM; } - /* * sysfs parts below --> */ @@ -1620,45 +2264,6 @@ cfq_var_store(unsigned int *var, const char *page, size_t count) return count; } -static ssize_t -cfq_clear_elapsed(struct cfq_data *cfqd, const char *page, size_t count) -{ - max_elapsed_dispatch = max_elapsed_crq = 0; - return count; -} - -static ssize_t -cfq_set_key_type(struct cfq_data *cfqd, const char *page, size_t count) -{ - spin_lock_irq(cfqd->queue->queue_lock); - if (!strncmp(page, "pgid", 4)) - cfqd->key_type = CFQ_KEY_PGID; - else if (!strncmp(page, "tgid", 4)) - cfqd->key_type = CFQ_KEY_TGID; - else if (!strncmp(page, "uid", 3)) - cfqd->key_type = CFQ_KEY_UID; - else if (!strncmp(page, "gid", 3)) - cfqd->key_type = CFQ_KEY_GID; - spin_unlock_irq(cfqd->queue->queue_lock); - return count; -} - -static ssize_t -cfq_read_key_type(struct cfq_data *cfqd, char *page) -{ - ssize_t len = 0; - int i; - - for (i = CFQ_KEY_PGID; i < CFQ_KEY_LAST; i++) { - if (cfqd->key_type == i) - len += sprintf(page+len, "[%s] ", cfq_key_types[i]); - else - len += sprintf(page+len, "%s ", cfq_key_types[i]); - } - len += sprintf(page+len, "\n"); - return len; -} - #define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \ static ssize_t __FUNC(struct cfq_data *cfqd, char *page) \ { \ @@ -1669,12 +2274,15 @@ static ssize_t __FUNC(struct cfq_data *cfqd, char *page) \ } SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0); SHOW_FUNCTION(cfq_queued_show, cfqd->cfq_queued, 0); -SHOW_FUNCTION(cfq_fifo_expire_r_show, cfqd->cfq_fifo_expire_r, 1); -SHOW_FUNCTION(cfq_fifo_expire_w_show, cfqd->cfq_fifo_expire_w, 1); -SHOW_FUNCTION(cfq_fifo_batch_expire_show, cfqd->cfq_fifo_batch_expire, 1); -SHOW_FUNCTION(cfq_find_best_show, cfqd->find_best_crq, 0); +SHOW_FUNCTION(cfq_fifo_expire_sync_show, cfqd->cfq_fifo_expire[1], 1); +SHOW_FUNCTION(cfq_fifo_expire_async_show, cfqd->cfq_fifo_expire[0], 1); SHOW_FUNCTION(cfq_back_max_show, cfqd->cfq_back_max, 0); SHOW_FUNCTION(cfq_back_penalty_show, 
cfqd->cfq_back_penalty, 0); +SHOW_FUNCTION(cfq_slice_idle_show, cfqd->cfq_slice_idle, 1); +SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1); +SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1); +SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0); +SHOW_FUNCTION(cfq_max_depth_show, cfqd->cfq_max_depth, 0); #undef SHOW_FUNCTION #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ @@ -1694,12 +2302,15 @@ static ssize_t __FUNC(struct cfq_data *cfqd, const char *page, size_t count) \ } STORE_FUNCTION(cfq_quantum_store, &cfqd->cfq_quantum, 1, UINT_MAX, 0); STORE_FUNCTION(cfq_queued_store, &cfqd->cfq_queued, 1, UINT_MAX, 0); -STORE_FUNCTION(cfq_fifo_expire_r_store, &cfqd->cfq_fifo_expire_r, 1, UINT_MAX, 1); -STORE_FUNCTION(cfq_fifo_expire_w_store, &cfqd->cfq_fifo_expire_w, 1, UINT_MAX, 1); -STORE_FUNCTION(cfq_fifo_batch_expire_store, &cfqd->cfq_fifo_batch_expire, 0, UINT_MAX, 1); -STORE_FUNCTION(cfq_find_best_store, &cfqd->find_best_crq, 0, 1, 0); +STORE_FUNCTION(cfq_fifo_expire_sync_store, &cfqd->cfq_fifo_expire[1], 1, UINT_MAX, 1); +STORE_FUNCTION(cfq_fifo_expire_async_store, &cfqd->cfq_fifo_expire[0], 1, UINT_MAX, 1); STORE_FUNCTION(cfq_back_max_store, &cfqd->cfq_back_max, 0, UINT_MAX, 0); STORE_FUNCTION(cfq_back_penalty_store, &cfqd->cfq_back_penalty, 1, UINT_MAX, 0); +STORE_FUNCTION(cfq_slice_idle_store, &cfqd->cfq_slice_idle, 0, UINT_MAX, 1); +STORE_FUNCTION(cfq_slice_sync_store, &cfqd->cfq_slice[1], 1, UINT_MAX, 1); +STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1); +STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1, UINT_MAX, 0); +STORE_FUNCTION(cfq_max_depth_store, &cfqd->cfq_max_depth, 1, UINT_MAX, 0); #undef STORE_FUNCTION static struct cfq_fs_entry cfq_quantum_entry = { @@ -1712,25 +2323,15 @@ static struct cfq_fs_entry cfq_queued_entry = { .show = cfq_queued_show, .store = cfq_queued_store, }; -static struct cfq_fs_entry cfq_fifo_expire_r_entry = { +static struct cfq_fs_entry cfq_fifo_expire_sync_entry = { .attr = {.name = "fifo_expire_sync", .mode = S_IRUGO | S_IWUSR }, - .show = cfq_fifo_expire_r_show, - .store = cfq_fifo_expire_r_store, + .show = cfq_fifo_expire_sync_show, + .store = cfq_fifo_expire_sync_store, }; -static struct cfq_fs_entry cfq_fifo_expire_w_entry = { +static struct cfq_fs_entry cfq_fifo_expire_async_entry = { .attr = {.name = "fifo_expire_async", .mode = S_IRUGO | S_IWUSR }, - .show = cfq_fifo_expire_w_show, - .store = cfq_fifo_expire_w_store, -}; -static struct cfq_fs_entry cfq_fifo_batch_expire_entry = { - .attr = {.name = "fifo_batch_expire", .mode = S_IRUGO | S_IWUSR }, - .show = cfq_fifo_batch_expire_show, - .store = cfq_fifo_batch_expire_store, -}; -static struct cfq_fs_entry cfq_find_best_entry = { - .attr = {.name = "find_best_crq", .mode = S_IRUGO | S_IWUSR }, - .show = cfq_find_best_show, - .store = cfq_find_best_store, + .show = cfq_fifo_expire_async_show, + .store = cfq_fifo_expire_async_store, }; static struct cfq_fs_entry cfq_back_max_entry = { .attr = {.name = "back_seek_max", .mode = S_IRUGO | S_IWUSR }, @@ -1742,27 +2343,43 @@ static struct cfq_fs_entry cfq_back_penalty_entry = { .show = cfq_back_penalty_show, .store = cfq_back_penalty_store, }; -static struct cfq_fs_entry cfq_clear_elapsed_entry = { - .attr = {.name = "clear_elapsed", .mode = S_IWUSR }, - .store = cfq_clear_elapsed, +static struct cfq_fs_entry cfq_slice_sync_entry = { + .attr = {.name = "slice_sync", .mode = S_IRUGO | S_IWUSR }, + .show = cfq_slice_sync_show, + .store = cfq_slice_sync_store, }; 
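The slice_sync and slice_async tunables exported here are the base values that the cfq_prio_to_slice() helper earlier in this patch scales by I/O priority: each priority step away from the BE midpoint of 4 moves the slice by base/CFQ_SLICE_SCALE, so higher-priority queues get longer slices rather than more frequent ones. A standalone sketch of that arithmetic; CFQ_SLICE_SCALE is defined elsewhere in the patch and a value of 4 is assumed here, with 100 jiffies standing in for the base slice:

/* Worked example of the cfq_prio_to_slice() scaling; CFQ_SLICE_SCALE = 4
 * is an assumption, the real constant is not shown in this log. */
#include <stdio.h>

#define CFQ_SLICE_SCALE 4

static int prio_to_slice(int base_slice, int ioprio)
{
	/* same arithmetic as the patch: prio 4 gets the base slice,
	 * higher prio (lower number) gets more, lower prio gets less */
	return base_slice + base_slice / CFQ_SLICE_SCALE * (4 - ioprio);
}

int main(void)
{
	int prio;

	/* with a base of 100 jiffies: prio 0 -> 200, 4 -> 100, 7 -> 25 */
	for (prio = 0; prio < 8; prio++)
		printf("ioprio %d -> slice %d\n", prio, prio_to_slice(100, prio));
	return 0;
}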
-static struct cfq_fs_entry cfq_key_type_entry = { - .attr = {.name = "key_type", .mode = S_IRUGO | S_IWUSR }, - .show = cfq_read_key_type, - .store = cfq_set_key_type, +static struct cfq_fs_entry cfq_slice_async_entry = { + .attr = {.name = "slice_async", .mode = S_IRUGO | S_IWUSR }, + .show = cfq_slice_async_show, + .store = cfq_slice_async_store, +}; +static struct cfq_fs_entry cfq_slice_async_rq_entry = { + .attr = {.name = "slice_async_rq", .mode = S_IRUGO | S_IWUSR }, + .show = cfq_slice_async_rq_show, + .store = cfq_slice_async_rq_store, +}; +static struct cfq_fs_entry cfq_slice_idle_entry = { + .attr = {.name = "slice_idle", .mode = S_IRUGO | S_IWUSR }, + .show = cfq_slice_idle_show, + .store = cfq_slice_idle_store, +}; +static struct cfq_fs_entry cfq_max_depth_entry = { + .attr = {.name = "max_depth", .mode = S_IRUGO | S_IWUSR }, + .show = cfq_max_depth_show, + .store = cfq_max_depth_store, }; - static struct attribute *default_attrs[] = { &cfq_quantum_entry.attr, &cfq_queued_entry.attr, - &cfq_fifo_expire_r_entry.attr, - &cfq_fifo_expire_w_entry.attr, - &cfq_fifo_batch_expire_entry.attr, - &cfq_key_type_entry.attr, - &cfq_find_best_entry.attr, + &cfq_fifo_expire_sync_entry.attr, + &cfq_fifo_expire_async_entry.attr, &cfq_back_max_entry.attr, &cfq_back_penalty_entry.attr, - &cfq_clear_elapsed_entry.attr, + &cfq_slice_sync_entry.attr, + &cfq_slice_async_entry.attr, + &cfq_slice_async_rq_entry.attr, + &cfq_slice_idle_entry.attr, + &cfq_max_depth_entry.attr, NULL, }; @@ -1832,21 +2449,46 @@ static int __init cfq_init(void) { int ret; + /* + * could be 0 on HZ < 1000 setups + */ + if (!cfq_slice_async) + cfq_slice_async = 1; + if (!cfq_slice_idle) + cfq_slice_idle = 1; + if (cfq_slab_setup()) return -ENOMEM; ret = elv_register(&iosched_cfq); - if (!ret) { - __module_get(THIS_MODULE); - return 0; - } + if (ret) + cfq_slab_kill(); - cfq_slab_kill(); return ret; } static void __exit cfq_exit(void) { + struct task_struct *g, *p; + unsigned long flags; + + read_lock_irqsave(&tasklist_lock, flags); + + /* + * iterate each process in the system, removing our io_context + */ + do_each_thread(g, p) { + struct io_context *ioc = p->io_context; + + if (ioc && ioc->cic) { + ioc->cic->exit(ioc->cic); + cfq_free_io_context(ioc->cic); + ioc->cic = NULL; + } + } while_each_thread(g, p); + + read_unlock_irqrestore(&tasklist_lock, flags); + cfq_slab_kill(); elv_unregister(&iosched_cfq); } diff --git a/drivers/block/deadline-iosched.c b/drivers/block/deadline-iosched.c index 4bc2fea7327..ff5201e0215 100644 --- a/drivers/block/deadline-iosched.c +++ b/drivers/block/deadline-iosched.c @@ -760,7 +760,8 @@ static void deadline_put_request(request_queue_t *q, struct request *rq) } static int -deadline_set_request(request_queue_t *q, struct request *rq, int gfp_mask) +deadline_set_request(request_queue_t *q, struct request *rq, struct bio *bio, + int gfp_mask) { struct deadline_data *dd = q->elevator->elevator_data; struct deadline_rq *drq; diff --git a/drivers/block/elevator.c b/drivers/block/elevator.c index f831f08f839..98f0126a2de 100644 --- a/drivers/block/elevator.c +++ b/drivers/block/elevator.c @@ -486,12 +486,13 @@ struct request *elv_former_request(request_queue_t *q, struct request *rq) return NULL; } -int elv_set_request(request_queue_t *q, struct request *rq, int gfp_mask) +int elv_set_request(request_queue_t *q, struct request *rq, struct bio *bio, + int gfp_mask) { elevator_t *e = q->elevator; if (e->ops->elevator_set_req_fn) - return e->ops->elevator_set_req_fn(q, rq, gfp_mask); + return 
e->ops->elevator_set_req_fn(q, rq, bio, gfp_mask); rq->elevator_private = NULL; return 0; @@ -505,12 +506,12 @@ void elv_put_request(request_queue_t *q, struct request *rq) e->ops->elevator_put_req_fn(q, rq); } -int elv_may_queue(request_queue_t *q, int rw) +int elv_may_queue(request_queue_t *q, int rw, struct bio *bio) { elevator_t *e = q->elevator; if (e->ops->elevator_may_queue_fn) - return e->ops->elevator_may_queue_fn(q, rw); + return e->ops->elevator_may_queue_fn(q, rw, bio); return ELV_MQUEUE_MAY; } diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c index 60e64091de1..234fdcfbdf0 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c @@ -276,6 +276,7 @@ static inline void rq_init(request_queue_t *q, struct request *rq) rq->errors = 0; rq->rq_status = RQ_ACTIVE; rq->bio = rq->biotail = NULL; + rq->ioprio = 0; rq->buffer = NULL; rq->ref_count = 1; rq->q = q; @@ -1442,11 +1443,7 @@ void __generic_unplug_device(request_queue_t *q) if (!blk_remove_plug(q)) return; - /* - * was plugged, fire request_fn if queue has stuff to do - */ - if (elv_next_request(q)) - q->request_fn(q); + q->request_fn(q); } EXPORT_SYMBOL(__generic_unplug_device); @@ -1776,8 +1773,8 @@ static inline void blk_free_request(request_queue_t *q, struct request *rq) mempool_free(rq, q->rq.rq_pool); } -static inline struct request *blk_alloc_request(request_queue_t *q, int rw, - int gfp_mask) +static inline struct request * +blk_alloc_request(request_queue_t *q, int rw, struct bio *bio, int gfp_mask) { struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); @@ -1790,7 +1787,7 @@ static inline struct request *blk_alloc_request(request_queue_t *q, int rw, */ rq->flags = rw; - if (!elv_set_request(q, rq, gfp_mask)) + if (!elv_set_request(q, rq, bio, gfp_mask)) return rq; mempool_free(rq, q->rq.rq_pool); @@ -1872,7 +1869,8 @@ static void freed_request(request_queue_t *q, int rw) /* * Get a free request, queue_lock must not be held */ -static struct request *get_request(request_queue_t *q, int rw, int gfp_mask) +static struct request *get_request(request_queue_t *q, int rw, struct bio *bio, + int gfp_mask) { struct request *rq = NULL; struct request_list *rl = &q->rq; @@ -1895,7 +1893,7 @@ static struct request *get_request(request_queue_t *q, int rw, int gfp_mask) } } - switch (elv_may_queue(q, rw)) { + switch (elv_may_queue(q, rw, bio)) { case ELV_MQUEUE_NO: goto rq_starved; case ELV_MQUEUE_MAY: @@ -1920,7 +1918,7 @@ get_rq: set_queue_congested(q, rw); spin_unlock_irq(q->queue_lock); - rq = blk_alloc_request(q, rw, gfp_mask); + rq = blk_alloc_request(q, rw, bio, gfp_mask); if (!rq) { /* * Allocation failed presumably due to memory. Undo anything @@ -1961,7 +1959,8 @@ out: * No available requests for this queue, unplug the device and wait for some * requests to become available. 
*/ -static struct request *get_request_wait(request_queue_t *q, int rw) +static struct request *get_request_wait(request_queue_t *q, int rw, + struct bio *bio) { DEFINE_WAIT(wait); struct request *rq; @@ -1972,7 +1971,7 @@ static struct request *get_request_wait(request_queue_t *q, int rw) prepare_to_wait_exclusive(&rl->wait[rw], &wait, TASK_UNINTERRUPTIBLE); - rq = get_request(q, rw, GFP_NOIO); + rq = get_request(q, rw, bio, GFP_NOIO); if (!rq) { struct io_context *ioc; @@ -2003,9 +2002,9 @@ struct request *blk_get_request(request_queue_t *q, int rw, int gfp_mask) BUG_ON(rw != READ && rw != WRITE); if (gfp_mask & __GFP_WAIT) - rq = get_request_wait(q, rw); + rq = get_request_wait(q, rw, NULL); else - rq = get_request(q, rw, gfp_mask); + rq = get_request(q, rw, NULL, gfp_mask); return rq; } @@ -2333,7 +2332,6 @@ static void __blk_put_request(request_queue_t *q, struct request *req) return; req->rq_status = RQ_INACTIVE; - req->q = NULL; req->rl = NULL; /* @@ -2462,6 +2460,8 @@ static int attempt_merge(request_queue_t *q, struct request *req, req->rq_disk->in_flight--; } + req->ioprio = ioprio_best(req->ioprio, next->ioprio); + __blk_put_request(q, next); return 1; } @@ -2514,11 +2514,13 @@ static int __make_request(request_queue_t *q, struct bio *bio) { struct request *req, *freereq = NULL; int el_ret, rw, nr_sectors, cur_nr_sectors, barrier, err, sync; + unsigned short prio; sector_t sector; sector = bio->bi_sector; nr_sectors = bio_sectors(bio); cur_nr_sectors = bio_cur_sectors(bio); + prio = bio_prio(bio); rw = bio_data_dir(bio); sync = bio_sync(bio); @@ -2559,6 +2561,7 @@ again: req->biotail->bi_next = bio; req->biotail = bio; req->nr_sectors = req->hard_nr_sectors += nr_sectors; + req->ioprio = ioprio_best(req->ioprio, prio); drive_stat_acct(req, nr_sectors, 0); if (!attempt_back_merge(q, req)) elv_merged_request(q, req); @@ -2583,6 +2586,7 @@ again: req->hard_cur_sectors = cur_nr_sectors; req->sector = req->hard_sector = sector; req->nr_sectors = req->hard_nr_sectors += nr_sectors; + req->ioprio = ioprio_best(req->ioprio, prio); drive_stat_acct(req, nr_sectors, 0); if (!attempt_front_merge(q, req)) elv_merged_request(q, req); @@ -2610,7 +2614,7 @@ get_rq: freereq = NULL; } else { spin_unlock_irq(q->queue_lock); - if ((freereq = get_request(q, rw, GFP_ATOMIC)) == NULL) { + if ((freereq = get_request(q, rw, bio, GFP_ATOMIC)) == NULL) { /* * READA bit set */ @@ -2618,7 +2622,7 @@ get_rq: if (bio_rw_ahead(bio)) goto end_io; - freereq = get_request_wait(q, rw); + freereq = get_request_wait(q, rw, bio); } goto again; } @@ -2646,6 +2650,7 @@ get_rq: req->buffer = bio_data(bio); /* see ->buffer comment above */ req->waiting = NULL; req->bio = req->biotail = bio; + req->ioprio = prio; req->rq_disk = bio->bi_bdev->bd_disk; req->start_time = jiffies; @@ -2674,7 +2679,7 @@ static inline void blk_partition_remap(struct bio *bio) if (bdev != bdev->bd_contains) { struct hd_struct *p = bdev->bd_part; - switch (bio->bi_rw) { + switch (bio_data_dir(bio)) { case READ: p->read_sectors += bio_sectors(bio); p->reads++; @@ -2693,6 +2698,7 @@ void blk_finish_queue_drain(request_queue_t *q) { struct request_list *rl = &q->rq; struct request *rq; + int requeued = 0; spin_lock_irq(q->queue_lock); clear_bit(QUEUE_FLAG_DRAIN, &q->queue_flags); @@ -2701,9 +2707,13 @@ void blk_finish_queue_drain(request_queue_t *q) rq = list_entry_rq(q->drain_list.next); list_del_init(&rq->queuelist); - __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 1); + elv_requeue_request(q, rq); + requeued++; } + if (requeued) + 
q->request_fn(q); + spin_unlock_irq(q->queue_lock); wake_up(&rl->wait[0]); @@ -2900,7 +2910,7 @@ void submit_bio(int rw, struct bio *bio) BIO_BUG_ON(!bio->bi_size); BIO_BUG_ON(!bio->bi_io_vec); - bio->bi_rw = rw; + bio->bi_rw |= rw; if (rw & WRITE) mod_page_state(pgpgout, count); else @@ -3257,8 +3267,11 @@ void exit_io_context(void) struct io_context *ioc; local_irq_save(flags); + task_lock(current); ioc = current->io_context; current->io_context = NULL; + ioc->task = NULL; + task_unlock(current); local_irq_restore(flags); if (ioc->aic && ioc->aic->exit) @@ -3293,12 +3306,12 @@ struct io_context *get_io_context(int gfp_flags) ret = kmem_cache_alloc(iocontext_cachep, gfp_flags); if (ret) { atomic_set(&ret->refcount, 1); - ret->pid = tsk->pid; + ret->task = current; + ret->set_ioprio = NULL; ret->last_waited = jiffies; /* doesn't matter... */ ret->nr_batch_requests = 0; /* because this is 0 */ ret->aic = NULL; ret->cic = NULL; - spin_lock_init(&ret->lock); local_irq_save(flags); diff --git a/fs/Makefile b/fs/Makefile index fc92e59e9fa..20edcf28bfd 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -10,6 +10,7 @@ obj-y := open.o read_write.o file_table.o buffer.o bio.o super.o \ ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \ attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \ seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \ + ioprio.o obj-$(CONFIG_EPOLL) += eventpoll.o obj-$(CONFIG_COMPAT) += compat.o diff --git a/fs/ioprio.c b/fs/ioprio.c new file mode 100644 index 00000000000..663e420636d --- /dev/null +++ b/fs/ioprio.c @@ -0,0 +1,172 @@ +/* + * fs/ioprio.c + * + * Copyright (C) 2004 Jens Axboe + * + * Helper functions for setting/querying io priorities of processes. The + * system calls closely mimic getpriority/setpriority; see the man page for + * those. The prio argument is a composite of prio class and prio data, where + * the data argument has meaning within that class. The standard scheduling + * classes have 8 distinct prio levels, with 0 being the highest prio and 7 + * being the lowest.
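+ * The prio class lives in the top three bits of the 16-bit value and the + * prio data in the low 13; see IOPRIO_CLASS_SHIFT in include/linux/ioprio.h.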
+ * + * IOW, setting BE scheduling class with prio 2 is done ala: + * + * unsigned int prio = (IOPRIO_CLASS_BE << IOPRIO_CLASS_SHIFT) | 2; + * + * ioprio_set(IOPRIO_WHO_PROCESS, pid, prio); + * + * See also Documentation/block/ioprio.txt + * + */ +#include +#include +#include + +static int set_task_ioprio(struct task_struct *task, int ioprio) +{ + struct io_context *ioc; + + if (task->uid != current->euid && + task->uid != current->uid && !capable(CAP_SYS_NICE)) + return -EPERM; + + task_lock(task); + + task->ioprio = ioprio; + + ioc = task->io_context; + if (ioc && ioc->set_ioprio) + ioc->set_ioprio(ioc, ioprio); + + task_unlock(task); + return 0; +} + +asmlinkage int sys_ioprio_set(int which, int who, int ioprio) +{ + int class = IOPRIO_PRIO_CLASS(ioprio); + int data = IOPRIO_PRIO_DATA(ioprio); + struct task_struct *p, *g; + struct user_struct *user; + int ret; + + switch (class) { + case IOPRIO_CLASS_RT: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + /* fall through, rt has prio field too */ + case IOPRIO_CLASS_BE: + if (data >= IOPRIO_BE_NR || data < 0) + return -EINVAL; + + break; + case IOPRIO_CLASS_IDLE: + break; + default: + return -EINVAL; + } + + ret = -ESRCH; + read_lock_irq(&tasklist_lock); + switch (which) { + case IOPRIO_WHO_PROCESS: + if (!who) + p = current; + else + p = find_task_by_pid(who); + if (p) + ret = set_task_ioprio(p, ioprio); + break; + case IOPRIO_WHO_PGRP: + if (!who) + who = process_group(current); + do_each_task_pid(who, PIDTYPE_PGID, p) { + ret = set_task_ioprio(p, ioprio); + if (ret) + break; + } while_each_task_pid(who, PIDTYPE_PGID, p); + break; + case IOPRIO_WHO_USER: + if (!who) + user = current->user; + else + user = find_user(who); + + if (!user) + break; + + do_each_thread(g, p) { + if (p->uid != user->uid) + continue; + ret = set_task_ioprio(p, ioprio); + if (ret) + break; + } while_each_thread(g, p); + + if (who) + free_uid(user); + break; + default: + ret = -EINVAL; + } + + read_unlock_irq(&tasklist_lock); + return ret; +} + +asmlinkage int sys_ioprio_get(int which, int who) +{ + struct task_struct *g, *p; + struct user_struct *user; + int ret = -ESRCH; + + read_lock_irq(&tasklist_lock); + switch (which) { + case IOPRIO_WHO_PROCESS: + if (!who) + p = current; + else + p = find_task_by_pid(who); + if (p) + ret = p->ioprio; + break; + case IOPRIO_WHO_PGRP: + if (!who) + who = process_group(current); + do_each_task_pid(who, PIDTYPE_PGID, p) { + if (ret == -ESRCH) + ret = p->ioprio; + else + ret = ioprio_best(ret, p->ioprio); + } while_each_task_pid(who, PIDTYPE_PGID, p); + break; + case IOPRIO_WHO_USER: + if (!who) + user = current->user; + else + user = find_user(who); + + if (!user) + break; + + do_each_thread(g, p) { + if (p->uid != user->uid) + continue; + if (ret == -ESRCH) + ret = p->ioprio; + else + ret = ioprio_best(ret, p->ioprio); + } while_each_thread(g, p); + + if (who) + free_uid(user); + break; + default: + ret = -EINVAL; + } + + read_unlock_irq(&tasklist_lock); + return ret; +} + diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 7b87707acc3..d1bcf0da672 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -645,18 +645,22 @@ struct buffer_chunk { static void write_chunk(struct buffer_chunk *chunk) { int i; + get_fs_excl(); for (i = 0; i < chunk->nr ; i++) { submit_logged_buffer(chunk->bh[i]) ; } chunk->nr = 0; + put_fs_excl(); } static void write_ordered_chunk(struct buffer_chunk *chunk) { int i; + get_fs_excl(); for (i = 0; i < chunk->nr ; i++) { submit_ordered_buffer(chunk->bh[i]) ; } chunk->nr = 0; + 
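/* drop the fs_excl marker taken at entry; while a task holds fs_excl, CFQ boosts its io class (see cfq_prio_boost()) so journal io that can lock out other fs users is not starved */ +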
put_fs_excl(); } static int add_to_chunk(struct buffer_chunk *chunk, struct buffer_head *bh, @@ -918,6 +922,8 @@ static int flush_commit_list(struct super_block *s, struct reiserfs_journal_list return 0 ; } + get_fs_excl(); + /* before we can put our commit blocks on disk, we have to make sure everyone older than ** us is on disk too */ @@ -1055,6 +1061,7 @@ put_jl: if (retval) reiserfs_abort (s, retval, "Journal write error in %s", __FUNCTION__); + put_fs_excl(); return retval; } @@ -1251,6 +1258,8 @@ static int flush_journal_list(struct super_block *s, return 0 ; } + get_fs_excl(); + /* if all the work is already done, get out of here */ if (atomic_read(&(jl->j_nonzerolen)) <= 0 && atomic_read(&(jl->j_commit_left)) <= 0) { @@ -1450,6 +1459,7 @@ flush_older_and_return: put_journal_list(s, jl); if (flushall) up(&journal->j_flush_sem); + put_fs_excl(); return err ; } @@ -2719,6 +2729,7 @@ relock: th->t_trans_id = journal->j_trans_id ; unlock_journal(p_s_sb) ; INIT_LIST_HEAD (&th->t_list); + get_fs_excl(); return 0 ; out_fail: @@ -3526,6 +3537,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, struct super_b BUG_ON (th->t_refcount > 1); BUG_ON (!th->t_trans_id); + put_fs_excl(); current->journal_info = th->t_handle_save; reiserfs_check_lock_depth(p_s_sb, "journal end"); if (journal->j_len == 0) { diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h index 176413fb9ae..e25e4c71a87 100644 --- a/include/asm-i386/unistd.h +++ b/include/asm-i386/unistd.h @@ -294,8 +294,10 @@ #define __NR_add_key 286 #define __NR_request_key 287 #define __NR_keyctl 288 +#define __NR_ioprio_set 289 +#define __NR_ioprio_get 290 -#define NR_syscalls 289 +#define NR_syscalls 291 /* * user-visible error numbers are in the range -1 - -128: see diff --git a/include/asm-ia64/unistd.h b/include/asm-ia64/unistd.h index f7f43ec2483..517f1649ee6 100644 --- a/include/asm-ia64/unistd.h +++ b/include/asm-ia64/unistd.h @@ -263,6 +263,8 @@ #define __NR_add_key 1271 #define __NR_request_key 1272 #define __NR_keyctl 1273 +#define __NR_ioprio_set 1274 +#define __NR_ioprio_get 1275 #define __NR_set_zone_reclaim 1276 #ifdef __KERNEL__ diff --git a/include/asm-ppc/unistd.h b/include/asm-ppc/unistd.h index cc51e5c9acc..e8b79220b29 100644 --- a/include/asm-ppc/unistd.h +++ b/include/asm-ppc/unistd.h @@ -277,8 +277,10 @@ #define __NR_request_key 270 #define __NR_keyctl 271 #define __NR_waitid 272 +#define __NR_ioprio_set 273 +#define __NR_ioprio_get 274 -#define __NR_syscalls 273 +#define __NR_syscalls 275 #define __NR(n) #n diff --git a/include/asm-x86_64/unistd.h b/include/asm-x86_64/unistd.h index d767adcbf0f..6560439a83e 100644 --- a/include/asm-x86_64/unistd.h +++ b/include/asm-x86_64/unistd.h @@ -561,8 +561,12 @@ __SYSCALL(__NR_add_key, sys_add_key) __SYSCALL(__NR_request_key, sys_request_key) #define __NR_keyctl 250 __SYSCALL(__NR_keyctl, sys_keyctl) +#define __NR_ioprio_set 251 +__SYSCALL(__NR_ioprio_set, sys_ioprio_set) +#define __NR_ioprio_get 252 +__SYSCALL(__NR_ioprio_get, sys_ioprio_get) -#define __NR_syscall_max __NR_keyctl +#define __NR_syscall_max __NR_ioprio_get #ifndef __NO_STUBS /* user-visible error numbers are in the range -1 - -4095 */ diff --git a/include/linux/bio.h b/include/linux/bio.h index 038022763f0..36ef29fa0d8 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -22,6 +22,7 @@ #include #include +#include /* Platforms may set this to teach the BIO layer about IOMMU hardware. 
*/ #include @@ -149,6 +150,19 @@ struct bio { #define BIO_RW_FAILFAST 3 #define BIO_RW_SYNC 4 +/* + * upper 16 bits of bi_rw define the io priority of this bio + */ +#define BIO_PRIO_SHIFT (8 * sizeof(unsigned long) - IOPRIO_BITS) +#define bio_prio(bio) ((bio)->bi_rw >> BIO_PRIO_SHIFT) +#define bio_prio_valid(bio) ioprio_valid(bio_prio(bio)) + +#define bio_set_prio(bio, prio) do { \ + WARN_ON(prio >= (1 << IOPRIO_BITS)); \ + (bio)->bi_rw &= ((1UL << BIO_PRIO_SHIFT) - 1); \ + (bio)->bi_rw |= ((unsigned long) (prio) << BIO_PRIO_SHIFT); \ +} while (0) + /* * various member access, note that bio_data should of course not be used * on highmem page vectors diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b54a0348a89..21a8674cd14 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -54,16 +54,23 @@ struct as_io_context { struct cfq_queue; struct cfq_io_context { - void (*dtor)(struct cfq_io_context *); - void (*exit)(struct cfq_io_context *); - - struct io_context *ioc; - /* * circular list of cfq_io_contexts belonging to a process io context */ struct list_head list; struct cfq_queue *cfqq; + void *key; + + struct io_context *ioc; + + unsigned long last_end_request; + unsigned long last_queue; + unsigned long ttime_total; + unsigned long ttime_samples; + unsigned long ttime_mean; + + void (*dtor)(struct cfq_io_context *); + void (*exit)(struct cfq_io_context *); }; /* @@ -73,7 +80,9 @@ struct cfq_io_context { */ struct io_context { atomic_t refcount; - pid_t pid; + struct task_struct *task; + + int (*set_ioprio)(struct io_context *, unsigned int); /* * For request batching @@ -81,8 +90,6 @@ struct io_context { unsigned long last_waited; /* Time last woken after wait for request */ int nr_batch_requests; /* Number of requests left in the batch */ - spinlock_t lock; - struct as_io_context *aic; struct cfq_io_context *cic; }; @@ -134,6 +141,8 @@ struct request { void *elevator_private; + unsigned short ioprio; + int rq_status; /* should split this into a few status bits */ struct gendisk *rq_disk; int errors; diff --git a/include/linux/elevator.h b/include/linux/elevator.h index ee54f81faad..ea6bbc2d740 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -16,9 +16,9 @@ typedef void (elevator_remove_req_fn) (request_queue_t *, struct request *); typedef void (elevator_requeue_req_fn) (request_queue_t *, struct request *); typedef struct request *(elevator_request_list_fn) (request_queue_t *, struct request *); typedef void (elevator_completed_req_fn) (request_queue_t *, struct request *); -typedef int (elevator_may_queue_fn) (request_queue_t *, int); +typedef int (elevator_may_queue_fn) (request_queue_t *, int, struct bio *); -typedef int (elevator_set_req_fn) (request_queue_t *, struct request *, int); +typedef int (elevator_set_req_fn) (request_queue_t *, struct request *, struct bio *, int); typedef void (elevator_put_req_fn) (request_queue_t *, struct request *); typedef void (elevator_deactivate_req_fn) (request_queue_t *, struct request *); @@ -96,9 +96,9 @@ extern struct request *elv_former_request(request_queue_t *, struct request *); extern struct request *elv_latter_request(request_queue_t *, struct request *); extern int elv_register_queue(request_queue_t *q); extern void elv_unregister_queue(request_queue_t *q); -extern int elv_may_queue(request_queue_t *, int); +extern int elv_may_queue(request_queue_t *, int, struct bio *); extern void elv_completed_request(request_queue_t *, struct request *); -extern int 
elv_set_request(request_queue_t *, struct request *, int); +extern int elv_set_request(request_queue_t *, struct request *, struct bio *, int); extern void elv_put_request(request_queue_t *, struct request *); /* diff --git a/include/linux/fs.h b/include/linux/fs.h index 3ae8e37bdfc..047bde30836 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -213,6 +213,7 @@ extern int dir_notify_enable; #include #include #include +#include #include #include @@ -822,16 +823,34 @@ enum { #define vfs_check_frozen(sb, level) \ wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level))) +static inline void get_fs_excl(void) +{ + atomic_inc(&current->fs_excl); +} + +static inline void put_fs_excl(void) +{ + atomic_dec(&current->fs_excl); +} + +static inline int has_fs_excl(void) +{ + return atomic_read(&current->fs_excl); +} + + /* * Superblock locking. */ static inline void lock_super(struct super_block * sb) { + get_fs_excl(); down(&sb->s_lock); } static inline void unlock_super(struct super_block * sb) { + put_fs_excl(); up(&sb->s_lock); } diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 03206a425d7..c727c195a91 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -81,6 +81,7 @@ extern struct group_info init_groups; .mm = NULL, \ .active_mm = &init_mm, \ .run_list = LIST_HEAD_INIT(tsk.run_list), \ + .ioprio = 0, \ .time_slice = HZ, \ .tasks = LIST_HEAD_INIT(tsk.tasks), \ .ptrace_children= LIST_HEAD_INIT(tsk.ptrace_children), \ @@ -110,6 +111,7 @@ extern struct group_info init_groups; .proc_lock = SPIN_LOCK_UNLOCKED, \ .journal_info = NULL, \ .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \ + .fs_excl = ATOMIC_INIT(0), \ } diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h new file mode 100644 index 00000000000..7811300d88e --- /dev/null +++ b/include/linux/ioprio.h @@ -0,0 +1,87 @@ +#ifndef IOPRIO_H +#define IOPRIO_H + +#include + +/* + * Gives us 8 prio classes with 13 bits of data for each class + */ +#define IOPRIO_BITS (16) +#define IOPRIO_CLASS_SHIFT (13) +#define IOPRIO_PRIO_MASK ((1UL << IOPRIO_CLASS_SHIFT) - 1) + +#define IOPRIO_PRIO_CLASS(mask) ((mask) >> IOPRIO_CLASS_SHIFT) +#define IOPRIO_PRIO_DATA(mask) ((mask) & IOPRIO_PRIO_MASK) + +#define ioprio_valid(mask) (IOPRIO_PRIO_CLASS((mask)) != IOPRIO_CLASS_NONE) + +/* + * These are the io priority groups as implemented by CFQ. RT is the realtime + * class; it always gets premium service. BE is the best-effort scheduling + * class, the default for any process. IDLE is the idle scheduling class; it + * is only served when no one else is using the disk. + */ +enum { + IOPRIO_CLASS_NONE, + IOPRIO_CLASS_RT, + IOPRIO_CLASS_BE, + IOPRIO_CLASS_IDLE, +}; + +/* + * 8 best effort priority levels are supported + */ +#define IOPRIO_BE_NR (8) + +asmlinkage int sys_ioprio_set(int, int, int); +asmlinkage int sys_ioprio_get(int, int); + +enum { + IOPRIO_WHO_PROCESS = 1, + IOPRIO_WHO_PGRP, + IOPRIO_WHO_USER, +}; + +/* + * if process has set io priority explicitly, use that. 
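(ioprio_valid() makes the distinction; the class bits stay IOPRIO_CLASS_NONE until the task calls ioprio_set.)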
if not, convert + * the cpu scheduler nice value to an io priority + */ +#define IOPRIO_NORM (4) +static inline int task_ioprio(struct task_struct *task) +{ + WARN_ON(!ioprio_valid(task->ioprio)); + return IOPRIO_PRIO_DATA(task->ioprio); +} + +static inline int task_nice_ioprio(struct task_struct *task) +{ + return (task_nice(task) + 20) / 5; +} + +/* + * For inheritance, return the higher of the two given priorities (a lower + * numeric value means a higher priority) + */ +static inline int ioprio_best(unsigned short aprio, unsigned short bprio) +{ + unsigned short aclass = IOPRIO_PRIO_CLASS(aprio); + unsigned short bclass = IOPRIO_PRIO_CLASS(bprio); + + if (!ioprio_valid(aprio)) + return bprio; + if (!ioprio_valid(bprio)) + return aprio; + + if (aclass == IOPRIO_CLASS_NONE) + aclass = IOPRIO_CLASS_BE; + if (bclass == IOPRIO_CLASS_NONE) + bclass = IOPRIO_CLASS_BE; + + if (aclass == bclass) + return min(aprio, bprio); + if (aclass > bclass) + return bprio; + else + return aprio; +} + +#endif diff --git a/include/linux/sched.h b/include/linux/sched.h index 9530b190316..ff48815bd3a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -608,6 +608,8 @@ struct task_struct { struct list_head run_list; prio_array_t *array; + unsigned short ioprio; + unsigned long sleep_avg; unsigned long long timestamp, last_ran; unsigned long long sched_time; /* sched_clock time spent running */ @@ -763,6 +765,7 @@ struct task_struct { nodemask_t mems_allowed; int cpuset_mems_generation; #endif + atomic_t fs_excl; /* holding fs exclusive resources */ }; static inline pid_t process_group(struct task_struct *tsk) @@ -1112,7 +1115,8 @@ extern void unhash_process(struct task_struct *p); /* * Protects ->fs, ->files, ->mm, ->ptrace, ->group_info, ->comm, keyring - * subscriptions and synchronises with wait4(). Also used in procfs. + * subscriptions and synchronises with wait4(). Also used in procfs. Also + * pins the final release of task.io_context. * * Nests both inside and outside of read_lock(&tasklist_lock). * It must not be nested with write_lock_irq(&tasklist_lock), diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 1262cb43c3a..d5c3fe1bf33 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -14,11 +14,13 @@ extern struct list_head inode_unused; * Yes, writeback.h requires sched.h * No, sched.h is not included from here. 
*/ -static inline int current_is_pdflush(void) +static inline int task_is_pdflush(struct task_struct *task) { - return current->flags & PF_FLUSHER; + return task->flags & PF_FLUSHER; } +#define current_is_pdflush() task_is_pdflush(current) + /* * fs/fs-writeback.c */ diff --git a/kernel/exit.c b/kernel/exit.c index 3ebcd60a19c..9d1b10ed013 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -784,6 +784,8 @@ fastcall NORET_TYPE void do_exit(long code) profile_task_exit(tsk); + WARN_ON(atomic_read(&tsk->fs_excl)); + if (unlikely(in_interrupt())) panic("Aiee, killing interrupt handler!"); if (unlikely(!tsk->pid)) diff --git a/kernel/fork.c b/kernel/fork.c index 2c7806873bf..cdef6cea890 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1090,6 +1090,11 @@ static task_t *copy_process(unsigned long clone_flags, spin_unlock(&current->sighand->siglock); } + /* + * inherit ioprio + */ + p->ioprio = current->ioprio; + SET_LINKS(p); if (unlikely(p->ptrace & PT_PTRACED)) __ptrace_link(p, current->parent); diff --git a/kernel/sched.c b/kernel/sched.c index a07cff90d84..e2b0d3e4dd0 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -3448,15 +3448,7 @@ int task_nice(const task_t *p) { return TASK_NICE(p); } - -/* - * The only users of task_nice are binfmt_elf and binfmt_elf32. - * binfmt_elf is no longer modular, but binfmt_elf32 still is. - * Therefore, task_nice is needed if there is a compat_mode. - */ -#ifdef CONFIG_COMPAT EXPORT_SYMBOL_GPL(task_nice); -#endif /** * idle_cpu - is a given cpu idle currently? -- cgit v1.2.3-70-g09d2 From 3b18152c327707ae6a2eeba4cfb66457143753bc Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 27 Jun 2005 10:56:24 +0200 Subject: [PATCH] CFQ io scheduler updates - Adjust slice values - Instead of one async queue, one is defined per priority level. This prevents kernel threads (such as reiserfs/x and others) that run at higher io priority from conflicting with others. Previously it was a coin toss which io prio the async queue got; it was defined by whoever first set up the queue. - Let a time slice begin only when the previous slice is completely done. Previously we could be somewhat unfair to a new sync slice if the previous slice was async and had several ios queued. This might need a little tweaking if throughput suffers as a result, perhaps allowing an overlap of a single request or so. - Optimize the calling of kblockd_schedule_work() by doing it only when it is strictly necessary (no requests in driver and work left to do). - Correct sync vs async logic. A 'normal' process can be purely async as well, and a flusher can be purely sync as well. Sync or async is now a property of the defined class and of the requests pending. Previously writers could be considered sync when they were really async. - Get rid of the bit fields in cfqq and crq, use flags instead. 
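(The CFQ_CFQQ_FNS()/CFQ_CRQ_FNS() macros in the diff below generate the per-flag mark/clear/test helpers.)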
- Various other cleanups and fixes Signed-off-by: Jens Axboe Signed-off-by: Linus Torvalds --- drivers/block/cfq-iosched.c | 460 ++++++++++++++++++++++++++++---------------- include/linux/ioprio.h | 1 + 2 files changed, 300 insertions(+), 161 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/cfq-iosched.c b/drivers/block/cfq-iosched.c index baa3e268250..1ecb179b860 100644 --- a/drivers/block/cfq-iosched.c +++ b/drivers/block/cfq-iosched.c @@ -34,14 +34,15 @@ static int cfq_back_max = 16 * 1024; /* maximum backwards seek, in KiB */ static int cfq_back_penalty = 2; /* penalty of a backwards seek */ static int cfq_slice_sync = HZ / 10; -static int cfq_slice_async = HZ / 50; +static int cfq_slice_async = HZ / 25; static int cfq_slice_async_rq = 2; -static int cfq_slice_idle = HZ / 50; +static int cfq_slice_idle = HZ / 100; #define CFQ_IDLE_GRACE (HZ / 10) #define CFQ_SLICE_SCALE (5) #define CFQ_KEY_ASYNC (0) +#define CFQ_KEY_ANY (0xffff) /* * disable queueing at the driver/hardware level @@ -96,7 +97,16 @@ static kmem_cache_t *cfq_ioc_pool; #define cfq_class_be(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_BE) #define cfq_class_rt(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_RT) -#define cfq_cfqq_sync(cfqq) ((cfqq)->key != CFQ_KEY_ASYNC) +#define ASYNC (0) +#define SYNC (1) + +#define cfq_cfqq_dispatched(cfqq) \ + ((cfqq)->on_dispatch[ASYNC] + (cfqq)->on_dispatch[SYNC]) + +#define cfq_cfqq_class_sync(cfqq) ((cfqq)->key != CFQ_KEY_ASYNC) + +#define cfq_cfqq_sync(cfqq) \ + (cfq_cfqq_class_sync(cfqq) || (cfqq)->on_dispatch[SYNC]) /* * Per block device queue structure @@ -200,28 +210,15 @@ struct cfq_queue { unsigned long slice_left; unsigned long service_last; - /* number of requests that have been handed to the driver */ - int in_flight; + /* number of requests that are on the dispatch list */ + int on_dispatch[2]; /* io prio of this group */ unsigned short ioprio, org_ioprio; unsigned short ioprio_class, org_ioprio_class; - /* whether queue is on rr (or empty) list */ - unsigned on_rr : 1; - /* idle slice, waiting for new request submission */ - unsigned wait_request : 1; - /* set when wait_request gets set, reset on first rq alloc */ - unsigned must_alloc : 1; - /* only gets one must_alloc per slice */ - unsigned must_alloc_slice : 1; - /* idle slice, request added, now waiting to dispatch it */ - unsigned must_dispatch : 1; - /* fifo expire per-slice */ - unsigned fifo_expire : 1; - - unsigned idle_window : 1; - unsigned prio_changed : 1; + /* various state flags, see below */ + unsigned int flags; }; struct cfq_rq { @@ -233,15 +230,77 @@ struct cfq_rq { struct cfq_queue *cfq_queue; struct cfq_io_context *io_context; - unsigned in_flight : 1; - unsigned accounted : 1; - unsigned is_sync : 1; - unsigned requeued : 1; + unsigned int crq_flags; }; -static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned int); +enum cfqq_state_flags { + CFQ_CFQQ_FLAG_on_rr = 0, + CFQ_CFQQ_FLAG_wait_request, + CFQ_CFQQ_FLAG_must_alloc, + CFQ_CFQQ_FLAG_must_alloc_slice, + CFQ_CFQQ_FLAG_must_dispatch, + CFQ_CFQQ_FLAG_fifo_expire, + CFQ_CFQQ_FLAG_idle_window, + CFQ_CFQQ_FLAG_prio_changed, + CFQ_CFQQ_FLAG_expired, +}; + +#define CFQ_CFQQ_FNS(name) \ +static inline void cfq_mark_cfqq_##name(struct cfq_queue *cfqq) \ +{ \ + cfqq->flags |= (1 << CFQ_CFQQ_FLAG_##name); \ +} \ +static inline void cfq_clear_cfqq_##name(struct cfq_queue *cfqq) \ +{ \ + cfqq->flags &= ~(1 << CFQ_CFQQ_FLAG_##name); \ +} \ +static inline int cfq_cfqq_##name(const struct cfq_queue *cfqq) \ +{ \ + return 
(cfqq->flags & (1 << CFQ_CFQQ_FLAG_##name)) != 0; \ +} + +CFQ_CFQQ_FNS(on_rr); +CFQ_CFQQ_FNS(wait_request); +CFQ_CFQQ_FNS(must_alloc); +CFQ_CFQQ_FNS(must_alloc_slice); +CFQ_CFQQ_FNS(must_dispatch); +CFQ_CFQQ_FNS(fifo_expire); +CFQ_CFQQ_FNS(idle_window); +CFQ_CFQQ_FNS(prio_changed); +CFQ_CFQQ_FNS(expired); +#undef CFQ_CFQQ_FNS + +enum cfq_rq_state_flags { + CFQ_CRQ_FLAG_in_flight = 0, + CFQ_CRQ_FLAG_in_driver, + CFQ_CRQ_FLAG_is_sync, + CFQ_CRQ_FLAG_requeued, +}; + +#define CFQ_CRQ_FNS(name) \ +static inline void cfq_mark_crq_##name(struct cfq_rq *crq) \ +{ \ + crq->crq_flags |= (1 << CFQ_CRQ_FLAG_##name); \ +} \ +static inline void cfq_clear_crq_##name(struct cfq_rq *crq) \ +{ \ + crq->crq_flags &= ~(1 << CFQ_CRQ_FLAG_##name); \ +} \ +static inline int cfq_crq_##name(const struct cfq_rq *crq) \ +{ \ + return (crq->crq_flags & (1 << CFQ_CRQ_FLAG_##name)) != 0; \ +} + +CFQ_CRQ_FNS(in_flight); +CFQ_CRQ_FNS(in_driver); +CFQ_CRQ_FNS(is_sync); +CFQ_CRQ_FNS(requeued); +#undef CFQ_CRQ_FNS + +static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned int, unsigned short); static void cfq_dispatch_sort(request_queue_t *, struct cfq_rq *); static void cfq_put_cfqd(struct cfq_data *cfqd); +static inline int cfq_pending_requests(struct cfq_data *cfqd); #define process_sync(tsk) ((tsk)->flags & PF_SYNCWRITE) @@ -305,9 +364,9 @@ cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2) return crq2; if (crq2 == NULL) return crq1; - if (crq1->requeued) + if (cfq_crq_requeued(crq1)) return crq1; - if (crq2->requeued) + if (cfq_crq_requeued(crq2)) return crq2; s1 = crq1->request->sector; @@ -407,7 +466,7 @@ static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted) struct cfq_data *cfqd = cfqq->cfqd; struct list_head *list, *entry; - BUG_ON(!cfqq->on_rr); + BUG_ON(!cfq_cfqq_on_rr(cfqq)); list_del(&cfqq->cfq_list); @@ -423,7 +482,7 @@ static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted) * has lots of io pending vs one that only generates one * sporadically or synchronously */ - if (cfqq->in_flight) + if (cfq_cfqq_dispatched(cfqq)) list = &cfqd->busy_rr; else list = &cfqd->rr_list[cfqq->ioprio]; @@ -461,8 +520,8 @@ static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted) static inline void cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq, int requeue) { - BUG_ON(cfqq->on_rr); - cfqq->on_rr = 1; + BUG_ON(cfq_cfqq_on_rr(cfqq)); + cfq_mark_cfqq_on_rr(cfqq); cfqd->busy_queues++; cfq_resort_rr_list(cfqq, requeue); @@ -471,8 +530,8 @@ cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq, int requeue) static inline void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) { - BUG_ON(!cfqq->on_rr); - cfqq->on_rr = 0; + BUG_ON(!cfq_cfqq_on_rr(cfqq)); + cfq_clear_cfqq_on_rr(cfqq); list_move(&cfqq->cfq_list, &cfqd->empty_list); BUG_ON(!cfqd->busy_queues); @@ -488,7 +547,7 @@ static inline void cfq_del_crq_rb(struct cfq_rq *crq) if (ON_RB(&crq->rb_node)) { struct cfq_data *cfqd = cfqq->cfqd; - const int sync = crq->is_sync; + const int sync = cfq_crq_is_sync(crq); BUG_ON(!cfqq->queued[sync]); cfqq->queued[sync]--; @@ -498,7 +557,7 @@ static inline void cfq_del_crq_rb(struct cfq_rq *crq) rb_erase(&crq->rb_node, &cfqq->sort_list); RB_CLEAR_COLOR(&crq->rb_node); - if (cfqq->on_rr && RB_EMPTY(&cfqq->sort_list)) + if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY(&cfqq->sort_list)) cfq_del_cfqq_rr(cfqd, cfqq); } } @@ -534,7 +593,7 @@ static void cfq_add_crq_rb(struct cfq_rq *crq) struct cfq_rq *__alias; crq->rb_key = rq_rb_key(rq); - 
cfqq->queued[crq->is_sync]++; + cfqq->queued[cfq_crq_is_sync(crq)]++; /* * looks a little odd, but the first insert might return an alias. @@ -545,8 +604,8 @@ static void cfq_add_crq_rb(struct cfq_rq *crq) rb_insert_color(&crq->rb_node, &cfqq->sort_list); - if (!cfqq->on_rr) - cfq_add_cfqq_rr(cfqd, cfqq, crq->requeued); + if (!cfq_cfqq_on_rr(cfqq)) + cfq_add_cfqq_rr(cfqd, cfqq, cfq_crq_requeued(crq)); /* * check if this request is a better next-serve candidate @@ -559,7 +618,7 @@ cfq_reposition_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq) { if (ON_RB(&crq->rb_node)) { rb_erase(&crq->rb_node, &cfqq->sort_list); - cfqq->queued[crq->is_sync]--; + cfqq->queued[cfq_crq_is_sync(crq)]--; } cfq_add_crq_rb(crq); @@ -568,7 +627,7 @@ cfq_reposition_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq) static struct request *cfq_find_rq_rb(struct cfq_data *cfqd, sector_t sector) { - struct cfq_queue *cfqq = cfq_find_cfq_hash(cfqd, current->pid); + struct cfq_queue *cfqq = cfq_find_cfq_hash(cfqd, current->pid, CFQ_KEY_ANY); struct rb_node *n; if (!cfqq) @@ -598,17 +657,19 @@ static void cfq_deactivate_request(request_queue_t *q, struct request *rq) if (crq) { struct cfq_queue *cfqq = crq->cfq_queue; - if (crq->accounted) { - crq->accounted = 0; + if (cfq_crq_in_driver(crq)) { + cfq_clear_crq_in_driver(crq); WARN_ON(!cfqd->rq_in_driver); cfqd->rq_in_driver--; } - if (crq->in_flight) { - crq->in_flight = 0; - WARN_ON(!cfqq->in_flight); - cfqq->in_flight--; + if (cfq_crq_in_flight(crq)) { + const int sync = cfq_crq_is_sync(crq); + + cfq_clear_crq_in_flight(crq); + WARN_ON(!cfqq->on_dispatch[sync]); + cfqq->on_dispatch[sync]--; } - crq->requeued = 1; + cfq_mark_crq_requeued(crq); } } @@ -712,8 +773,9 @@ __cfq_set_active_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) cfqq->slice_start = jiffies; cfqq->slice_end = 0; cfqq->slice_left = 0; - cfqq->must_alloc_slice = 0; - cfqq->fifo_expire = 0; + cfq_clear_cfqq_must_alloc_slice(cfqq); + cfq_clear_cfqq_fifo_expire(cfqq); + cfq_clear_cfqq_expired(cfqq); } cfqd->active_queue = cfqq; @@ -776,9 +838,18 @@ static int cfq_get_next_prio_level(struct cfq_data *cfqd) return prio; } -static void cfq_set_active_queue(struct cfq_data *cfqd) +static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd) { - struct cfq_queue *cfqq = NULL; + struct cfq_queue *cfqq; + + /* + * if current queue is expired but not done with its requests yet, + * wait for that to happen + */ + if ((cfqq = cfqd->active_queue) != NULL) { + if (cfq_cfqq_expired(cfqq) && cfq_cfqq_dispatched(cfqq)) + return NULL; + } /* * if current list is non-empty, grab first entry. 
if it is empty, @@ -802,50 +873,66 @@ static void cfq_set_active_queue(struct cfq_data *cfqd) } __cfq_set_active_queue(cfqd, cfqq); + return cfqq; } /* * current cfqq expired its slice (or was too idle), select new one */ -static inline void cfq_slice_expired(struct cfq_data *cfqd, int preempted) +static void +__cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq, + int preempted) { - struct cfq_queue *cfqq = cfqd->active_queue; - - if (cfqq) { - unsigned long now = jiffies; + unsigned long now = jiffies; - if (cfqq->wait_request) - del_timer(&cfqd->idle_slice_timer); + if (cfq_cfqq_wait_request(cfqq)) + del_timer(&cfqd->idle_slice_timer); - if (!preempted && !cfqq->in_flight) - cfqq->service_last = now; + if (!preempted && !cfq_cfqq_dispatched(cfqq)) + cfqq->service_last = now; - cfqq->must_dispatch = 0; - cfqq->wait_request = 0; + cfq_clear_cfqq_must_dispatch(cfqq); + cfq_clear_cfqq_wait_request(cfqq); - /* - * store what was left of this slice, if the queue idled out - * or was preempted - */ - if (time_after(now, cfqq->slice_end)) - cfqq->slice_left = now - cfqq->slice_end; - else - cfqq->slice_left = 0; + /* + * store what was left of this slice, if the queue idled out + * or was preempted + */ + if (time_after(now, cfqq->slice_end)) + cfqq->slice_left = now - cfqq->slice_end; + else + cfqq->slice_left = 0; - if (cfqq->on_rr) - cfq_resort_rr_list(cfqq, preempted); + if (cfq_cfqq_on_rr(cfqq)) + cfq_resort_rr_list(cfqq, preempted); + if (cfqq == cfqd->active_queue) cfqd->active_queue = NULL; - if (cfqd->active_cic) { - put_io_context(cfqd->active_cic->ioc); - cfqd->active_cic = NULL; - } + if (cfqd->active_cic) { + put_io_context(cfqd->active_cic->ioc); + cfqd->active_cic = NULL; } cfqd->dispatch_slice = 0; } +static inline void cfq_slice_expired(struct cfq_data *cfqd, int preempted) +{ + struct cfq_queue *cfqq = cfqd->active_queue; + + if (cfqq) { + /* + * use deferred expiry, if there are requests in progress as + * not to disturb the slice of the next queue + */ + if (cfq_cfqq_dispatched(cfqq)) + cfq_mark_cfqq_expired(cfqq); + else + __cfq_slice_expired(cfqd, cfqq, preempted); + } +} + static int cfq_arm_slice_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq) { @@ -857,7 +944,7 @@ static int cfq_arm_slice_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq) */ if (!cfqd->cfq_slice_idle) return 0; - if (!cfqq->idle_window) + if (!cfq_cfqq_idle_window(cfqq)) return 0; /* * task has exited, don't wait @@ -865,13 +952,13 @@ static int cfq_arm_slice_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq) if (cfqd->active_cic && !cfqd->active_cic->ioc->task) return 0; - cfqq->wait_request = 1; - cfqq->must_alloc = 1; + cfq_mark_cfqq_must_dispatch(cfqq); + cfq_mark_cfqq_wait_request(cfqq); if (!timer_pending(&cfqd->idle_slice_timer)) { - unsigned long slice_left = cfqq->slice_end - 1; + unsigned long slice_left = min(cfqq->slice_end - 1, (unsigned long) cfqd->cfq_slice_idle); - cfqd->idle_slice_timer.expires = min(jiffies + cfqd->cfq_slice_idle, slice_left); + cfqd->idle_slice_timer.expires = jiffies + slice_left; add_timer(&cfqd->idle_slice_timer); } @@ -901,7 +988,7 @@ static void cfq_dispatch_sort(request_queue_t *q, struct cfq_rq *crq) break; if (!blk_fs_request(__rq)) break; - if (__crq->requeued) + if (cfq_crq_requeued(__crq)) break; if (__rq->sector <= crq->request->sector) @@ -920,9 +1007,10 @@ static void cfq_dispatch_sort(request_queue_t *q, struct cfq_rq *crq) cfq_del_crq_rb(crq); cfq_remove_merge_hints(q, crq); - crq->in_flight = 1; - crq->requeued = 0; - 
cfqq->in_flight++; + cfq_mark_crq_in_flight(crq); + cfq_clear_crq_requeued(crq); + + cfqq->on_dispatch[cfq_crq_is_sync(crq)]++; list_add_tail(&crq->request->queuelist, entry); } @@ -935,16 +1023,16 @@ static inline struct cfq_rq *cfq_check_fifo(struct cfq_queue *cfqq) struct request *rq; struct cfq_rq *crq; - if (cfqq->fifo_expire) + if (cfq_cfqq_fifo_expire(cfqq)) return NULL; if (!list_empty(&cfqq->fifo)) { - int fifo = cfq_cfqq_sync(cfqq); + int fifo = cfq_cfqq_class_sync(cfqq); crq = RQ_DATA(list_entry_fifo(cfqq->fifo.next)); rq = crq->request; if (time_after(jiffies, rq->start_time + cfqd->cfq_fifo_expire[fifo])) { - cfqq->fifo_expire = 1; + cfq_mark_cfqq_fifo_expire(cfqq); return crq; } } @@ -953,7 +1041,9 @@ static inline struct cfq_rq *cfq_check_fifo(struct cfq_queue *cfqq) } /* - * Scale schedule slice based on io priority + * Scale schedule slice based on io priority. Use the sync time slice only + * if a queue is marked sync and has sync io queued. A sync queue with async + * io only, should not get full sync slice length. */ static inline int cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) @@ -981,6 +1071,16 @@ cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq) return 2 * (base_rq + base_rq * (CFQ_PRIO_LISTS - 1 - cfqq->ioprio)); } +/* + * scheduler run of queue, if there are requests pending and no one in the + * driver that will restart queueing + */ +static inline void cfq_schedule_dispatch(struct cfq_data *cfqd) +{ + if (!cfqd->rq_in_driver && cfq_pending_requests(cfqd)) + kblockd_schedule_work(&cfqd->unplug_work); +} + /* * get next queue for service */ @@ -993,11 +1093,14 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd, int force) if (!cfqq) goto new_queue; + if (cfq_cfqq_expired(cfqq)) + goto new_queue; + /* * slice has expired */ - if (!cfqq->must_dispatch && time_after(jiffies, cfqq->slice_end)) - goto new_queue; + if (!cfq_cfqq_must_dispatch(cfqq) && time_after(now, cfqq->slice_end)) + goto expire; /* * if queue has requests, dispatch one. 
if not, check if @@ -1005,17 +1108,18 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd, int force) */ if (!RB_EMPTY(&cfqq->sort_list)) goto keep_queue; - else if (!force && cfq_cfqq_sync(cfqq) && + else if (!force && cfq_cfqq_class_sync(cfqq) && time_before(now, cfqq->slice_end)) { if (cfq_arm_slice_timer(cfqd, cfqq)) return NULL; } -new_queue: +expire: cfq_slice_expired(cfqd, 0); - cfq_set_active_queue(cfqd); +new_queue: + cfqq = cfq_set_active_queue(cfqd); keep_queue: - return cfqd->active_queue; + return cfqq; } static int @@ -1083,8 +1187,8 @@ cfq_dispatch_requests(request_queue_t *q, int max_dispatch, int force) cfqq = cfq_select_queue(cfqd, force); if (cfqq) { - cfqq->wait_request = 0; - cfqq->must_dispatch = 0; + cfq_clear_cfqq_must_dispatch(cfqq); + cfq_clear_cfqq_wait_request(cfqq); del_timer(&cfqd->idle_slice_timer); if (cfq_class_idle(cfqq)) @@ -1108,10 +1212,10 @@ static inline void cfq_account_dispatch(struct cfq_rq *crq) * accounted bit is necessary since some drivers will call * elv_next_request() many times for the same request (eg ide) */ - if (crq->accounted) + if (cfq_crq_in_driver(crq)) return; - crq->accounted = 1; + cfq_mark_crq_in_driver(crq); cfqd->rq_in_driver++; } @@ -1121,7 +1225,7 @@ cfq_account_completion(struct cfq_queue *cfqq, struct cfq_rq *crq) struct cfq_data *cfqd = cfqq->cfqd; unsigned long now; - if (!crq->accounted) + if (!cfq_crq_in_driver(crq)) return; now = jiffies; @@ -1132,12 +1236,18 @@ cfq_account_completion(struct cfq_queue *cfqq, struct cfq_rq *crq) if (!cfq_class_idle(cfqq)) cfqd->last_end_request = now; - if (!cfqq->in_flight && cfqq->on_rr) { - cfqq->service_last = now; - cfq_resort_rr_list(cfqq, 0); + if (!cfq_cfqq_dispatched(cfqq)) { + if (cfq_cfqq_on_rr(cfqq)) { + cfqq->service_last = now; + cfq_resort_rr_list(cfqq, 0); + } + if (cfq_cfqq_expired(cfqq)) { + __cfq_slice_expired(cfqd, cfqq, 0); + cfq_schedule_dispatch(cfqd); + } } - if (crq->is_sync) + if (cfq_crq_is_sync(crq)) crq->io_context->last_end_request = now; } @@ -1153,10 +1263,13 @@ dispatch: crq = RQ_DATA(rq); if (crq) { + struct cfq_queue *cfqq = crq->cfq_queue; + /* * if idle window is disabled, allow queue buildup */ - if (!crq->in_flight && !crq->cfq_queue->idle_window && + if (!cfq_crq_in_driver(crq) && + !cfq_cfqq_idle_window(cfqq) && cfqd->rq_in_driver >= cfqd->cfq_max_depth) return NULL; @@ -1190,11 +1303,11 @@ static void cfq_put_queue(struct cfq_queue *cfqq) BUG_ON(rb_first(&cfqq->sort_list)); BUG_ON(cfqq->allocated[READ] + cfqq->allocated[WRITE]); - BUG_ON(cfqq->on_rr); + BUG_ON(cfq_cfqq_on_rr(cfqq)); if (unlikely(cfqd->active_queue == cfqq)) { - cfq_slice_expired(cfqd, 0); - kblockd_schedule_work(&cfqd->unplug_work); + __cfq_slice_expired(cfqd, cfqq, 0); + cfq_schedule_dispatch(cfqd); } cfq_put_cfqd(cfqq->cfqd); @@ -1208,15 +1321,17 @@ static void cfq_put_queue(struct cfq_queue *cfqq) } static inline struct cfq_queue * -__cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key, const int hashval) +__cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key, unsigned int prio, + const int hashval) { struct hlist_head *hash_list = &cfqd->cfq_hash[hashval]; struct hlist_node *entry, *next; hlist_for_each_safe(entry, next, hash_list) { struct cfq_queue *__cfqq = list_entry_qhash(entry); + const unsigned short __p = IOPRIO_PRIO_VALUE(__cfqq->ioprio_class, __cfqq->ioprio); - if (__cfqq->key == key) + if (__cfqq->key == key && (__p == prio || prio == CFQ_KEY_ANY)) return __cfqq; } @@ -1224,9 +1339,9 @@ __cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned 
int key, const int hashval) } static struct cfq_queue * -cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key) +cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key, unsigned short prio) { - return __cfq_find_cfq_hash(cfqd, key, hash_long(key, CFQ_QHASH_SHIFT)); + return __cfq_find_cfq_hash(cfqd, key, prio, hash_long(key, CFQ_QHASH_SHIFT)); } static void cfq_free_io_context(struct cfq_io_context *cic) @@ -1255,8 +1370,8 @@ static void cfq_exit_single_io_context(struct cfq_io_context *cic) spin_lock(q->queue_lock); if (unlikely(cic->cfqq == cfqd->active_queue)) { - cfq_slice_expired(cfqd, 0); - kblockd_schedule_work(&cfqd->unplug_work); + __cfq_slice_expired(cfqd, cic->cfqq, 0); + cfq_schedule_dispatch(cfqd); } cfq_put_queue(cic->cfqq); @@ -1313,7 +1428,7 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq) struct task_struct *tsk = current; int ioprio_class; - if (!cfqq->prio_changed) + if (!cfq_cfqq_prio_changed(cfqq)) return; ioprio_class = IOPRIO_PRIO_CLASS(tsk->ioprio); @@ -1338,7 +1453,7 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq) case IOPRIO_CLASS_IDLE: cfqq->ioprio_class = IOPRIO_CLASS_IDLE; cfqq->ioprio = 7; - cfqq->idle_window = 0; + cfq_clear_cfqq_idle_window(cfqq); break; } @@ -1349,10 +1464,10 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq) cfqq->org_ioprio = cfqq->ioprio; cfqq->org_ioprio_class = cfqq->ioprio_class; - if (cfqq->on_rr) + if (cfq_cfqq_on_rr(cfqq)) cfq_resort_rr_list(cfqq, 0); - cfqq->prio_changed = 0; + cfq_clear_cfqq_prio_changed(cfqq); } static inline void changed_ioprio(struct cfq_queue *cfqq) @@ -1361,7 +1476,7 @@ static inline void changed_ioprio(struct cfq_queue *cfqq) struct cfq_data *cfqd = cfqq->cfqd; spin_lock(cfqd->queue->queue_lock); - cfqq->prio_changed = 1; + cfq_mark_cfqq_prio_changed(cfqq); cfq_init_prio_data(cfqq); spin_unlock(cfqd->queue->queue_lock); } @@ -1383,13 +1498,14 @@ static int cfq_ioc_set_ioprio(struct io_context *ioc, unsigned int ioprio) } static struct cfq_queue * -cfq_get_queue(struct cfq_data *cfqd, unsigned int key, int gfp_mask) +cfq_get_queue(struct cfq_data *cfqd, unsigned int key, unsigned short ioprio, + int gfp_mask) { const int hashval = hash_long(key, CFQ_QHASH_SHIFT); struct cfq_queue *cfqq, *new_cfqq = NULL; retry: - cfqq = __cfq_find_cfq_hash(cfqd, key, hashval); + cfqq = __cfq_find_cfq_hash(cfqd, key, ioprio, hashval); if (!cfqq) { if (new_cfqq) { @@ -1423,10 +1539,9 @@ retry: * set ->slice_left to allow preemption for a new process */ cfqq->slice_left = 2 * cfqd->cfq_slice_idle; - cfqq->idle_window = 1; - cfqq->ioprio = -1; - cfqq->ioprio_class = -1; - cfqq->prio_changed = 1; + cfq_mark_cfqq_idle_window(cfqq); + cfq_mark_cfqq_prio_changed(cfqq); + cfq_init_prio_data(cfqq); } if (new_cfqq) @@ -1553,7 +1668,7 @@ static void cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq, struct cfq_io_context *cic) { - int enable_idle = cfqq->idle_window; + int enable_idle = cfq_cfqq_idle_window(cfqq); if (!cic->ioc->task || !cfqd->cfq_slice_idle) enable_idle = 0; @@ -1564,7 +1679,10 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq, enable_idle = 1; } - cfqq->idle_window = enable_idle; + if (enable_idle) + cfq_mark_cfqq_idle_window(cfqq); + else + cfq_clear_cfqq_idle_window(cfqq); } @@ -1586,14 +1704,14 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, if (cfq_class_idle(cfqq)) return 1; - if (!new_cfqq->wait_request) + if (!cfq_cfqq_wait_request(new_cfqq)) return 0; /* * if it doesn't have slice left, forget it */ if 
(new_cfqq->slice_left < cfqd->cfq_slice_idle) return 0; - if (crq->is_sync && !cfq_cfqq_sync(cfqq)) + if (cfq_crq_is_sync(crq) && !cfq_cfqq_sync(cfqq)) return 1; return 0; @@ -1614,7 +1732,7 @@ static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) cfqq->slice_left = cfq_prio_to_slice(cfqd, cfqq) / 2; cfqq->slice_end = cfqq->slice_left + jiffies; - cfq_slice_expired(cfqd, 1); + __cfq_slice_expired(cfqd, cfqq, 1); __cfq_set_active_queue(cfqd, cfqq); } @@ -1639,7 +1757,7 @@ static void cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, struct cfq_rq *crq) { - const int sync = crq->is_sync; + const int sync = cfq_crq_is_sync(crq); cfqq->next_crq = cfq_choose_req(cfqd, cfqq->next_crq, crq); @@ -1658,8 +1776,8 @@ cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, * immediately and flag that we must not expire this queue * just now */ - if (cfqq->wait_request) { - cfqq->must_dispatch = 1; + if (cfq_cfqq_wait_request(cfqq)) { + cfq_mark_cfqq_must_dispatch(cfqq); del_timer(&cfqd->idle_slice_timer); cfq_start_queueing(cfqd, cfqq); } @@ -1670,7 +1788,7 @@ cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, * has some old slice time left and is of higher priority */ cfq_preempt_queue(cfqd, cfqq); - cfqq->must_dispatch = 1; + cfq_mark_cfqq_must_dispatch(cfqq); cfq_start_queueing(cfqd, cfqq); } } @@ -1713,7 +1831,7 @@ cfq_insert_request(request_queue_t *q, struct request *rq, int where) * be kicked by __make_request() afterward. * Kick it here. */ - kblockd_schedule_work(&cfqd->unplug_work); + cfq_schedule_dispatch(cfqd); break; case ELEVATOR_INSERT_FRONT: list_add(&rq->queuelist, &q->queue_head); @@ -1750,9 +1868,11 @@ static void cfq_completed_request(request_queue_t *q, struct request *rq) cfqq = crq->cfq_queue; - if (crq->in_flight) { - WARN_ON(!cfqq->in_flight); - cfqq->in_flight--; + if (cfq_crq_in_flight(crq)) { + const int sync = cfq_crq_is_sync(crq); + + WARN_ON(!cfqq->on_dispatch[sync]); + cfqq->on_dispatch[sync]--; } cfq_account_completion(cfqq, crq); @@ -1814,7 +1934,7 @@ static void cfq_prio_boost(struct cfq_queue *cfqq) * refile between round-robin lists if we moved the priority class */ if ((ioprio_class != cfqq->ioprio_class || ioprio != cfqq->ioprio) && - cfqq->on_rr) + cfq_cfqq_on_rr(cfqq)) cfq_resort_rr_list(cfqq, 0); } @@ -1830,23 +1950,27 @@ static inline int __cfq_may_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq, struct task_struct *task, int rw) { - if (cfqq->wait_request && cfqq->must_alloc) +#if 1 + if ((cfq_cfqq_wait_request(cfqq) || cfq_cfqq_must_alloc(cfqq)) && + !cfq_cfqq_must_alloc_slice(cfqq)) { + cfq_mark_cfqq_must_alloc_slice(cfqq); return ELV_MQUEUE_MUST; + } return ELV_MQUEUE_MAY; -#if 0 +#else if (!cfqq || task->flags & PF_MEMALLOC) return ELV_MQUEUE_MAY; - if (!cfqq->allocated[rw] || cfqq->must_alloc) { - if (cfqq->wait_request) + if (!cfqq->allocated[rw] || cfq_cfqq_must_alloc(cfqq)) { + if (cfq_cfqq_wait_request(cfqq)) return ELV_MQUEUE_MUST; /* * only allow 1 ELV_MQUEUE_MUST per slice, otherwise we * can quickly flood the queue with writes from a single task */ - if (rw == READ || !cfqq->must_alloc_slice) { - cfqq->must_alloc_slice = 1; + if (rw == READ || !cfq_cfqq_must_alloc_slice(cfqq)) { + cfq_mark_cfqq_must_alloc_slice(cfqq); return ELV_MQUEUE_MUST; } @@ -1881,7 +2005,7 @@ static int cfq_may_queue(request_queue_t *q, int rw, struct bio *bio) * so just lookup a possibly existing queue, or return 'may queue' * if that fails */ - cfqq = 
cfq_find_cfq_hash(cfqd, cfq_queue_pid(tsk, rw), tsk->ioprio); if (cfqq) { cfq_init_prio_data(cfqq); cfq_prio_boost(cfqq); @@ -1943,15 +2067,17 @@ cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio, int gfp_mask) { struct cfq_data *cfqd = q->elevator->elevator_data; + struct task_struct *tsk = current; struct cfq_io_context *cic; const int rw = rq_data_dir(rq); + pid_t key = cfq_queue_pid(tsk, rw); struct cfq_queue *cfqq; struct cfq_rq *crq; unsigned long flags; might_sleep_if(gfp_mask & __GFP_WAIT); - cic = cfq_get_io_context(cfqd, cfq_queue_pid(current, rw), gfp_mask); + cic = cfq_get_io_context(cfqd, key, gfp_mask); spin_lock_irqsave(q->queue_lock, flags); @@ -1959,7 +2085,7 @@ cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio, goto queue_fail; if (!cic->cfqq) { - cfqq = cfq_get_queue(cfqd, current->pid, gfp_mask); + cfqq = cfq_get_queue(cfqd, key, tsk->ioprio, gfp_mask); if (!cfqq) goto queue_fail; @@ -1968,7 +2094,7 @@ cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio, cfqq = cic->cfqq; cfqq->allocated[rw]++; - cfqq->must_alloc = 0; + cfq_clear_cfqq_must_alloc(cfqq); cfqd->rq_starved = 0; atomic_inc(&cfqq->ref); spin_unlock_irqrestore(q->queue_lock, flags); @@ -1981,9 +2107,15 @@ cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio, INIT_HLIST_NODE(&crq->hash); crq->cfq_queue = cfqq; crq->io_context = cic; - crq->in_flight = crq->accounted = 0; - crq->is_sync = (rw == READ || process_sync(current)); - crq->requeued = 0; + cfq_clear_crq_in_flight(crq); + cfq_clear_crq_in_driver(crq); + cfq_clear_crq_requeued(crq); + + if (rw == READ || process_sync(tsk)) + cfq_mark_crq_is_sync(crq); + else + cfq_clear_crq_is_sync(crq); + rq->elevator_private = crq; return 0; } @@ -1991,7 +2123,7 @@ cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio, spin_lock_irqsave(q->queue_lock, flags); cfqq->allocated[rw]--; if (!(cfqq->allocated[0] + cfqq->allocated[1])) - cfqq->must_alloc = 1; + cfq_mark_cfqq_must_alloc(cfqq); cfq_put_queue(cfqq); queue_fail: if (cic) @@ -2002,7 +2134,7 @@ queue_fail: * that would be an extremely rare OOM situation */ cfqd->rq_starved = 1; - kblockd_schedule_work(&cfqd->unplug_work); + cfq_schedule_dispatch(cfqd); spin_unlock_irqrestore(q->queue_lock, flags); return 1; } @@ -2068,15 +2200,14 @@ static void cfq_idle_slice_timer(unsigned long data) * not expired and it has a request pending, let it dispatch */ if (!RB_EMPTY(&cfqq->sort_list)) { - cfqq->must_dispatch = 1; + cfq_mark_cfqq_must_dispatch(cfqq); goto out_kick; } } expire: cfq_slice_expired(cfqd, 0); out_kick: - if (cfq_pending_requests(cfqd)) - kblockd_schedule_work(&cfqd->unplug_work); + cfq_schedule_dispatch(cfqd); out_cont: spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); } @@ -2099,11 +2230,17 @@ static void cfq_idle_class_timer(unsigned long data) cfqd->idle_class_timer.expires = end; add_timer(&cfqd->idle_class_timer); } else - kblockd_schedule_work(&cfqd->unplug_work); + cfq_schedule_dispatch(cfqd); spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); } +static void cfq_shutdown_timer_wq(struct cfq_data *cfqd) +{ + del_timer_sync(&cfqd->idle_slice_timer); + del_timer_sync(&cfqd->idle_class_timer); + blk_sync_queue(cfqd->queue); +} static void cfq_put_cfqd(struct cfq_data *cfqd) { @@ -2112,7 +2249,7 @@ static void cfq_put_cfqd(struct cfq_data *cfqd) if (!atomic_dec_and_test(&cfqd->ref)) return; - blk_sync_queue(q); + cfq_shutdown_timer_wq(cfqd); blk_put_queue(q); @@ -2126,8 +2263,7 @@ static void 
cfq_exit_queue(elevator_t *e) { struct cfq_data *cfqd = e->elevator_data; - del_timer_sync(&cfqd->idle_slice_timer); - del_timer_sync(&cfqd->idle_class_timer); + cfq_shutdown_timer_wq(cfqd); cfq_put_cfqd(cfqd); } @@ -2198,6 +2334,7 @@ static int cfq_init_queue(request_queue_t *q, elevator_t *e) cfqd->cfq_slice_async_rq = cfq_slice_async_rq; cfqd->cfq_slice_idle = cfq_slice_idle; cfqd->cfq_max_depth = cfq_max_depth; + return 0; out_crqpool: kfree(cfqd->cfq_hash); @@ -2369,6 +2506,7 @@ static struct cfq_fs_entry cfq_max_depth_entry = { .show = cfq_max_depth_show, .store = cfq_max_depth_store, }; + static struct attribute *default_attrs[] = { &cfq_quantum_entry.attr, &cfq_queued_entry.attr, diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h index 7811300d88e..8a453a0b5e4 100644 --- a/include/linux/ioprio.h +++ b/include/linux/ioprio.h @@ -12,6 +12,7 @@ #define IOPRIO_PRIO_CLASS(mask) ((mask) >> IOPRIO_CLASS_SHIFT) #define IOPRIO_PRIO_DATA(mask) ((mask) & IOPRIO_PRIO_MASK) +#define IOPRIO_PRIO_VALUE(class, data) (((class) << IOPRIO_CLASS_SHIFT) | data) #define ioprio_valid(mask) (IOPRIO_PRIO_CLASS((mask)) != IOPRIO_CLASS_NONE) -- cgit v1.2.3-70-g09d2 From 4808a1c0261176f9c7e28e7f108d41a381a7d0fc Mon Sep 17 00:00:00 2001 From: Olav Kongas Date: Sat, 9 Apr 2005 22:57:39 +0300 Subject: [PATCH] USB: Add isp116x-hcd USB host controller driver This patch provides an "isp116x-hcd" driver for Philips' ISP1160/ISP1161 USB host controllers. The driver: - is relatively small, meant for use on embedded platforms. - runs usbtests 1-14 without problems for days. - has been in use by 6-7 different people on ARM and PPC platforms, running a range of devices including USB hubs. - supports suspend/resume of both the platform device and the root hub; supports remote wakeup of the root hub (but NOT the platform device) by USB devices. - does NOT support ISO transfers (nobody has asked for them). - is PIO-only. Signed-off-by: Olav Kongas Signed-off-by: Greg Kroah-Hartman --- drivers/usb/Makefile | 1 + drivers/usb/host/Kconfig | 13 + drivers/usb/host/Makefile | 1 + drivers/usb/host/isp116x-hcd.c | 1882 ++++++++++++++++++++++++++++++++++++++++ drivers/usb/host/isp116x.h | 583 +++++++++++++ include/linux/usb_isp116x.h | 47 + 6 files changed, 2527 insertions(+) create mode 100644 drivers/usb/host/isp116x-hcd.c create mode 100644 drivers/usb/host/isp116x.h create mode 100644 include/linux/usb_isp116x.h (limited to 'include/linux') diff --git a/drivers/usb/Makefile b/drivers/usb/Makefile index a61d4433a98..c149c06388b 100644 --- a/drivers/usb/Makefile +++ b/drivers/usb/Makefile @@ -9,6 +9,7 @@ obj-$(CONFIG_USB) += core/ obj-$(CONFIG_USB_MON) += mon/ obj-$(CONFIG_USB_EHCI_HCD) += host/ +obj-$(CONFIG_USB_ISP116X_HCD) += host/ obj-$(CONFIG_USB_OHCI_HCD) += host/ obj-$(CONFIG_USB_UHCI_HCD) += host/ obj-$(CONFIG_USB_SL811_HCD) += host/ diff --git a/drivers/usb/host/Kconfig b/drivers/usb/host/Kconfig index 19e598c9641..ed1899d307d 100644 --- a/drivers/usb/host/Kconfig +++ b/drivers/usb/host/Kconfig @@ -49,6 +49,19 @@ config USB_EHCI_ROOT_HUB_TT This supports the EHCI implementation from TransDimension Inc. +config USB_ISP116X_HCD + tristate "ISP116X HCD support" + depends on USB + default N + ---help--- + The ISP1160 and ISP1161 chips are USB host controllers. Enable this + option if your board has this chip. If unsure, say N. + + This driver does not support isochronous transfers. + + To compile this driver as a module, choose M here: the + module will be called isp116x-hcd. 
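+ This driver uses PIO only; it does not use DMA.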
+ config USB_OHCI_HCD tristate "OHCI HCD support" depends on USB && USB_ARCH_HAS_OHCI diff --git a/drivers/usb/host/Makefile b/drivers/usb/host/Makefile index 5dbd3e7a27c..350d14fc1cc 100644 --- a/drivers/usb/host/Makefile +++ b/drivers/usb/host/Makefile @@ -4,6 +4,7 @@ # obj-$(CONFIG_USB_EHCI_HCD) += ehci-hcd.o +obj-$(CONFIG_USB_ISP116X_HCD) += isp116x-hcd.o obj-$(CONFIG_USB_OHCI_HCD) += ohci-hcd.o obj-$(CONFIG_USB_UHCI_HCD) += uhci-hcd.o obj-$(CONFIG_USB_SL811_HCD) += sl811-hcd.o diff --git a/drivers/usb/host/isp116x-hcd.c b/drivers/usb/host/isp116x-hcd.c new file mode 100644 index 00000000000..69e7433d9ce --- /dev/null +++ b/drivers/usb/host/isp116x-hcd.c @@ -0,0 +1,1882 @@ +/* + * ISP116x HCD (Host Controller Driver) for USB. + * + * Derived from the SL811 HCD, rewritten for ISP116x. + * Copyright (C) 2005 Olav Kongas + * + * Portions: + * Copyright (C) 2004 Psion Teklogix (for NetBook PRO) + * Copyright (C) 2004 David Brownell + * + * Periodic scheduling is based on Roman's OHCI code + * Copyright (C) 1999 Roman Weissgaerber + * + */ + +/* + * The driver basically works. A number of people have used it with a range + * of devices. + * + * The driver passes all usbtests 1-14. + * + * Suspending/resuming of root hub via sysfs works. Remote wakeup works too. + * And suspending/resuming of platform device works too. Suspend/resume + * via HCD operations vector is not implemented. + * + * Iso transfer support is not implemented. Adding this would include + * implementing recovery from the failure to service the processed ITL + * fifo ram in time, which will involve chip reset. + * + * TODO: + + More testing of suspend/resume. +*/ + +/* + ISP116x chips require certain delays between accesses to their + registers. The following timing options exist. + + 1. Configure your memory controller (the best) + 2. Implement a platform-specific delay function, possibly + combined with configuring the memory controller; see + include/linux/usb-isp116x.h for more info. Some broken + memory controllers like the LH7A400 SMC need this. For + that to work, also uncomment the following + USE_PLATFORM_DELAY macro. + 3. Use ndelay (easiest, poorest). For that, uncomment + the following USE_NDELAY macro. +*/ +#define USE_PLATFORM_DELAY +//#define USE_NDELAY + +//#define DEBUG +//#define VERBOSE +/* Transfer descriptors.
See dump_ptd() for printout format */ +//#define PTD_TRACE +/* enqueuing/finishing log of urbs */ +//#define URB_TRACE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#ifndef DEBUG +# define STUB_DEBUG_FILE +#endif + +#include "../core/hcd.h" +#include "isp116x.h" + +#define DRIVER_VERSION "08 Apr 2005" +#define DRIVER_DESC "ISP116x USB Host Controller Driver" + +MODULE_DESCRIPTION(DRIVER_DESC); +MODULE_LICENSE("GPL"); + +static const char hcd_name[] = "isp116x-hcd"; + +/*-----------------------------------------------------------------*/ + +/* + Write len bytes to fifo, pad till 32-bit boundary + */ +static void write_ptddata_to_fifo(struct isp116x *isp116x, void *buf, int len) +{ + u8 *dp = (u8 *) buf; + u16 *dp2 = (u16 *) buf; + u16 w; + int quot = len % 4; + + if ((unsigned long)dp2 & 1) { + /* not aligned */ + for (; len > 1; len -= 2) { + w = *dp++; + w |= *dp++ << 8; + isp116x_raw_write_data16(isp116x, w); + } + if (len) + isp116x_write_data16(isp116x, (u16) * dp); + } else { + /* aligned */ + for (; len > 1; len -= 2) + isp116x_raw_write_data16(isp116x, *dp2++); + if (len) + isp116x_write_data16(isp116x, 0xff & *((u8 *) dp2)); + } + if (quot == 1 || quot == 2) + isp116x_raw_write_data16(isp116x, 0); +} + +/* + Read len bytes from fifo and then read till 32-bit boundary. + */ +static void read_ptddata_from_fifo(struct isp116x *isp116x, void *buf, int len) +{ + u8 *dp = (u8 *) buf; + u16 *dp2 = (u16 *) buf; + u16 w; + int quot = len % 4; + + if ((unsigned long)dp2 & 1) { + /* not aligned */ + for (; len > 1; len -= 2) { + w = isp116x_raw_read_data16(isp116x); + *dp++ = w & 0xff; + *dp++ = (w >> 8) & 0xff; + } + if (len) + *dp = 0xff & isp116x_read_data16(isp116x); + } else { + /* aligned */ + for (; len > 1; len -= 2) + *dp2++ = isp116x_raw_read_data16(isp116x); + if (len) + *(u8 *) dp2 = 0xff & isp116x_read_data16(isp116x); + } + if (quot == 1 || quot == 2) + isp116x_raw_read_data16(isp116x); +} + +/* + Write ptd's and data for scheduled transfers into + the fifo ram. Fifo must be empty and ready. +*/ +static void pack_fifo(struct isp116x *isp116x) +{ + struct isp116x_ep *ep; + struct ptd *ptd; + int buflen = isp116x->atl_last_dir == PTD_DIR_IN + ? isp116x->atl_bufshrt : isp116x->atl_buflen; + int ptd_count = 0; + + isp116x_write_reg16(isp116x, HCuPINT, HCuPINT_AIIEOT); + isp116x_write_reg16(isp116x, HCXFERCTR, buflen); + isp116x_write_addr(isp116x, HCATLPORT | ISP116x_WRITE_OFFSET); + for (ep = isp116x->atl_active; ep; ep = ep->active) { + ++ptd_count; + ptd = &ep->ptd; + dump_ptd(ptd); + dump_ptd_out_data(ptd, ep->data); + isp116x_write_data16(isp116x, ptd->count); + isp116x_write_data16(isp116x, ptd->mps); + isp116x_write_data16(isp116x, ptd->len); + isp116x_write_data16(isp116x, ptd->faddr); + buflen -= sizeof(struct ptd); + /* Skip writing data for last IN PTD */ + if (ep->active || (isp116x->atl_last_dir != PTD_DIR_IN)) { + write_ptddata_to_fifo(isp116x, ep->data, ep->length); + buflen -= ALIGN(ep->length, 4); + } + } + BUG_ON(buflen); +} + +/* + Read the processed ptd's and data from fifo ram back to + URBs' buffers. Fifo must be full and done +*/ +static void unpack_fifo(struct isp116x *isp116x) +{ + struct isp116x_ep *ep; + struct ptd *ptd; + int buflen = isp116x->atl_last_dir == PTD_DIR_IN + ? 
isp116x->atl_buflen : isp116x->atl_bufshrt; + + isp116x_write_reg16(isp116x, HCuPINT, HCuPINT_AIIEOT); + isp116x_write_reg16(isp116x, HCXFERCTR, buflen); + isp116x_write_addr(isp116x, HCATLPORT); + for (ep = isp116x->atl_active; ep; ep = ep->active) { + ptd = &ep->ptd; + ptd->count = isp116x_read_data16(isp116x); + ptd->mps = isp116x_read_data16(isp116x); + ptd->len = isp116x_read_data16(isp116x); + ptd->faddr = isp116x_read_data16(isp116x); + buflen -= sizeof(struct ptd); + /* Skip reading data for last Setup or Out PTD */ + if (ep->active || (isp116x->atl_last_dir == PTD_DIR_IN)) { + read_ptddata_from_fifo(isp116x, ep->data, ep->length); + buflen -= ALIGN(ep->length, 4); + } + dump_ptd(ptd); + dump_ptd_in_data(ptd, ep->data); + } + BUG_ON(buflen); +} + +/*---------------------------------------------------------------*/ + +/* + Set up PTD's. +*/ +static void preproc_atl_queue(struct isp116x *isp116x) +{ + struct isp116x_ep *ep; + struct urb *urb; + struct ptd *ptd; + u16 toggle, dir, len; + + for (ep = isp116x->atl_active; ep; ep = ep->active) { + BUG_ON(list_empty(&ep->hep->urb_list)); + urb = container_of(ep->hep->urb_list.next, + struct urb, urb_list); + ptd = &ep->ptd; + len = ep->length; + spin_lock(&urb->lock); + ep->data = (unsigned char *)urb->transfer_buffer + + urb->actual_length; + + switch (ep->nextpid) { + case USB_PID_IN: + toggle = usb_gettoggle(urb->dev, ep->epnum, 0); + dir = PTD_DIR_IN; + break; + case USB_PID_OUT: + toggle = usb_gettoggle(urb->dev, ep->epnum, 1); + dir = PTD_DIR_OUT; + break; + case USB_PID_SETUP: + toggle = 0; + dir = PTD_DIR_SETUP; + len = sizeof(struct usb_ctrlrequest); + ep->data = urb->setup_packet; + break; + case USB_PID_ACK: + toggle = 1; + len = 0; + dir = (urb->transfer_buffer_length + && usb_pipein(urb->pipe)) + ? PTD_DIR_OUT : PTD_DIR_IN; + break; + default: + /* To please gcc */ + toggle = dir = 0; + ERR("%s %d: ep->nextpid %d\n", __func__, __LINE__, + ep->nextpid); + BUG_ON(1); + } + + ptd->count = PTD_CC_MSK | PTD_ACTIVE_MSK | PTD_TOGGLE(toggle); + ptd->mps = PTD_MPS(ep->maxpacket) + | PTD_SPD(urb->dev->speed == USB_SPEED_LOW) + | PTD_EP(ep->epnum); + ptd->len = PTD_LEN(len) | PTD_DIR(dir); + ptd->faddr = PTD_FA(usb_pipedevice(urb->pipe)); + spin_unlock(&urb->lock); + if (!ep->active) { + ptd->mps |= PTD_LAST_MSK; + isp116x->atl_last_dir = dir; + } + isp116x->atl_bufshrt = sizeof(struct ptd) + isp116x->atl_buflen; + isp116x->atl_buflen = isp116x->atl_bufshrt + ALIGN(len, 4); + } +} + +/* + Analyze transfer results, handle partial transfers and errors +*/ +static void postproc_atl_queue(struct isp116x *isp116x) +{ + struct isp116x_ep *ep; + struct urb *urb; + struct usb_device *udev; + struct ptd *ptd; + int short_not_ok; + u8 cc; + + for (ep = isp116x->atl_active; ep; ep = ep->active) { + BUG_ON(list_empty(&ep->hep->urb_list)); + urb = + container_of(ep->hep->urb_list.next, struct urb, urb_list); + udev = urb->dev; + ptd = &ep->ptd; + cc = PTD_GET_CC(ptd); + + spin_lock(&urb->lock); + short_not_ok = 1; + + /* Data underrun is special. For allowed underrun + we clear the error and continue as normal. For + forbidden underrun we finish the DATA stage + immediately while for control transfer, + we do a STATUS stage. 
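To see this policy from the submitter's side, here is a hedged sketch (not part of this patch; all names illustrative) of a peripheral driver requesting strict-length reads. With URB_SHORT_NOT_OK set, the "forbidden underrun" branch below fails the URB with cc_to_error[TD_DATAUNDERRUN], which isp116x.h maps to -EREMOTEIO:

#include <linux/kernel.h>
#include <linux/usb.h>

/* Illustrative completion handler (2.6.12-era signature). */
static void my_complete(struct urb *urb, struct pt_regs *regs)
{
	/* A device that sent less than requested surfaces here
	   as -EREMOTEIO instead of a silently short buffer. */
	if (urb->status == -EREMOTEIO)
		printk(KERN_DEBUG "short read refused\n");
}

static int submit_strict_read(struct usb_device *udev, unsigned ep_in,
			      void *buf, int len)
{
	struct urb *urb = usb_alloc_urb(0, GFP_KERNEL);

	if (!urb)
		return -ENOMEM;
	usb_fill_bulk_urb(urb, udev, usb_rcvbulkpipe(udev, ep_in),
			  buf, len, my_complete, NULL);
	urb->transfer_flags |= URB_SHORT_NOT_OK;
	return usb_submit_urb(urb, GFP_KERNEL);
}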
*/ + if (cc == TD_DATAUNDERRUN) { + if (!(urb->transfer_flags & URB_SHORT_NOT_OK)) { + DBG("Allowed data underrun\n"); + cc = TD_CC_NOERROR; + short_not_ok = 0; + } else { + ep->error_count = 1; + if (usb_pipecontrol(urb->pipe)) + ep->nextpid = USB_PID_ACK; + else + usb_settoggle(udev, ep->epnum, + ep->nextpid == + USB_PID_OUT, + PTD_GET_TOGGLE(ptd) ^ 1); + urb->status = cc_to_error[TD_DATAUNDERRUN]; + spin_unlock(&urb->lock); + continue; + } + } + /* Keep underrun error through the STATUS stage */ + if (urb->status == cc_to_error[TD_DATAUNDERRUN]) + cc = TD_DATAUNDERRUN; + + if (cc != TD_CC_NOERROR && cc != TD_NOTACCESSED + && (++ep->error_count >= 3 || cc == TD_CC_STALL + || cc == TD_DATAOVERRUN)) { + if (urb->status == -EINPROGRESS) + urb->status = cc_to_error[cc]; + if (ep->nextpid == USB_PID_ACK) + ep->nextpid = 0; + spin_unlock(&urb->lock); + continue; + } + /* According to the USB spec, a zero-length Int transfer signals + finishing of the urb. Hey, does this apply only + for IN endpoints? */ + if (usb_pipeint(urb->pipe) && !PTD_GET_LEN(ptd)) { + if (urb->status == -EINPROGRESS) + urb->status = 0; + spin_unlock(&urb->lock); + continue; + } + + /* Relax after previously failed, but later succeeded + or correctly NAK'ed retransmission attempt */ + if (ep->error_count + && (cc == TD_CC_NOERROR || cc == TD_NOTACCESSED)) + ep->error_count = 0; + + /* Take into account idiosyncrasies of the isp116x chip + regarding the toggle bit for failed transfers */ + if (ep->nextpid == USB_PID_OUT) + usb_settoggle(udev, ep->epnum, 1, PTD_GET_TOGGLE(ptd) + ^ (ep->error_count > 0)); + else if (ep->nextpid == USB_PID_IN) + usb_settoggle(udev, ep->epnum, 0, PTD_GET_TOGGLE(ptd) + ^ (ep->error_count > 0)); + + switch (ep->nextpid) { + case USB_PID_IN: + case USB_PID_OUT: + urb->actual_length += PTD_GET_COUNT(ptd); + if (PTD_GET_ACTIVE(ptd) + || (cc != TD_CC_NOERROR && cc < 0x0E)) + break; + if (urb->transfer_buffer_length != urb->actual_length) { + if (short_not_ok) + break; + } else { + if (urb->transfer_flags & URB_ZERO_PACKET + && ep->nextpid == USB_PID_OUT + && !(PTD_GET_COUNT(ptd) % ep->maxpacket)) { + DBG("Zero packet requested\n"); + break; + } + } + /* All data for this URB is transferred, let's finish */ + if (usb_pipecontrol(urb->pipe)) + ep->nextpid = USB_PID_ACK; + else if (urb->status == -EINPROGRESS) + urb->status = 0; + break; + case USB_PID_SETUP: + if (PTD_GET_ACTIVE(ptd) + || (cc != TD_CC_NOERROR && cc < 0x0E)) + break; + if (urb->transfer_buffer_length == urb->actual_length) + ep->nextpid = USB_PID_ACK; + else if (usb_pipeout(urb->pipe)) { + usb_settoggle(udev, 0, 1, 1); + ep->nextpid = USB_PID_OUT; + } else { + usb_settoggle(udev, 0, 0, 1); + ep->nextpid = USB_PID_IN; + } + break; + case USB_PID_ACK: + if (PTD_GET_ACTIVE(ptd) + || (cc != TD_CC_NOERROR && cc < 0x0E)) + break; + if (urb->status == -EINPROGRESS) + urb->status = 0; + ep->nextpid = 0; + break; + default: + BUG_ON(1); + } + spin_unlock(&urb->lock); + } +} + +/* + Take done or failed requests out of schedule. Give back + processed urbs.
+*/ +static void finish_request(struct isp116x *isp116x, struct isp116x_ep *ep, + struct urb *urb, struct pt_regs *regs) +__releases(isp116x->lock) __acquires(isp116x->lock) +{ + unsigned i; + + urb->hcpriv = NULL; + ep->error_count = 0; + + if (usb_pipecontrol(urb->pipe)) + ep->nextpid = USB_PID_SETUP; + + urb_dbg(urb, "Finish"); + + spin_unlock(&isp116x->lock); + usb_hcd_giveback_urb(isp116x_to_hcd(isp116x), urb, regs); + spin_lock(&isp116x->lock); + + /* take idle endpoints out of the schedule */ + if (!list_empty(&ep->hep->urb_list)) + return; + + /* async deschedule */ + if (!list_empty(&ep->schedule)) { + list_del_init(&ep->schedule); + return; + } + + /* periodic deschedule */ + DBG("deschedule qh%d/%p branch %d\n", ep->period, ep, ep->branch); + for (i = ep->branch; i < PERIODIC_SIZE; i += ep->period) { + struct isp116x_ep *temp; + struct isp116x_ep **prev = &isp116x->periodic[i]; + + while (*prev && ((temp = *prev) != ep)) + prev = &temp->next; + if (*prev) + *prev = ep->next; + isp116x->load[i] -= ep->load; + } + ep->branch = PERIODIC_SIZE; + isp116x_to_hcd(isp116x)->self.bandwidth_allocated -= + ep->load / ep->period; + + /* switch irq type? */ + if (!--isp116x->periodic_count) { + isp116x->irqenb &= ~HCuPINT_SOF; + isp116x->irqenb |= HCuPINT_ATL; + } +} + +/* + Scan transfer lists, schedule transfers, send data off + to chip. + */ +static void start_atl_transfers(struct isp116x *isp116x) +{ + struct isp116x_ep *last_ep = NULL, *ep; + struct urb *urb; + u16 load = 0; + int len, index, speed, byte_time; + + if (atomic_read(&isp116x->atl_finishing)) + return; + + if (!HC_IS_RUNNING(isp116x_to_hcd(isp116x)->state)) + return; + + /* FIFO not empty? */ + if (isp116x_read_reg16(isp116x, HCBUFSTAT) & HCBUFSTAT_ATL_FULL) + return; + + isp116x->atl_active = NULL; + isp116x->atl_buflen = isp116x->atl_bufshrt = 0; + + /* Schedule int transfers */ + if (isp116x->periodic_count) { + isp116x->fmindex = index = + (isp116x->fmindex + 1) & (PERIODIC_SIZE - 1); + if ((load = isp116x->load[index])) { + /* Bring all int transfers for this frame + into the active queue */ + isp116x->atl_active = last_ep = + isp116x->periodic[index]; + while (last_ep->next) + last_ep = (last_ep->active = last_ep->next); + last_ep->active = NULL; + } + } + + /* Schedule control/bulk transfers */ + list_for_each_entry(ep, &isp116x->async, schedule) { + urb = container_of(ep->hep->urb_list.next, + struct urb, urb_list); + speed = urb->dev->speed; + byte_time = speed == USB_SPEED_LOW + ? BYTE_TIME_LOWSPEED : BYTE_TIME_FULLSPEED; + + if (ep->nextpid == USB_PID_SETUP) { + len = sizeof(struct usb_ctrlrequest); + } else if (ep->nextpid == USB_PID_ACK) { + len = 0; + } else { + /* Find current free length ... */ + len = (MAX_LOAD_LIMIT - load) / byte_time; + + /* ... then limit it to configured max size ... */ + len = min(len, speed == USB_SPEED_LOW ? + MAX_TRANSFER_SIZE_LOWSPEED : + MAX_TRANSFER_SIZE_FULLSPEED); + + /* ... and finally cut to the multiple of MaxPacketSize, + or to the real length if there's enough room. 
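The length budget just described is plain arithmetic and can be sanity-checked in isolation. Below is a throwaway userspace rendering of the same step; the constants mirror isp116x.h further down in this patch, and the inputs in main() are made up:

#include <stdio.h>

#define MAX_LOAD_LIMIT 850		/* us of the 1 ms frame */
#define BYTE_TIME_FULLSPEED 1		/* rough us per byte */
#define MAX_TRANSFER_SIZE_FULLSPEED 832

/* How many bytes may a full-speed endpoint move this frame? */
static int budget(int load, int remaining, int maxpacket)
{
	int len = (MAX_LOAD_LIMIT - load) / BYTE_TIME_FULLSPEED;

	if (len > MAX_TRANSFER_SIZE_FULLSPEED)
		len = MAX_TRANSFER_SIZE_FULLSPEED;
	if (len < remaining)
		len -= len % maxpacket;	/* whole packets only */
	else
		len = remaining;	/* the tail fits as-is */
	return len;
}

int main(void)
{
	/* 100 us already booked, 1500 bytes pending, 64-byte packets:
	   (850 - 100) / 1 = 750 bytes of budget, trimmed down to the
	   packet multiple 11 * 64 = 704. */
	printf("len = %d\n", budget(100, 1500, 64));
	return 0;
}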
*/ + if (len < + (urb->transfer_buffer_length - + urb->actual_length)) { + len -= len % ep->maxpacket; + if (!len) + continue; + } else + len = urb->transfer_buffer_length - + urb->actual_length; + BUG_ON(len < 0); + } + + load += len * byte_time; + if (load > MAX_LOAD_LIMIT) + break; + + ep->active = NULL; + ep->length = len; + if (last_ep) + last_ep->active = ep; + else + isp116x->atl_active = ep; + last_ep = ep; + } + + /* Avoid starvation of endpoints */ + if ((&isp116x->async)->next != (&isp116x->async)->prev) + list_move(&isp116x->async, (&isp116x->async)->next); + + if (isp116x->atl_active) { + preproc_atl_queue(isp116x); + pack_fifo(isp116x); + } +} + +/* + Finish the processed transfers +*/ +static void finish_atl_transfers(struct isp116x *isp116x, struct pt_regs *regs) +{ + struct isp116x_ep *ep; + struct urb *urb; + + if (!isp116x->atl_active) + return; + /* Fifo not ready? */ + if (!(isp116x_read_reg16(isp116x, HCBUFSTAT) & HCBUFSTAT_ATL_DONE)) + return; + + atomic_inc(&isp116x->atl_finishing); + unpack_fifo(isp116x); + postproc_atl_queue(isp116x); + for (ep = isp116x->atl_active; ep; ep = ep->active) { + urb = + container_of(ep->hep->urb_list.next, struct urb, urb_list); + /* USB_PID_ACK check here avoids finishing of + control transfers, for which TD_DATAUNDERRUN + occurred, while URB_SHORT_NOT_OK was set */ + if (urb && urb->status != -EINPROGRESS + && ep->nextpid != USB_PID_ACK) + finish_request(isp116x, ep, urb, regs); + } + atomic_dec(&isp116x->atl_finishing); +} + +static irqreturn_t isp116x_irq(struct usb_hcd *hcd, struct pt_regs *regs) +{ + struct isp116x *isp116x = hcd_to_isp116x(hcd); + u16 irqstat; + irqreturn_t ret = IRQ_NONE; + + spin_lock(&isp116x->lock); + isp116x_write_reg16(isp116x, HCuPINTENB, 0); + irqstat = isp116x_read_reg16(isp116x, HCuPINT); + isp116x_write_reg16(isp116x, HCuPINT, irqstat); + + if (irqstat & (HCuPINT_ATL | HCuPINT_SOF)) { + ret = IRQ_HANDLED; + finish_atl_transfers(isp116x, regs); + } + + if (irqstat & HCuPINT_OPR) { + u32 intstat = isp116x_read_reg32(isp116x, HCINTSTAT); + isp116x_write_reg32(isp116x, HCINTSTAT, intstat); + if (intstat & HCINT_UE) { + ERR("Unrecoverable error\n"); + /* What should we do here? Reset? */ + } + if (intstat & HCINT_RHSC) { + isp116x->rhstatus = + isp116x_read_reg32(isp116x, HCRHSTATUS); + isp116x->rhport[0] = + isp116x_read_reg32(isp116x, HCRHPORT1); + isp116x->rhport[1] = + isp116x_read_reg32(isp116x, HCRHPORT2); + } + if (intstat & HCINT_RD) { + DBG("---- remote wakeup\n"); + schedule_work(&isp116x->rh_resume); + ret = IRQ_HANDLED; + } + irqstat &= ~HCuPINT_OPR; + ret = IRQ_HANDLED; + } + + if (irqstat & (HCuPINT_ATL | HCuPINT_SOF)) { + start_atl_transfers(isp116x); + } + + isp116x_write_reg16(isp116x, HCuPINTENB, isp116x->irqenb); + spin_unlock(&isp116x->lock); + return ret; +} + +/*-----------------------------------------------------------------*/ + +/* usb 1.1 says max 90% of a frame is available for periodic transfers. + * this driver doesn't promise that much since it's got to handle an + * IRQ per packet; irq handling latencies also use up that time. + */ + +/* out of 1000 us */ +#define MAX_PERIODIC_LOAD 600 +static int balance(struct isp116x *isp116x, u16 period, u16 load) +{ + int i, branch = -ENOSPC; + + /* search for the least loaded schedule branch of that period + which has enough bandwidth left unreserved.
*/ + for (i = 0; i < period; i++) { + if (branch < 0 || isp116x->load[branch] > isp116x->load[i]) { + int j; + + for (j = i; j < PERIODIC_SIZE; j += period) { + if ((isp116x->load[j] + load) + > MAX_PERIODIC_LOAD) + break; + } + if (j < PERIODIC_SIZE) + continue; + branch = i; + } + } + return branch; +} + +/* NB! ALL the code above this point runs with isp116x->lock + held, irqs off +*/ + +/*-----------------------------------------------------------------*/ + +static int isp116x_urb_enqueue(struct usb_hcd *hcd, + struct usb_host_endpoint *hep, struct urb *urb, + int mem_flags) +{ + struct isp116x *isp116x = hcd_to_isp116x(hcd); + struct usb_device *udev = urb->dev; + unsigned int pipe = urb->pipe; + int is_out = !usb_pipein(pipe); + int type = usb_pipetype(pipe); + int epnum = usb_pipeendpoint(pipe); + struct isp116x_ep *ep = NULL; + unsigned long flags; + int i; + int ret = 0; + + urb_dbg(urb, "Enqueue"); + + if (type == PIPE_ISOCHRONOUS) { + ERR("Isochronous transfers not supported\n"); + urb_dbg(urb, "Refused to enqueue"); + return -ENXIO; + } + /* avoid all allocations within spinlocks: request or endpoint */ + if (!hep->hcpriv) { + ep = kcalloc(1, sizeof *ep, (__force unsigned)mem_flags); + if (!ep) + return -ENOMEM; + } + + spin_lock_irqsave(&isp116x->lock, flags); + if (!HC_IS_RUNNING(hcd->state)) { + ret = -ENODEV; + goto fail; + } + + if (hep->hcpriv) + ep = hep->hcpriv; + else { + INIT_LIST_HEAD(&ep->schedule); + ep->udev = usb_get_dev(udev); + ep->epnum = epnum; + ep->maxpacket = usb_maxpacket(udev, urb->pipe, is_out); + usb_settoggle(udev, epnum, is_out, 0); + + if (type == PIPE_CONTROL) { + ep->nextpid = USB_PID_SETUP; + } else if (is_out) { + ep->nextpid = USB_PID_OUT; + } else { + ep->nextpid = USB_PID_IN; + } + + if (urb->interval) { + /* + With INT URBs submitted, the driver works with SOF + interrupt enabled and ATL interrupt disabled. After + the PTDs are written to fifo ram, the chip starts + fifo processing and usb transfers after the next + SOF and continues until the transfers are finished + (succeeded or failed) or the frame ends. Therefore, + the transfers occur only in every second frame, + while fifo reading/writing and data processing + occur in every other second frame. 
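In other words, urb->interval counts frames while ep->period counts scheduler slots, at two frames per slot. The clamping that follows can be tabulated with a small standalone helper (PERIODIC_SIZE copied from isp116x.h):

#include <stdio.h>

#define PERIODIC_SIZE 32	/* matches LOG2_PERIODIC_SIZE 5 in isp116x.h */

/* Mirrors the clamping below: interval in frames -> scheduler period
   in slots, where only every second frame carries USB transfers. */
static int to_period(int interval)
{
	if (interval < 2)
		interval = 2;
	if (interval > 2 * PERIODIC_SIZE)
		interval = 2 * PERIODIC_SIZE;
	return interval >> 1;
}

int main(void)
{
	int ivals[] = { 1, 8, 10, 255 };
	int i;

	for (i = 0; i < 4; i++)	/* 1->1, 8->4, 10->5, 255->32 */
		printf("interval %3d -> period %2d\n", ivals[i],
		       to_period(ivals[i]));
	return 0;
}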
*/ + if (urb->interval < 2) + urb->interval = 2; + if (urb->interval > 2 * PERIODIC_SIZE) + urb->interval = 2 * PERIODIC_SIZE; + ep->period = urb->interval >> 1; + ep->branch = PERIODIC_SIZE; + ep->load = usb_calc_bus_time(udev->speed, + !is_out, + (type == PIPE_ISOCHRONOUS), + usb_maxpacket(udev, pipe, + is_out)) / + 1000; + } + hep->hcpriv = ep; + ep->hep = hep; + } + + /* maybe put endpoint into schedule */ + switch (type) { + case PIPE_CONTROL: + case PIPE_BULK: + if (list_empty(&ep->schedule)) + list_add_tail(&ep->schedule, &isp116x->async); + break; + case PIPE_INTERRUPT: + urb->interval = ep->period; + ep->length = min((int)ep->maxpacket, + urb->transfer_buffer_length); + + /* urb submitted for already existing endpoint */ + if (ep->branch < PERIODIC_SIZE) + break; + + ret = ep->branch = balance(isp116x, ep->period, ep->load); + if (ret < 0) + goto fail; + ret = 0; + + urb->start_frame = (isp116x->fmindex & (PERIODIC_SIZE - 1)) + + ep->branch; + + /* sort each schedule branch by period (slow before fast) + to share the faster parts of the tree without needing + dummy/placeholder nodes */ + DBG("schedule qh%d/%p branch %d\n", ep->period, ep, ep->branch); + for (i = ep->branch; i < PERIODIC_SIZE; i += ep->period) { + struct isp116x_ep **prev = &isp116x->periodic[i]; + struct isp116x_ep *here = *prev; + + while (here && ep != here) { + if (ep->period > here->period) + break; + prev = &here->next; + here = *prev; + } + if (ep != here) { + ep->next = here; + *prev = ep; + } + isp116x->load[i] += ep->load; + } + hcd->self.bandwidth_allocated += ep->load / ep->period; + + /* switch over to SOFint */ + if (!isp116x->periodic_count++) { + isp116x->irqenb &= ~HCuPINT_ATL; + isp116x->irqenb |= HCuPINT_SOF; + isp116x_write_reg16(isp116x, HCuPINTENB, + isp116x->irqenb); + } + } + + /* in case of unlink-during-submit */ + spin_lock(&urb->lock); + if (urb->status != -EINPROGRESS) { + spin_unlock(&urb->lock); + finish_request(isp116x, ep, urb, NULL); + ret = 0; + goto fail; + } + urb->hcpriv = hep; + spin_unlock(&urb->lock); + start_atl_transfers(isp116x); + + fail: + spin_unlock_irqrestore(&isp116x->lock, flags); + return ret; +} + +/* + Dequeue URBs. +*/ +static int isp116x_urb_dequeue(struct usb_hcd *hcd, struct urb *urb) +{ + struct isp116x *isp116x = hcd_to_isp116x(hcd); + struct usb_host_endpoint *hep; + struct isp116x_ep *ep, *ep_act; + unsigned long flags; + + spin_lock_irqsave(&isp116x->lock, flags); + hep = urb->hcpriv; + /* URB already unlinked (or never linked)? */ + if (!hep) { + spin_unlock_irqrestore(&isp116x->lock, flags); + return 0; + } + ep = hep->hcpriv; + WARN_ON(hep != ep->hep); + + /* In front of queue? */ + if (ep->hep->urb_list.next == &urb->urb_list) + /* active? 
*/ + for (ep_act = isp116x->atl_active; ep_act; + ep_act = ep_act->active) + if (ep_act == ep) { + VDBG("dequeue, urb %p active; wait for irq\n", + urb); + urb = NULL; + break; + } + + if (urb) + finish_request(isp116x, ep, urb, NULL); + + spin_unlock_irqrestore(&isp116x->lock, flags); + return 0; +} + +static void isp116x_endpoint_disable(struct usb_hcd *hcd, + struct usb_host_endpoint *hep) +{ + int i; + struct isp116x_ep *ep = hep->hcpriv; + + if (!ep) + return; + + /* assume we'd just wait for the irq */ + for (i = 0; i < 100 && !list_empty(&hep->urb_list); i++) + msleep(3); + if (!list_empty(&hep->urb_list)) + WARN("ep %p not empty?\n", ep); + + usb_put_dev(ep->udev); + kfree(ep); + hep->hcpriv = NULL; +} + +static int isp116x_get_frame(struct usb_hcd *hcd) +{ + struct isp116x *isp116x = hcd_to_isp116x(hcd); + u32 fmnum; + unsigned long flags; + + spin_lock_irqsave(&isp116x->lock, flags); + fmnum = isp116x_read_reg32(isp116x, HCFMNUM); + spin_unlock_irqrestore(&isp116x->lock, flags); + return (int)fmnum; +} + +/*----------------------------------------------------------------*/ + +/* + Adapted from ohci-hub.c. Currently we don't support autosuspend. +*/ +static int isp116x_hub_status_data(struct usb_hcd *hcd, char *buf) +{ + struct isp116x *isp116x = hcd_to_isp116x(hcd); + int ports, i, changed = 0; + + if (!HC_IS_RUNNING(hcd->state)) + return -ESHUTDOWN; + + ports = isp116x->rhdesca & RH_A_NDP; + + /* init status */ + if (isp116x->rhstatus & (RH_HS_LPSC | RH_HS_OCIC)) + buf[0] = changed = 1; + else + buf[0] = 0; + + for (i = 0; i < ports; i++) { + u32 status = isp116x->rhport[i]; + + if (status & (RH_PS_CSC | RH_PS_PESC | RH_PS_PSSC + | RH_PS_OCIC | RH_PS_PRSC)) { + changed = 1; + buf[0] |= 1 << (i + 1); + continue; + } + } + return changed; +} + +static void isp116x_hub_descriptor(struct isp116x *isp116x, + struct usb_hub_descriptor *desc) +{ + u32 reg = isp116x->rhdesca; + + desc->bDescriptorType = 0x29; + desc->bDescLength = 9; + desc->bHubContrCurrent = 0; + desc->bNbrPorts = (u8) (reg & 0x3); + /* Power switching, device type, overcurrent. */ + desc->wHubCharacteristics = + (__force __u16) cpu_to_le16((u16) ((reg >> 8) & 0x1f)); + desc->bPwrOn2PwrGood = (u8) ((reg >> 24) & 0xff); + /* two bitmaps: ports removable, and legacy PortPwrCtrlMask */ + desc->bitmap[0] = desc->bNbrPorts == 1 ? 1 << 1 : 3 << 1; + desc->bitmap[1] = ~0; +} + +/* Perform reset of a given port. + It would be great to just start the reset and let the + USB core clear the reset in due time. However, + root hub ports should be reset for at least 50 ms, while + our chip stays in reset for about 10 ms. I.e., we must + repeatedly reset it ourselves here. +*/ +static inline void root_port_reset(struct isp116x *isp116x, unsigned port) +{ + u32 tmp; + unsigned long flags, t; + + /* Root hub reset should be 50 ms, but some devices + want it even longer. */ + t = jiffies + msecs_to_jiffies(100); + + while (time_before(jiffies, t)) { + spin_lock_irqsave(&isp116x->lock, flags); + /* spin until any current reset finishes */ + for (;;) { + tmp = isp116x_read_reg32(isp116x, port ? + HCRHPORT2 : HCRHPORT1); + if (!(tmp & RH_PS_PRS)) + break; + udelay(500); + } + /* Don't reset a disconnected port */ + if (!(tmp & RH_PS_CCS)) { + spin_unlock_irqrestore(&isp116x->lock, flags); + break; + } + /* Reset lasts 10 ms (so the datasheet claims) */ + isp116x_write_reg32(isp116x, port ?
HCRHPORT2 : + HCRHPORT1, (RH_PS_PRS)); + spin_unlock_irqrestore(&isp116x->lock, flags); + msleep(10); + } +} + +/* Adapted from ohci-hub.c */ +static int isp116x_hub_control(struct usb_hcd *hcd, + u16 typeReq, + u16 wValue, u16 wIndex, char *buf, u16 wLength) +{ + struct isp116x *isp116x = hcd_to_isp116x(hcd); + int ret = 0; + unsigned long flags; + int ports = isp116x->rhdesca & RH_A_NDP; + u32 tmp = 0; + + switch (typeReq) { + case ClearHubFeature: + DBG("ClearHubFeature: "); + switch (wValue) { + case C_HUB_OVER_CURRENT: + DBG("C_HUB_OVER_CURRENT\n"); + spin_lock_irqsave(&isp116x->lock, flags); + isp116x_write_reg32(isp116x, HCRHSTATUS, RH_HS_OCIC); + spin_unlock_irqrestore(&isp116x->lock, flags); + case C_HUB_LOCAL_POWER: + DBG("C_HUB_LOCAL_POWER\n"); + break; + default: + goto error; + } + break; + case SetHubFeature: + DBG("SetHubFeature: "); + switch (wValue) { + case C_HUB_OVER_CURRENT: + case C_HUB_LOCAL_POWER: + DBG("C_HUB_OVER_CURRENT or C_HUB_LOCAL_POWER\n"); + break; + default: + goto error; + } + break; + case GetHubDescriptor: + DBG("GetHubDescriptor\n"); + isp116x_hub_descriptor(isp116x, + (struct usb_hub_descriptor *)buf); + break; + case GetHubStatus: + DBG("GetHubStatus\n"); + *(__le32 *) buf = cpu_to_le32(0); + break; + case GetPortStatus: + DBG("GetPortStatus\n"); + if (!wIndex || wIndex > ports) + goto error; + tmp = isp116x->rhport[--wIndex]; + *(__le32 *) buf = cpu_to_le32(tmp); + DBG("GetPortStatus: port[%d] %08x\n", wIndex + 1, tmp); + break; + case ClearPortFeature: + DBG("ClearPortFeature: "); + if (!wIndex || wIndex > ports) + goto error; + wIndex--; + + switch (wValue) { + case USB_PORT_FEAT_ENABLE: + DBG("USB_PORT_FEAT_ENABLE\n"); + tmp = RH_PS_CCS; + break; + case USB_PORT_FEAT_C_ENABLE: + DBG("USB_PORT_FEAT_C_ENABLE\n"); + tmp = RH_PS_PESC; + break; + case USB_PORT_FEAT_SUSPEND: + DBG("USB_PORT_FEAT_SUSPEND\n"); + tmp = RH_PS_POCI; + break; + case USB_PORT_FEAT_C_SUSPEND: + DBG("USB_PORT_FEAT_C_SUSPEND\n"); + tmp = RH_PS_PSSC; + break; + case USB_PORT_FEAT_POWER: + DBG("USB_PORT_FEAT_POWER\n"); + tmp = RH_PS_LSDA; + break; + case USB_PORT_FEAT_C_CONNECTION: + DBG("USB_PORT_FEAT_C_CONNECTION\n"); + tmp = RH_PS_CSC; + break; + case USB_PORT_FEAT_C_OVER_CURRENT: + DBG("USB_PORT_FEAT_C_OVER_CURRENT\n"); + tmp = RH_PS_OCIC; + break; + case USB_PORT_FEAT_C_RESET: + DBG("USB_PORT_FEAT_C_RESET\n"); + tmp = RH_PS_PRSC; + break; + default: + goto error; + } + spin_lock_irqsave(&isp116x->lock, flags); + isp116x_write_reg32(isp116x, wIndex + ? HCRHPORT2 : HCRHPORT1, tmp); + isp116x->rhport[wIndex] = + isp116x_read_reg32(isp116x, wIndex ? HCRHPORT2 : HCRHPORT1); + spin_unlock_irqrestore(&isp116x->lock, flags); + break; + case SetPortFeature: + DBG("SetPortFeature: "); + if (!wIndex || wIndex > ports) + goto error; + wIndex--; + switch (wValue) { + case USB_PORT_FEAT_SUSPEND: + DBG("USB_PORT_FEAT_SUSPEND\n"); + spin_lock_irqsave(&isp116x->lock, flags); + isp116x_write_reg32(isp116x, wIndex + ? HCRHPORT2 : HCRHPORT1, RH_PS_PSS); + break; + case USB_PORT_FEAT_POWER: + DBG("USB_PORT_FEAT_POWER\n"); + spin_lock_irqsave(&isp116x->lock, flags); + isp116x_write_reg32(isp116x, wIndex + ? HCRHPORT2 : HCRHPORT1, RH_PS_PPS); + break; + case USB_PORT_FEAT_RESET: + DBG("USB_PORT_FEAT_RESET\n"); + root_port_reset(isp116x, wIndex); + spin_lock_irqsave(&isp116x->lock, flags); + break; + default: + goto error; + } + isp116x->rhport[wIndex] = + isp116x_read_reg32(isp116x, wIndex ? 
HCRHPORT2 : HCRHPORT1); + spin_unlock_irqrestore(&isp116x->lock, flags); + break; + + default: + error: + /* "protocol stall" on error */ + DBG("PROTOCOL STALL\n"); + ret = -EPIPE; + } + return ret; +} + +#ifdef CONFIG_PM + +static int isp116x_hub_suspend(struct usb_hcd *hcd) +{ + struct isp116x *isp116x = hcd_to_isp116x(hcd); + unsigned long flags; + u32 val; + int ret = 0; + + spin_lock_irqsave(&isp116x->lock, flags); + + val = isp116x_read_reg32(isp116x, HCCONTROL); + switch (val & HCCONTROL_HCFS) { + case HCCONTROL_USB_OPER: + hcd->state = HC_STATE_QUIESCING; + val &= (~HCCONTROL_HCFS & ~HCCONTROL_RWE); + val |= HCCONTROL_USB_SUSPEND; + if (hcd->remote_wakeup) + val |= HCCONTROL_RWE; + /* Wait for usb transfers to finish */ + mdelay(2); + isp116x_write_reg32(isp116x, HCCONTROL, val); + hcd->state = HC_STATE_SUSPENDED; + /* Wait for devices to suspend */ + mdelay(5); + case HCCONTROL_USB_SUSPEND: + break; + case HCCONTROL_USB_RESUME: + isp116x_write_reg32(isp116x, HCCONTROL, + (val & ~HCCONTROL_HCFS) | + HCCONTROL_USB_RESET); + case HCCONTROL_USB_RESET: + ret = -EBUSY; + break; + default: + ret = -EINVAL; + } + + spin_unlock_irqrestore(&isp116x->lock, flags); + return ret; +} + +static int isp116x_hub_resume(struct usb_hcd *hcd) +{ + struct isp116x *isp116x = hcd_to_isp116x(hcd); + u32 val; + int ret = -EINPROGRESS; + + msleep(5); + spin_lock_irq(&isp116x->lock); + + val = isp116x_read_reg32(isp116x, HCCONTROL); + switch (val & HCCONTROL_HCFS) { + case HCCONTROL_USB_SUSPEND: + val &= ~HCCONTROL_HCFS; + val |= HCCONTROL_USB_RESUME; + isp116x_write_reg32(isp116x, HCCONTROL, val); + case HCCONTROL_USB_RESUME: + break; + case HCCONTROL_USB_OPER: + /* Without setting power_state here the + SUSPENDED state won't be removed from + sysfs/usbN/power.state as a response to remote + wakeup. Maybe in the future. */ + hcd->self.root_hub->dev.power.power_state = PMSG_ON; + ret = 0; + break; + default: + ret = -EBUSY; + } + + if (ret != -EINPROGRESS) { + spin_unlock_irq(&isp116x->lock); + return ret; + } + + val = isp116x->rhdesca & RH_A_NDP; + while (val--) { + u32 stat = + isp116x_read_reg32(isp116x, val ? HCRHPORT2 : HCRHPORT1); + /* force global, not selective, resume */ + if (!(stat & RH_PS_PSS)) + continue; + DBG("%s: Resuming port %d\n", __func__, val); + isp116x_write_reg32(isp116x, RH_PS_POCI, val + ? HCRHPORT2 : HCRHPORT1); + } + spin_unlock_irq(&isp116x->lock); + + hcd->state = HC_STATE_RESUMING; + mdelay(20); + + /* Go operational */ + spin_lock_irq(&isp116x->lock); + val = isp116x_read_reg32(isp116x, HCCONTROL); + isp116x_write_reg32(isp116x, HCCONTROL, + (val & ~HCCONTROL_HCFS) | HCCONTROL_USB_OPER); + spin_unlock_irq(&isp116x->lock); + /* see analogous comment above */ + hcd->self.root_hub->dev.power.power_state = PMSG_ON; + hcd->state = HC_STATE_RUNNING; + + return 0; +} + +static void isp116x_rh_resume(void *_hcd) +{ + struct usb_hcd *hcd = _hcd; + + usb_resume_device(hcd->self.root_hub); +} + +#else + +#define isp116x_hub_suspend NULL +#define isp116x_hub_resume NULL + +static void isp116x_rh_resume(void *_hcd) +{ +} + +#endif + +/*-----------------------------------------------------------------*/ + +#ifdef STUB_DEBUG_FILE + +static inline void create_debug_file(struct isp116x *isp116x) +{ +} + +static inline void remove_debug_file(struct isp116x *isp116x) +{ +} + +#else + +#include +#include + +static void dump_irq(struct seq_file *s, char *label, u16 mask) +{ + seq_printf(s, "%s %04x%s%s%s%s%s%s\n", label, mask, + mask & HCuPINT_CLKRDY ? " clkrdy" : "", + mask & HCuPINT_SUSP ? 
" susp" : "", + mask & HCuPINT_OPR ? " opr" : "", + mask & HCuPINT_AIIEOT ? " eot" : "", + mask & HCuPINT_ATL ? " atl" : "", + mask & HCuPINT_SOF ? " sof" : ""); +} + +static void dump_int(struct seq_file *s, char *label, u32 mask) +{ + seq_printf(s, "%s %08x%s%s%s%s%s%s%s\n", label, mask, + mask & HCINT_MIE ? " MIE" : "", + mask & HCINT_RHSC ? " rhsc" : "", + mask & HCINT_FNO ? " fno" : "", + mask & HCINT_UE ? " ue" : "", + mask & HCINT_RD ? " rd" : "", + mask & HCINT_SF ? " sof" : "", mask & HCINT_SO ? " so" : ""); +} + +static int proc_isp116x_show(struct seq_file *s, void *unused) +{ + struct isp116x *isp116x = s->private; + struct isp116x_ep *ep; + struct urb *urb; + unsigned i; + char *str; + + seq_printf(s, "%s\n%s version %s\n", + isp116x_to_hcd(isp116x)->product_desc, hcd_name, + DRIVER_VERSION); + + if (HC_IS_SUSPENDED(isp116x_to_hcd(isp116x)->state)) { + seq_printf(s, "HCD is suspended\n"); + return 0; + } + if (!HC_IS_RUNNING(isp116x_to_hcd(isp116x)->state)) { + seq_printf(s, "HCD not running\n"); + return 0; + } + + spin_lock_irq(&isp116x->lock); + + dump_irq(s, "hc_irq_enable", isp116x_read_reg16(isp116x, HCuPINTENB)); + dump_irq(s, "hc_irq_status", isp116x_read_reg16(isp116x, HCuPINT)); + dump_int(s, "hc_int_enable", isp116x_read_reg32(isp116x, HCINTENB)); + dump_int(s, "hc_int_status", isp116x_read_reg32(isp116x, HCINTSTAT)); + + list_for_each_entry(ep, &isp116x->async, schedule) { + + switch (ep->nextpid) { + case USB_PID_IN: + str = "in"; + break; + case USB_PID_OUT: + str = "out"; + break; + case USB_PID_SETUP: + str = "setup"; + break; + case USB_PID_ACK: + str = "status"; + break; + default: + str = "?"; + break; + }; + seq_printf(s, "%p, ep%d%s, maxpacket %d:\n", ep, + ep->epnum, str, ep->maxpacket); + list_for_each_entry(urb, &ep->hep->urb_list, urb_list) { + seq_printf(s, " urb%p, %d/%d\n", urb, + urb->actual_length, + urb->transfer_buffer_length); + } + } + if (!list_empty(&isp116x->async)) + seq_printf(s, "\n"); + + seq_printf(s, "periodic size= %d\n", PERIODIC_SIZE); + + for (i = 0; i < PERIODIC_SIZE; i++) { + ep = isp116x->periodic[i]; + if (!ep) + continue; + seq_printf(s, "%2d [%3d]:\n", i, isp116x->load[i]); + + /* DUMB: prints shared entries multiple times */ + do { + seq_printf(s, " %d/%p (%sdev%d ep%d%s max %d)\n", + ep->period, ep, + (ep->udev->speed == + USB_SPEED_FULL) ? "" : "ls ", + ep->udev->devnum, ep->epnum, + (ep->epnum == + 0) ? "" : ((ep->nextpid == + USB_PID_IN) ? "in" : "out"), + ep->maxpacket); + ep = ep->next; + } while (ep); + } + spin_unlock_irq(&isp116x->lock); + seq_printf(s, "\n"); + + return 0; +} + +static int proc_isp116x_open(struct inode *inode, struct file *file) +{ + return single_open(file, proc_isp116x_show, PDE(inode)->data); +} + +static struct file_operations proc_ops = { + .open = proc_isp116x_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +/* expect just one isp116x per system */ +static const char proc_filename[] = "driver/isp116x"; + +static void create_debug_file(struct isp116x *isp116x) +{ + struct proc_dir_entry *pde; + + pde = create_proc_entry(proc_filename, 0, NULL); + if (pde == NULL) + return; + + pde->proc_fops = &proc_ops; + pde->data = isp116x; + isp116x->pde = pde; +} + +static void remove_debug_file(struct isp116x *isp116x) +{ + if (isp116x->pde) + remove_proc_entry(proc_filename, NULL); +} + +#endif + +/*-----------------------------------------------------------------*/ + +/* + Software reset - can be called from any contect. 
+*/ +static int isp116x_sw_reset(struct isp116x *isp116x) +{ + int retries = 15; + unsigned long flags; + int ret = 0; + + spin_lock_irqsave(&isp116x->lock, flags); + isp116x_write_reg16(isp116x, HCSWRES, HCSWRES_MAGIC); + isp116x_write_reg32(isp116x, HCCMDSTAT, HCCMDSTAT_HCR); + while (--retries) { + /* It usually resets within 1 ms */ + mdelay(1); + if (!(isp116x_read_reg32(isp116x, HCCMDSTAT) & HCCMDSTAT_HCR)) + break; + } + if (!retries) { + ERR("Software reset timeout\n"); + ret = -ETIME; + } + spin_unlock_irqrestore(&isp116x->lock, flags); + return ret; +} + +/* + Reset. Tries to perform platform-specific hardware + reset first; falls back to software reset. +*/ +static int isp116x_reset(struct usb_hcd *hcd) +{ + struct isp116x *isp116x = hcd_to_isp116x(hcd); + unsigned long t; + u16 clkrdy = 0; + int ret = 0, timeout = 15 /* ms */ ; + + if (isp116x->board && isp116x->board->reset) { + /* Hardware reset */ + isp116x->board->reset(hcd->self.controller, 1); + msleep(10); + if (isp116x->board->clock) + isp116x->board->clock(hcd->self.controller, 1); + msleep(1); + isp116x->board->reset(hcd->self.controller, 0); + } else + ret = isp116x_sw_reset(isp116x); + + if (ret) + return ret; + + t = jiffies + msecs_to_jiffies(timeout); + while (time_before_eq(jiffies, t)) { + msleep(4); + spin_lock_irq(&isp116x->lock); + clkrdy = isp116x_read_reg16(isp116x, HCuPINT) & HCuPINT_CLKRDY; + spin_unlock_irq(&isp116x->lock); + if (clkrdy) + break; + } + if (!clkrdy) { + ERR("Clock not ready after 20ms\n"); + ret = -ENODEV; + } + return ret; +} + +static void isp116x_stop(struct usb_hcd *hcd) +{ + struct isp116x *isp116x = hcd_to_isp116x(hcd); + unsigned long flags; + u32 val; + + spin_lock_irqsave(&isp116x->lock, flags); + isp116x_write_reg16(isp116x, HCuPINTENB, 0); + + /* Switch off ports' power, some devices don't come up + after next 'insmod' without this */ + val = isp116x_read_reg32(isp116x, HCRHDESCA); + val &= ~(RH_A_NPS | RH_A_PSM); + isp116x_write_reg32(isp116x, HCRHDESCA, val); + isp116x_write_reg32(isp116x, HCRHSTATUS, RH_HS_LPS); + spin_unlock_irqrestore(&isp116x->lock, flags); + + /* Put the chip into reset state */ + if (isp116x->board && isp116x->board->reset) + isp116x->board->reset(hcd->self.controller, 0); + else + isp116x_sw_reset(isp116x); + + /* Stop the clock */ + if (isp116x->board && isp116x->board->clock) + isp116x->board->clock(hcd->self.controller, 0); +} + +/* + Configure the chip. The chip must be successfully reset by now. 
+*/ +static int isp116x_start(struct usb_hcd *hcd) +{ + struct isp116x *isp116x = hcd_to_isp116x(hcd); + struct isp116x_platform_data *board = isp116x->board; + struct usb_device *udev; + u32 val; + unsigned long flags; + + spin_lock_irqsave(&isp116x->lock, flags); + + /* clear interrupt status and disable all interrupt sources */ + isp116x_write_reg16(isp116x, HCuPINT, 0xff); + isp116x_write_reg16(isp116x, HCuPINTENB, 0); + + val = isp116x_read_reg16(isp116x, HCCHIPID); + if ((val & HCCHIPID_MASK) != HCCHIPID_MAGIC) { + ERR("Invalid chip ID %04x\n", val); + spin_unlock_irqrestore(&isp116x->lock, flags); + return -ENODEV; + } + + isp116x_write_reg16(isp116x, HCITLBUFLEN, ISP116x_ITL_BUFSIZE); + isp116x_write_reg16(isp116x, HCATLBUFLEN, ISP116x_ATL_BUFSIZE); + + /* ----- HW conf */ + val = HCHWCFG_INT_ENABLE | HCHWCFG_DBWIDTH(1); + if (board->sel15Kres) + val |= HCHWCFG_15KRSEL; + /* Remote wakeup won't work without working clock */ + if (board->clknotstop || board->remote_wakeup_enable) + val |= HCHWCFG_CLKNOTSTOP; + if (board->oc_enable) + val |= HCHWCFG_ANALOG_OC; + if (board->int_act_high) + val |= HCHWCFG_INT_POL; + if (board->int_edge_triggered) + val |= HCHWCFG_INT_TRIGGER; + isp116x_write_reg16(isp116x, HCHWCFG, val); + + /* ----- Root hub conf */ + val = 0; + /* AN10003_1.pdf recommends NPS to be always 1 */ + if (board->no_power_switching) + val |= RH_A_NPS; + if (board->power_switching_mode) + val |= RH_A_PSM; + if (board->potpg) + val |= (board->potpg << 24) & RH_A_POTPGT; + else + val |= (25 << 24) & RH_A_POTPGT; + isp116x_write_reg32(isp116x, HCRHDESCA, val); + isp116x->rhdesca = isp116x_read_reg32(isp116x, HCRHDESCA); + + val = RH_B_PPCM; + isp116x_write_reg32(isp116x, HCRHDESCB, val); + isp116x->rhdescb = isp116x_read_reg32(isp116x, HCRHDESCB); + + val = 0; + if (board->remote_wakeup_enable) { + hcd->can_wakeup = 1; + val |= RH_HS_DRWE; + } + isp116x_write_reg32(isp116x, HCRHSTATUS, val); + isp116x->rhstatus = isp116x_read_reg32(isp116x, HCRHSTATUS); + + isp116x_write_reg32(isp116x, HCFMINTVL, 0x27782edf); + spin_unlock_irqrestore(&isp116x->lock, flags); + + udev = usb_alloc_dev(NULL, &hcd->self, 0); + if (!udev) { + isp116x_stop(hcd); + return -ENOMEM; + } + + udev->speed = USB_SPEED_FULL; + hcd->state = HC_STATE_RUNNING; + + if (usb_hcd_register_root_hub(udev, hcd) != 0) { + isp116x_stop(hcd); + usb_put_dev(udev); + return -ENODEV; + } + + spin_lock_irqsave(&isp116x->lock, flags); + /* Set up interrupts */ + isp116x->intenb = HCINT_MIE | HCINT_RHSC | HCINT_UE; + if (board->remote_wakeup_enable) + isp116x->intenb |= HCINT_RD; + isp116x->irqenb = HCuPINT_ATL | HCuPINT_OPR; /* | HCuPINT_SUSP; */ + isp116x_write_reg32(isp116x, HCINTENB, isp116x->intenb); + isp116x_write_reg16(isp116x, HCuPINTENB, isp116x->irqenb); + + /* Go operational */ + val = HCCONTROL_USB_OPER; + /* Remote wakeup connected - NOT SUPPORTED */ + /* if (board->remote_wakeup_connected) + val |= HCCONTROL_RWC; */ + if (board->remote_wakeup_enable) + val |= HCCONTROL_RWE; + isp116x_write_reg32(isp116x, HCCONTROL, val); + + /* Disable ports to avoid race in device enumeration */ + isp116x_write_reg32(isp116x, HCRHPORT1, RH_PS_CCS); + isp116x_write_reg32(isp116x, HCRHPORT2, RH_PS_CCS); + + isp116x_show_regs(isp116x); + spin_unlock_irqrestore(&isp116x->lock, flags); + return 0; +} + +/*-----------------------------------------------------------------*/ + +static struct hc_driver isp116x_hc_driver = { + .description = hcd_name, + .product_desc = "ISP116x Host Controller", + .hcd_priv_size = sizeof(struct isp116x), 
+ + .irq = isp116x_irq, + .flags = HCD_USB11, + + .reset = isp116x_reset, + .start = isp116x_start, + .stop = isp116x_stop, + + .urb_enqueue = isp116x_urb_enqueue, + .urb_dequeue = isp116x_urb_dequeue, + .endpoint_disable = isp116x_endpoint_disable, + + .get_frame_number = isp116x_get_frame, + + .hub_status_data = isp116x_hub_status_data, + .hub_control = isp116x_hub_control, + .hub_suspend = isp116x_hub_suspend, + .hub_resume = isp116x_hub_resume, +}; + +/*----------------------------------------------------------------*/ + +static int __init_or_module isp116x_remove(struct device *dev) +{ + struct usb_hcd *hcd = dev_get_drvdata(dev); + struct isp116x *isp116x = hcd_to_isp116x(hcd); + struct platform_device *pdev; + struct resource *res; + + pdev = container_of(dev, struct platform_device, dev); + remove_debug_file(isp116x); + usb_remove_hcd(hcd); + + iounmap(isp116x->data_reg); + res = platform_get_resource(pdev, IORESOURCE_MEM, 1); + release_mem_region(res->start, 2); + iounmap(isp116x->addr_reg); + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + release_mem_region(res->start, 2); + + usb_put_hcd(hcd); + return 0; +} + +#define resource_len(r) (((r)->end - (r)->start) + 1) + +static int __init isp116x_probe(struct device *dev) +{ + struct usb_hcd *hcd; + struct isp116x *isp116x; + struct platform_device *pdev; + struct resource *addr, *data; + void __iomem *addr_reg; + void __iomem *data_reg; + int irq; + int ret = 0; + + pdev = container_of(dev, struct platform_device, dev); + if (pdev->num_resources < 3) { + ret = -ENODEV; + goto err1; + } + + data = platform_get_resource(pdev, IORESOURCE_MEM, 0); + addr = platform_get_resource(pdev, IORESOURCE_MEM, 1); + irq = platform_get_irq(pdev, 0); + if (!addr || !data || irq < 0) { + ret = -ENODEV; + goto err1; + } + + if (dev->dma_mask) { + DBG("DMA not supported\n"); + ret = -EINVAL; + goto err1; + } + + if (!request_mem_region(addr->start, 2, hcd_name)) { + ret = -EBUSY; + goto err1; + } + addr_reg = ioremap(addr->start, resource_len(addr)); + if (addr_reg == NULL) { + ret = -ENOMEM; + goto err2; + } + if (!request_mem_region(data->start, 2, hcd_name)) { + ret = -EBUSY; + goto err3; + } + data_reg = ioremap(data->start, resource_len(data)); + if (data_reg == NULL) { + ret = -ENOMEM; + goto err4; + } + + /* allocate and initialize hcd */ + hcd = usb_create_hcd(&isp116x_hc_driver, dev, dev->bus_id); + if (!hcd) { + ret = -ENOMEM; + goto err5; + } + /* this rsrc_start is bogus */ + hcd->rsrc_start = addr->start; + isp116x = hcd_to_isp116x(hcd); + isp116x->data_reg = data_reg; + isp116x->addr_reg = addr_reg; + spin_lock_init(&isp116x->lock); + INIT_LIST_HEAD(&isp116x->async); + INIT_WORK(&isp116x->rh_resume, isp116x_rh_resume, hcd); + isp116x->board = dev->platform_data; + + if (!isp116x->board) { + ERR("Platform data structure not initialized\n"); + ret = -ENODEV; + goto err6; + } + if (isp116x_check_platform_delay(isp116x)) { + ERR("USE_PLATFORM_DELAY defined, but delay function not " + "implemented.\n"); + ERR("See comments in drivers/usb/host/isp116x-hcd.c\n"); + ret = -ENODEV; + goto err6; + } + + ret = usb_add_hcd(hcd, irq, SA_INTERRUPT); + if (ret != 0) + goto err6; + + create_debug_file(isp116x); + return 0; + + err6: + usb_put_hcd(hcd); + err5: + iounmap(data_reg); + err4: + release_mem_region(data->start, 2); + err3: + iounmap(addr_reg); + err2: + release_mem_region(addr->start, 2); + err1: + ERR("init error, %d\n", ret); + return ret; +} + +#ifdef CONFIG_PM +/* + Suspend of platform device +*/ +static int 
isp116x_suspend(struct device *dev, pm_message_t state, u32 phase) +{ + int ret = 0; + struct usb_hcd *hcd = dev_get_drvdata(dev); + + VDBG("%s: state %x, phase %x\n", __func__, state, phase); + + if (phase != SUSPEND_DISABLE && phase != SUSPEND_POWER_DOWN) + return 0; + + ret = usb_suspend_device(hcd->self.root_hub, state); + if (!ret) { + dev->power.power_state = state; + INFO("%s suspended\n", (char *)hcd_name); + } else + ERR("%s suspend failed\n", (char *)hcd_name); + + return ret; +} + +/* + Resume platform device +*/ +static int isp116x_resume(struct device *dev, u32 phase) +{ + int ret = 0; + struct usb_hcd *hcd = dev_get_drvdata(dev); + + VDBG("%s: state %x, phase %x\n", __func__, dev->power.power_state, + phase); + if (phase != RESUME_POWER_ON) + return 0; + + ret = usb_resume_device(hcd->self.root_hub); + if (!ret) { + dev->power.power_state = PMSG_ON; + VDBG("%s resumed\n", (char *)hcd_name); + } + return ret; +} + +#else + +#define isp116x_suspend NULL +#define isp116x_resume NULL + +#endif + +static struct device_driver isp116x_driver = { + .name = (char *)hcd_name, + .bus = &platform_bus_type, + .probe = isp116x_probe, + .remove = isp116x_remove, + .suspend = isp116x_suspend, + .resume = isp116x_resume, +}; + +/*-----------------------------------------------------------------*/ + +static int __init isp116x_init(void) +{ + if (usb_disabled()) + return -ENODEV; + + INFO("driver %s, %s\n", hcd_name, DRIVER_VERSION); + return driver_register(&isp116x_driver); +} + +module_init(isp116x_init); + +static void __exit isp116x_cleanup(void) +{ + driver_unregister(&isp116x_driver); +} + +module_exit(isp116x_cleanup); diff --git a/drivers/usb/host/isp116x.h b/drivers/usb/host/isp116x.h new file mode 100644 index 00000000000..58873470dcf --- /dev/null +++ b/drivers/usb/host/isp116x.h @@ -0,0 +1,583 @@ +/* + * ISP116x register declarations and HCD data structures + * + * Copyright (C) 2005 Olav Kongas + * Portions: + * Copyright (C) 2004 Lothar Wassmann + * Copyright (C) 2004 Psion Teklogix + * Copyright (C) 2004 David Brownell + */ + +/* us of a 1 ms frame */ +#define MAX_LOAD_LIMIT 850 + +/* Full speed: max # of bytes to transfer for a single urb + at a time must be < 1024 && must be multiple of 64. + 832 allows transferring 4kiB within 5 frames. */ +#define MAX_TRANSFER_SIZE_FULLSPEED 832 + +/* Low speed: there is no reason to schedule in very big + chunks; often the requested long transfers are for + string descriptors containing short strings.
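The 832 figure above satisfies all three constraints at once, which a few lines of standalone C can confirm:

#include <assert.h>
#include <stdio.h>

int main(void)
{
	int size = 832;

	/* The constraints stated in the comment above. */
	assert(size < 1024);
	assert(size % 64 == 0);

	/* 5 frames x 832 bytes = 4160 bytes, enough for one 4 KiB
	   (4096-byte) buffer, hence "4kiB within 5 frames". */
	printf("5 frames carry %d bytes\n", 5 * size);
	return 0;
}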
*/ +#define MAX_TRANSFER_SIZE_LOWSPEED 64 + +/* Bytetime (us), a rough indication of how much time it + would take to transfer a byte of useful data over USB */ +#define BYTE_TIME_FULLSPEED 1 +#define BYTE_TIME_LOWSPEED 20 + +/* Buffer sizes */ +#define ISP116x_BUF_SIZE 4096 +#define ISP116x_ITL_BUFSIZE 0 +#define ISP116x_ATL_BUFSIZE ((ISP116x_BUF_SIZE) - 2*(ISP116x_ITL_BUFSIZE)) + +#define ISP116x_WRITE_OFFSET 0x80 + +/*------------ ISP116x registers/bits ------------*/ +#define HCREVISION 0x00 +#define HCCONTROL 0x01 +#define HCCONTROL_HCFS (3 << 6) /* host controller + functional state */ +#define HCCONTROL_USB_RESET (0 << 6) +#define HCCONTROL_USB_RESUME (1 << 6) +#define HCCONTROL_USB_OPER (2 << 6) +#define HCCONTROL_USB_SUSPEND (3 << 6) +#define HCCONTROL_RWC (1 << 9) /* remote wakeup connected */ +#define HCCONTROL_RWE (1 << 10) /* remote wakeup enable */ +#define HCCMDSTAT 0x02 +#define HCCMDSTAT_HCR (1 << 0) /* host controller reset */ +#define HCCMDSTAT_SOC (3 << 16) /* scheduling overrun count */ +#define HCINTSTAT 0x03 +#define HCINT_SO (1 << 0) /* scheduling overrun */ +#define HCINT_WDH (1 << 1) /* writeback of done_head */ +#define HCINT_SF (1 << 2) /* start frame */ +#define HCINT_RD (1 << 3) /* resume detect */ +#define HCINT_UE (1 << 4) /* unrecoverable error */ +#define HCINT_FNO (1 << 5) /* frame number overflow */ +#define HCINT_RHSC (1 << 6) /* root hub status change */ +#define HCINT_OC (1 << 30) /* ownership change */ +#define HCINT_MIE (1 << 31) /* master interrupt enable */ +#define HCINTENB 0x04 +#define HCINTDIS 0x05 +#define HCFMINTVL 0x0d +#define HCFMREM 0x0e +#define HCFMNUM 0x0f +#define HCLSTHRESH 0x11 +#define HCRHDESCA 0x12 +#define RH_A_NDP (0x3 << 0) /* # downstream ports */ +#define RH_A_PSM (1 << 8) /* power switching mode */ +#define RH_A_NPS (1 << 9) /* no power switching */ +#define RH_A_DT (1 << 10) /* device type (mbz) */ +#define RH_A_OCPM (1 << 11) /* overcurrent protection + mode */ +#define RH_A_NOCP (1 << 12) /* no overcurrent protection */ +#define RH_A_POTPGT (0xff << 24) /* power on -> power good + time */ +#define HCRHDESCB 0x13 +#define RH_B_DR (0xffff << 0) /* device removable flags */ +#define RH_B_PPCM (0xffff << 16) /* port power control mask */ +#define HCRHSTATUS 0x14 +#define RH_HS_LPS (1 << 0) /* local power status */ +#define RH_HS_OCI (1 << 1) /* over current indicator */ +#define RH_HS_DRWE (1 << 15) /* device remote wakeup + enable */ +#define RH_HS_LPSC (1 << 16) /* local power status change */ +#define RH_HS_OCIC (1 << 17) /* over current indicator + change */ +#define RH_HS_CRWE (1 << 31) /* clear remote wakeup + enable */ +#define HCRHPORT1 0x15 +#define RH_PS_CCS (1 << 0) /* current connect status */ +#define RH_PS_PES (1 << 1) /* port enable status */ +#define RH_PS_PSS (1 << 2) /* port suspend status */ +#define RH_PS_POCI (1 << 3) /* port over current + indicator */ +#define RH_PS_PRS (1 << 4) /* port reset status */ +#define RH_PS_PPS (1 << 8) /* port power status */ +#define RH_PS_LSDA (1 << 9) /* low speed device attached */ +#define RH_PS_CSC (1 << 16) /* connect status change */ +#define RH_PS_PESC (1 << 17) /* port enable status change */ +#define RH_PS_PSSC (1 << 18) /* port suspend status + change */ +#define RH_PS_OCIC (1 << 19) /* over current indicator + change */ +#define RH_PS_PRSC (1 << 20) /* port reset status change */ +#define HCRHPORT_CLRMASK (0x1f << 16) +#define HCRHPORT2 0x16 +#define HCHWCFG 0x20 +#define HCHWCFG_15KRSEL (1 << 12) +#define HCHWCFG_CLKNOTSTOP (1 << 11) +#define HCHWCFG_ANALOG_OC 
(1 << 10) +#define HCHWCFG_DACK_MODE (1 << 8) +#define HCHWCFG_EOT_POL (1 << 7) +#define HCHWCFG_DACK_POL (1 << 6) +#define HCHWCFG_DREQ_POL (1 << 5) +#define HCHWCFG_DBWIDTH_MASK (0x03 << 3) +#define HCHWCFG_DBWIDTH(n) (((n) << 3) & HCHWCFG_DBWIDTH_MASK) +#define HCHWCFG_INT_POL (1 << 2) +#define HCHWCFG_INT_TRIGGER (1 << 1) +#define HCHWCFG_INT_ENABLE (1 << 0) +#define HCDMACFG 0x21 +#define HCDMACFG_BURST_LEN_MASK (0x03 << 5) +#define HCDMACFG_BURST_LEN(n) (((n) << 5) & HCDMACFG_BURST_LEN_MASK) +#define HCDMACFG_BURST_LEN_1 HCDMACFG_BURST_LEN(0) +#define HCDMACFG_BURST_LEN_4 HCDMACFG_BURST_LEN(1) +#define HCDMACFG_BURST_LEN_8 HCDMACFG_BURST_LEN(2) +#define HCDMACFG_DMA_ENABLE (1 << 4) +#define HCDMACFG_BUF_TYPE_MASK (0x07 << 1) +#define HCDMACFG_CTR_SEL (1 << 2) +#define HCDMACFG_ITLATL_SEL (1 << 1) +#define HCDMACFG_DMA_RW_SELECT (1 << 0) +#define HCXFERCTR 0x22 +#define HCuPINT 0x24 +#define HCuPINT_SOF (1 << 0) +#define HCuPINT_ATL (1 << 1) +#define HCuPINT_AIIEOT (1 << 2) +#define HCuPINT_OPR (1 << 4) +#define HCuPINT_SUSP (1 << 5) +#define HCuPINT_CLKRDY (1 << 6) +#define HCuPINTENB 0x25 +#define HCCHIPID 0x27 +#define HCCHIPID_MASK 0xff00 +#define HCCHIPID_MAGIC 0x6100 +#define HCSCRATCH 0x28 +#define HCSWRES 0x29 +#define HCSWRES_MAGIC 0x00f6 +#define HCITLBUFLEN 0x2a +#define HCATLBUFLEN 0x2b +#define HCBUFSTAT 0x2c +#define HCBUFSTAT_ITL0_FULL (1 << 0) +#define HCBUFSTAT_ITL1_FULL (1 << 1) +#define HCBUFSTAT_ATL_FULL (1 << 2) +#define HCBUFSTAT_ITL0_DONE (1 << 3) +#define HCBUFSTAT_ITL1_DONE (1 << 4) +#define HCBUFSTAT_ATL_DONE (1 << 5) +#define HCRDITL0LEN 0x2d +#define HCRDITL1LEN 0x2e +#define HCITLPORT 0x40 +#define HCATLPORT 0x41 + +/* Philips transfer descriptor */ +struct ptd { + u16 count; +#define PTD_COUNT_MSK (0x3ff << 0) +#define PTD_TOGGLE_MSK (1 << 10) +#define PTD_ACTIVE_MSK (1 << 11) +#define PTD_CC_MSK (0xf << 12) + u16 mps; +#define PTD_MPS_MSK (0x3ff << 0) +#define PTD_SPD_MSK (1 << 10) +#define PTD_LAST_MSK (1 << 11) +#define PTD_EP_MSK (0xf << 12) + u16 len; +#define PTD_LEN_MSK (0x3ff << 0) +#define PTD_DIR_MSK (3 << 10) +#define PTD_DIR_SETUP (0) +#define PTD_DIR_OUT (1) +#define PTD_DIR_IN (2) +#define PTD_B5_5_MSK (1 << 13) + u16 faddr; +#define PTD_FA_MSK (0x7f << 0) +#define PTD_FMT_MSK (1 << 7) +} __attribute__ ((packed, aligned(2))); + +/* PTD accessor macros. 
*/ +#define PTD_GET_COUNT(p) (((p)->count & PTD_COUNT_MSK) >> 0) +#define PTD_COUNT(v) (((v) << 0) & PTD_COUNT_MSK) +#define PTD_GET_TOGGLE(p) (((p)->count & PTD_TOGGLE_MSK) >> 10) +#define PTD_TOGGLE(v) (((v) << 10) & PTD_TOGGLE_MSK) +#define PTD_GET_ACTIVE(p) (((p)->count & PTD_ACTIVE_MSK) >> 11) +#define PTD_ACTIVE(v) (((v) << 11) & PTD_ACTIVE_MSK) +#define PTD_GET_CC(p) (((p)->count & PTD_CC_MSK) >> 12) +#define PTD_CC(v) (((v) << 12) & PTD_CC_MSK) +#define PTD_GET_MPS(p) (((p)->mps & PTD_MPS_MSK) >> 0) +#define PTD_MPS(v) (((v) << 0) & PTD_MPS_MSK) +#define PTD_GET_SPD(p) (((p)->mps & PTD_SPD_MSK) >> 10) +#define PTD_SPD(v) (((v) << 10) & PTD_SPD_MSK) +#define PTD_GET_LAST(p) (((p)->mps & PTD_LAST_MSK) >> 11) +#define PTD_LAST(v) (((v) << 11) & PTD_LAST_MSK) +#define PTD_GET_EP(p) (((p)->mps & PTD_EP_MSK) >> 12) +#define PTD_EP(v) (((v) << 12) & PTD_EP_MSK) +#define PTD_GET_LEN(p) (((p)->len & PTD_LEN_MSK) >> 0) +#define PTD_LEN(v) (((v) << 0) & PTD_LEN_MSK) +#define PTD_GET_DIR(p) (((p)->len & PTD_DIR_MSK) >> 10) +#define PTD_DIR(v) (((v) << 10) & PTD_DIR_MSK) +#define PTD_GET_B5_5(p) (((p)->len & PTD_B5_5_MSK) >> 13) +#define PTD_B5_5(v) (((v) << 13) & PTD_B5_5_MSK) +#define PTD_GET_FA(p) (((p)->faddr & PTD_FA_MSK) >> 0) +#define PTD_FA(v) (((v) << 0) & PTD_FA_MSK) +#define PTD_GET_FMT(p) (((p)->faddr & PTD_FMT_MSK) >> 7) +#define PTD_FMT(v) (((v) << 7) & PTD_FMT_MSK) + +/* Hardware transfer status codes -- CC from ptd->count */ +#define TD_CC_NOERROR 0x00 +#define TD_CC_CRC 0x01 +#define TD_CC_BITSTUFFING 0x02 +#define TD_CC_DATATOGGLEM 0x03 +#define TD_CC_STALL 0x04 +#define TD_DEVNOTRESP 0x05 +#define TD_PIDCHECKFAIL 0x06 +#define TD_UNEXPECTEDPID 0x07 +#define TD_DATAOVERRUN 0x08 +#define TD_DATAUNDERRUN 0x09 + /* 0x0A, 0x0B reserved for hardware */ +#define TD_BUFFEROVERRUN 0x0C +#define TD_BUFFERUNDERRUN 0x0D + /* 0x0E, 0x0F reserved for HCD */ +#define TD_NOTACCESSED 0x0F + +/* map PTD status codes (CC) to errno values */ +static const int cc_to_error[16] = { + /* No Error */ 0, + /* CRC Error */ -EILSEQ, + /* Bit Stuff */ -EPROTO, + /* Data Togg */ -EILSEQ, + /* Stall */ -EPIPE, + /* DevNotResp */ -ETIMEDOUT, + /* PIDCheck */ -EPROTO, + /* UnExpPID */ -EPROTO, + /* DataOver */ -EOVERFLOW, + /* DataUnder */ -EREMOTEIO, + /* (for hw) */ -EIO, + /* (for hw) */ -EIO, + /* BufferOver */ -ECOMM, + /* BuffUnder */ -ENOSR, + /* (for HCD) */ -EALREADY, + /* (for HCD) */ -EALREADY +}; + +/*--------------------------------------------------------------*/ + +#define LOG2_PERIODIC_SIZE 5 /* arbitrary; this matches OHCI */ +#define PERIODIC_SIZE (1 << LOG2_PERIODIC_SIZE) + +struct isp116x { + spinlock_t lock; + struct work_struct rh_resume; + + void __iomem *addr_reg; + void __iomem *data_reg; + + struct isp116x_platform_data *board; + + struct proc_dir_entry *pde; + unsigned long stat1, stat2, stat4, stat8, stat16; + + /* HC registers */ + u32 intenb; /* "OHCI" interrupts */ + u16 irqenb; /* uP interrupts */ + + /* Root hub registers */ + u32 rhdesca; + u32 rhdescb; + u32 rhstatus; + u32 rhport[2]; + + /* async schedule: control, bulk */ + struct list_head async; + + /* periodic schedule: int */ + u16 load[PERIODIC_SIZE]; + struct isp116x_ep *periodic[PERIODIC_SIZE]; + unsigned periodic_count; + u16 fmindex; + + /* Schedule for the current frame */ + struct isp116x_ep *atl_active; + int atl_buflen; + int atl_bufshrt; + int atl_last_dir; + atomic_t atl_finishing; +}; + +static inline struct isp116x *hcd_to_isp116x(struct usb_hcd *hcd) +{ + return (struct isp116x *)(hcd->hcd_priv); +} + 
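As a worked example of these accessors, the packing below mirrors how preproc_atl_queue builds ptd->mps and reads it back. The GET macros are adapted here to take a bare 16-bit word rather than a struct ptd pointer, so the snippet runs standalone:

#include <stdio.h>

typedef unsigned short u16;

/* Subset of the PTD macros above, GET variants taking the raw word. */
#define PTD_MPS_MSK (0x3ff << 0)
#define PTD_SPD_MSK (1 << 10)
#define PTD_EP_MSK (0xf << 12)
#define PTD_MPS(v) (((v) << 0) & PTD_MPS_MSK)
#define PTD_SPD(v) (((v) << 10) & PTD_SPD_MSK)
#define PTD_EP(v) (((v) << 12) & PTD_EP_MSK)
#define PTD_GET_MPS(w) (((w) & PTD_MPS_MSK) >> 0)
#define PTD_GET_EP(w) (((w) & PTD_EP_MSK) >> 12)

int main(void)
{
	/* Pack a low-speed, 8-byte-maxpacket endpoint 1, then unpack.
	   Prints: mps word 0x1408: maxpacket 8, ep 1 */
	u16 mps = PTD_MPS(8) | PTD_SPD(1) | PTD_EP(1);

	printf("mps word 0x%04x: maxpacket %u, ep %u\n",
	       mps, (unsigned)PTD_GET_MPS(mps), (unsigned)PTD_GET_EP(mps));
	return 0;
}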
+static inline struct usb_hcd *isp116x_to_hcd(struct isp116x *isp116x) +{ + return container_of((void *)isp116x, struct usb_hcd, hcd_priv); +} + +struct isp116x_ep { + struct usb_host_endpoint *hep; + struct usb_device *udev; + struct ptd ptd; + + u8 maxpacket; + u8 epnum; + u8 nextpid; + u16 error_count; + u16 length; /* of current packet */ + unsigned char *data; /* to databuf */ + /* queue of active EP's (the ones scheduled for the + current frame) */ + struct isp116x_ep *active; + + /* periodic schedule */ + u16 period; + u16 branch; + u16 load; + struct isp116x_ep *next; + + /* async schedule */ + struct list_head schedule; +}; + +/*-------------------------------------------------------------------------*/ + +#ifdef DEBUG +#define DBG(stuff...) printk(KERN_DEBUG "116x: " stuff) +#else +#define DBG(stuff...) do{}while(0) +#endif + +#ifdef VERBOSE +# define VDBG DBG +#else +# define VDBG(stuff...) do{}while(0) +#endif + +#define ERR(stuff...) printk(KERN_ERR "116x: " stuff) +#define WARN(stuff...) printk(KERN_WARNING "116x: " stuff) +#define INFO(stuff...) printk(KERN_INFO "116x: " stuff) + +/* ------------------------------------------------- */ + +#if defined(USE_PLATFORM_DELAY) +#if defined(USE_NDELAY) +#error USE_PLATFORM_DELAY and USE_NDELAY simultaneously defined. +#endif +#define isp116x_delay(h,d) (h)->board->delay( \ + isp116x_to_hcd(h)->self.controller,d) +#define isp116x_check_platform_delay(h) ((h)->board->delay == NULL) +#elif defined(USE_NDELAY) +#define isp116x_delay(h,d) ndelay(d) +#define isp116x_check_platform_delay(h) 0 +#else +#define isp116x_delay(h,d) do{}while(0) +#define isp116x_check_platform_delay(h) 0 +#endif + +#if defined(DEBUG) +#define IRQ_TEST() BUG_ON(!irqs_disabled()) +#else +#define IRQ_TEST() do{}while(0) +#endif + +static inline void isp116x_write_addr(struct isp116x *isp116x, unsigned reg) +{ + IRQ_TEST(); + writew(reg & 0xff, isp116x->addr_reg); + isp116x_delay(isp116x, 300); +} + +static inline void isp116x_write_data16(struct isp116x *isp116x, u16 val) +{ + writew(val, isp116x->data_reg); + isp116x_delay(isp116x, 150); +} + +static inline void isp116x_raw_write_data16(struct isp116x *isp116x, u16 val) +{ + __raw_writew(val, isp116x->data_reg); + isp116x_delay(isp116x, 150); +} + +static inline u16 isp116x_read_data16(struct isp116x *isp116x) +{ + u16 val; + + val = readw(isp116x->data_reg); + isp116x_delay(isp116x, 150); + return val; +} + +static inline u16 isp116x_raw_read_data16(struct isp116x *isp116x) +{ + u16 val; + + val = __raw_readw(isp116x->data_reg); + isp116x_delay(isp116x, 150); + return val; +} + +static inline void isp116x_write_data32(struct isp116x *isp116x, u32 val) +{ + writew(val & 0xffff, isp116x->data_reg); + isp116x_delay(isp116x, 150); + writew(val >> 16, isp116x->data_reg); + isp116x_delay(isp116x, 150); +} + +static inline u32 isp116x_read_data32(struct isp116x *isp116x) +{ + u32 val; + + val = (u32) readw(isp116x->data_reg); + isp116x_delay(isp116x, 150); + val |= ((u32) readw(isp116x->data_reg)) << 16; + isp116x_delay(isp116x, 150); + return val; +} + +/* Let's keep register access functions out of line. Hint: + we wait at least 150 ns at every access. 
+*/ +static u16 isp116x_read_reg16(struct isp116x *isp116x, unsigned reg) +{ + isp116x_write_addr(isp116x, reg); + return isp116x_read_data16(isp116x); +} + +static u32 isp116x_read_reg32(struct isp116x *isp116x, unsigned reg) +{ + isp116x_write_addr(isp116x, reg); + return isp116x_read_data32(isp116x); +} + +static void isp116x_write_reg16(struct isp116x *isp116x, unsigned reg, + unsigned val) +{ + isp116x_write_addr(isp116x, reg | ISP116x_WRITE_OFFSET); + isp116x_write_data16(isp116x, (u16) (val & 0xffff)); +} + +static void isp116x_write_reg32(struct isp116x *isp116x, unsigned reg, + unsigned val) +{ + isp116x_write_addr(isp116x, reg | ISP116x_WRITE_OFFSET); + isp116x_write_data32(isp116x, (u32) val); +} + +#define isp116x_show_reg(d,r) { \ + if ((r) < 0x20) { \ + DBG("%-12s[%02x]: %08x\n", #r, \ + r, isp116x_read_reg32(d, r)); \ + } else { \ + DBG("%-12s[%02x]: %04x\n", #r, \ + r, isp116x_read_reg16(d, r)); \ + } \ +} + +static inline void isp116x_show_regs(struct isp116x *isp116x) +{ + isp116x_show_reg(isp116x, HCREVISION); + isp116x_show_reg(isp116x, HCCONTROL); + isp116x_show_reg(isp116x, HCCMDSTAT); + isp116x_show_reg(isp116x, HCINTSTAT); + isp116x_show_reg(isp116x, HCINTENB); + isp116x_show_reg(isp116x, HCFMINTVL); + isp116x_show_reg(isp116x, HCFMREM); + isp116x_show_reg(isp116x, HCFMNUM); + isp116x_show_reg(isp116x, HCLSTHRESH); + isp116x_show_reg(isp116x, HCRHDESCA); + isp116x_show_reg(isp116x, HCRHDESCB); + isp116x_show_reg(isp116x, HCRHSTATUS); + isp116x_show_reg(isp116x, HCRHPORT1); + isp116x_show_reg(isp116x, HCRHPORT2); + isp116x_show_reg(isp116x, HCHWCFG); + isp116x_show_reg(isp116x, HCDMACFG); + isp116x_show_reg(isp116x, HCXFERCTR); + isp116x_show_reg(isp116x, HCuPINT); + isp116x_show_reg(isp116x, HCuPINTENB); + isp116x_show_reg(isp116x, HCCHIPID); + isp116x_show_reg(isp116x, HCSCRATCH); + isp116x_show_reg(isp116x, HCITLBUFLEN); + isp116x_show_reg(isp116x, HCATLBUFLEN); + isp116x_show_reg(isp116x, HCBUFSTAT); + isp116x_show_reg(isp116x, HCRDITL0LEN); + isp116x_show_reg(isp116x, HCRDITL1LEN); +} + +#if defined(URB_TRACE) + +#define PIPETYPE(pipe) ({ char *__s; \ + if (usb_pipecontrol(pipe)) __s = "ctrl"; \ + else if (usb_pipeint(pipe)) __s = "int"; \ + else if (usb_pipebulk(pipe)) __s = "bulk"; \ + else __s = "iso"; \ + __s;}) +#define PIPEDIR(pipe) ({ usb_pipein(pipe) ? "in" : "out"; }) +#define URB_NOTSHORT(urb) ({ (urb)->transfer_flags & URB_SHORT_NOT_OK ? \ + "short_not_ok" : ""; }) + +/* print debug info about the URB */ +static void urb_dbg(struct urb *urb, char *msg) +{ + unsigned int pipe; + + if (!urb) { + DBG("%s: zero urb\n", msg); + return; + } + pipe = urb->pipe; + DBG("%s: FA %d ep%d%s %s: len %d/%d %s\n", msg, + usb_pipedevice(pipe), usb_pipeendpoint(pipe), + PIPEDIR(pipe), PIPETYPE(pipe), + urb->transfer_buffer_length, urb->actual_length, URB_NOTSHORT(urb)); +} + +#else + +#define urb_dbg(urb,msg) do{}while(0) + +#endif /* ! defined(URB_TRACE) */ + +#if defined(PTD_TRACE) + +#define PTD_DIR_STR(ptd) ({char __c; \ + switch(PTD_GET_DIR(ptd)){ \ + case 0: __c = 's'; break; \ + case 1: __c = 'o'; break; \ + default: __c = 'i'; break; \ + }; __c;}) + +/* + Dump PTD info. 
The code documents the format + perfectly, right :) +*/ +static inline void dump_ptd(struct ptd *ptd) +{ + printk("td: %x %d%c%d %d,%d,%d %x %x%x%x\n", + PTD_GET_CC(ptd), PTD_GET_FA(ptd), + PTD_DIR_STR(ptd), PTD_GET_EP(ptd), + PTD_GET_COUNT(ptd), PTD_GET_LEN(ptd), PTD_GET_MPS(ptd), + PTD_GET_TOGGLE(ptd), PTD_GET_ACTIVE(ptd), + PTD_GET_SPD(ptd), PTD_GET_LAST(ptd)); +} + +static inline void dump_ptd_out_data(struct ptd *ptd, u8 * buf) +{ + int k; + + if (PTD_GET_DIR(ptd) != PTD_DIR_IN && PTD_GET_LEN(ptd)) { + printk("-> "); + for (k = 0; k < PTD_GET_LEN(ptd); ++k) + printk("%02x ", ((u8 *) buf)[k]); + printk("\n"); + } +} + +static inline void dump_ptd_in_data(struct ptd *ptd, u8 * buf) +{ + int k; + + if (PTD_GET_DIR(ptd) == PTD_DIR_IN && PTD_GET_COUNT(ptd)) { + printk("<- "); + for (k = 0; k < PTD_GET_COUNT(ptd); ++k) + printk("%02x ", ((u8 *) buf)[k]); + printk("\n"); + } + if (PTD_GET_LAST(ptd)) + printk("-\n"); +} + +#else + +#define dump_ptd(ptd) do{}while(0) +#define dump_ptd_in_data(ptd,buf) do{}while(0) +#define dump_ptd_out_data(ptd,buf) do{}while(0) + +#endif /* ! defined(PTD_TRACE) */ diff --git a/include/linux/usb_isp116x.h b/include/linux/usb_isp116x.h new file mode 100644 index 00000000000..5f5a9d9bd6c --- /dev/null +++ b/include/linux/usb_isp116x.h @@ -0,0 +1,47 @@ + +/* + * Board initialization code should put one of these into dev->platform_data + * and place the isp116x onto platform_bus. + */ + +struct isp116x_platform_data { + /* Enable internal resistors on downstream ports */ + unsigned sel15Kres:1; + /* Chip's internal clock won't be stopped in suspended state. + Setting/unsetting this bit takes effect only if + 'remote_wakeup_enable' below is not set. */ + unsigned clknotstop:1; + /* On-chip overcurrent protection */ + unsigned oc_enable:1; + /* INT output polarity */ + unsigned int_act_high:1; + /* INT edge or level triggered */ + unsigned int_edge_triggered:1; + /* WAKEUP pin connected - NOT SUPPORTED */ + /* unsigned remote_wakeup_connected:1; */ + /* Wakeup by devices on usb bus enabled */ + unsigned remote_wakeup_enable:1; + /* Switch or not to switch (keep always powered) */ + unsigned no_power_switching:1; + /* Ganged port power switching (0) or individual port + power switching (1) */ + unsigned power_switching_mode:1; + /* Given port_power, msec/2 after power on till power good */ + u8 potpg; + /* Hardware reset set/clear. If implemented, this function must: + if set == 0, deassert chip's HW reset pin + otherwise, assert chip's HW reset pin */ + void (*reset) (struct device * dev, int set); + /* Hardware clock start/stop. If implemented, this function must: + if start == 0, stop the external clock + otherwise, start the external clock + */ + void (*clock) (struct device * dev, int start); + /* Inter-io delay (ns). The chip is picky about access timings; it + expects at least: + 150ns delay between consecutive accesses to DATA_REG, + 300ns delay between access to ADDR_REG and DATA_REG + OE, WE MUST NOT be changed during these intervals + */ + void (*delay) (struct device * dev, int delay); +}; -- cgit v1.2.3-70-g09d2 From 1bbc169621cbe502b9143a27eb12802a0f1d43a0 Mon Sep 17 00:00:00 2001 From: David Brownell Date: Sat, 7 May 2005 13:05:13 -0700 Subject: [PATCH] USB: gadget driver updates (SETUP api change) This updates most of the gadget framework to expect SETUP packets use USB byteorder (matching the annotation in and usage in the host side stack): - definition in - gadget drivers: Ethernet/RNDIS, serial/ACM, file_storage, gadgetfs. 
- dummy_hcd It also includes some other similar changes as suggested by "sparse", which was used to detect byteorder bugs. Signed-off-by: David Brownell Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/dummy_hcd.c | 3 --- drivers/usb/gadget/ether.c | 6 +++--- drivers/usb/gadget/file_storage.c | 19 +++++++++---------- drivers/usb/gadget/inode.c | 12 ++++++------ drivers/usb/gadget/serial.c | 36 +++++++++++++++++++----------------- drivers/usb/gadget/zero.c | 6 +++--- include/linux/usb_gadget.h | 2 +- 7 files changed, 41 insertions(+), 43 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/gadget/dummy_hcd.c b/drivers/usb/gadget/dummy_hcd.c index 73d2f24050a..f9540adf2a4 100644 --- a/drivers/usb/gadget/dummy_hcd.c +++ b/drivers/usb/gadget/dummy_hcd.c @@ -1267,9 +1267,6 @@ restart: struct dummy_ep *ep2; setup = *(struct usb_ctrlrequest*) urb->setup_packet; - le16_to_cpus (&setup.wIndex); - le16_to_cpus (&setup.wValue); - le16_to_cpus (&setup.wLength); if (setup.wLength != urb->transfer_buffer_length) { maybe_set_status (urb, -EOVERFLOW); goto return_urb; diff --git a/drivers/usb/gadget/ether.c b/drivers/usb/gadget/ether.c index 3830a0a0fd5..9f8413e3c10 100644 --- a/drivers/usb/gadget/ether.c +++ b/drivers/usb/gadget/ether.c @@ -1277,9 +1277,9 @@ eth_setup (struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) struct eth_dev *dev = get_gadget_data (gadget); struct usb_request *req = dev->req; int value = -EOPNOTSUPP; - u16 wIndex = (__force u16) ctrl->wIndex; - u16 wValue = (__force u16) ctrl->wValue; - u16 wLength = (__force u16) ctrl->wLength; + u16 wIndex = le16_to_cpu(ctrl->wIndex); + u16 wValue = le16_to_cpu(ctrl->wValue); + u16 wLength = le16_to_cpu(ctrl->wLength); /* descriptors just go into the pre-allocated ep0 buffer, * while config change events may enable network traffic. diff --git a/drivers/usb/gadget/file_storage.c b/drivers/usb/gadget/file_storage.c index f5ce45c4b2a..4f57085619b 100644 --- a/drivers/usb/gadget/file_storage.c +++ b/drivers/usb/gadget/file_storage.c @@ -819,7 +819,7 @@ static void inline put_be32(u8 *buf, u32 val) buf[0] = val >> 24; buf[1] = val >> 16; buf[2] = val >> 8; - buf[3] = val; + buf[3] = val & 0xff; } @@ -1277,8 +1277,8 @@ static int class_setup_req(struct fsg_dev *fsg, { struct usb_request *req = fsg->ep0req; int value = -EOPNOTSUPP; - u16 w_index = ctrl->wIndex; - u16 w_length = ctrl->wLength; + u16 w_index = le16_to_cpu(ctrl->wIndex); + u16 w_length = le16_to_cpu(ctrl->wLength); if (!fsg->config) return value; @@ -1345,7 +1345,7 @@ static int class_setup_req(struct fsg_dev *fsg, "unknown class-specific control req " "%02x.%02x v%04x i%04x l%u\n", ctrl->bRequestType, ctrl->bRequest, - ctrl->wValue, w_index, w_length); + le16_to_cpu(ctrl->wValue), w_index, w_length); return value; } @@ -1359,8 +1359,8 @@ static int standard_setup_req(struct fsg_dev *fsg, { struct usb_request *req = fsg->ep0req; int value = -EOPNOTSUPP; - u16 w_index = ctrl->wIndex; - u16 w_value = ctrl->wValue; + u16 w_index = le16_to_cpu(ctrl->wIndex); + u16 w_value = le16_to_cpu(ctrl->wValue); /* Usually this just stores reply data in the pre-allocated ep0 buffer, * but config change events will also reconfigure hardware. 
*/ @@ -1469,7 +1469,7 @@ static int standard_setup_req(struct fsg_dev *fsg, VDBG(fsg, "unknown control req %02x.%02x v%04x i%04x l%u\n", ctrl->bRequestType, ctrl->bRequest, - w_value, w_index, ctrl->wLength); + w_value, w_index, le16_to_cpu(ctrl->wLength)); } return value; @@ -1481,7 +1481,7 @@ static int fsg_setup(struct usb_gadget *gadget, { struct fsg_dev *fsg = get_gadget_data(gadget); int rc; - int w_length = ctrl->wLength; + int w_length = le16_to_cpu(ctrl->wLength); ++fsg->ep0_req_tag; // Record arrival of a new request fsg->ep0req->context = NULL; @@ -1497,8 +1497,7 @@ static int fsg_setup(struct usb_gadget *gadget, if (rc >= 0 && rc != DELAYED_STATUS) { rc = min(rc, w_length); fsg->ep0req->length = rc; - fsg->ep0req->zero = (rc < w_length && - (rc % gadget->ep0->maxpacket) == 0); + fsg->ep0req->zero = rc < w_length; fsg->ep0req_name = (ctrl->bRequestType & USB_DIR_IN ? "ep0-in" : "ep0-out"); rc = ep0_queue(fsg); diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c index 1e5e6ddef78..020815397a4 100644 --- a/drivers/usb/gadget/inode.c +++ b/drivers/usb/gadget/inode.c @@ -417,8 +417,8 @@ ep_read (struct file *fd, char __user *buf, size_t len, loff_t *ptr) goto free1; value = ep_io (data, kbuf, len); - VDEBUG (data->dev, "%s read %d OUT, status %d\n", - data->name, len, value); + VDEBUG (data->dev, "%s read %zu OUT, status %d\n", + data->name, len, (int) value); if (value >= 0 && copy_to_user (buf, kbuf, value)) value = -EFAULT; @@ -465,8 +465,8 @@ ep_write (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) } value = ep_io (data, kbuf, len); - VDEBUG (data->dev, "%s write %d IN, status %d\n", - data->name, len, value); + VDEBUG (data->dev, "%s write %zu IN, status %d\n", + data->name, len, (int) value); free1: up (&data->lock); kfree (kbuf); @@ -1318,8 +1318,8 @@ gadgetfs_setup (struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) struct usb_request *req = dev->req; int value = -EOPNOTSUPP; struct usb_gadgetfs_event *event; - u16 w_value = ctrl->wValue; - u16 w_length = ctrl->wLength; + u16 w_value = le16_to_cpu(ctrl->wValue); + u16 w_length = le16_to_cpu(ctrl->wLength); spin_lock (&dev->lock); dev->setup_abort = 0; diff --git a/drivers/usb/gadget/serial.c b/drivers/usb/gadget/serial.c index 4d591c764e3..9e4f1c6935a 100644 --- a/drivers/usb/gadget/serial.c +++ b/drivers/usb/gadget/serial.c @@ -300,18 +300,18 @@ static int gs_build_config_buf(u8 *buf, enum usb_device_speed speed, u8 type, unsigned int index, int is_otg); static struct usb_request *gs_alloc_req(struct usb_ep *ep, unsigned int len, - int kmalloc_flags); + unsigned kmalloc_flags); static void gs_free_req(struct usb_ep *ep, struct usb_request *req); static struct gs_req_entry *gs_alloc_req_entry(struct usb_ep *ep, unsigned len, - int kmalloc_flags); + unsigned kmalloc_flags); static void gs_free_req_entry(struct usb_ep *ep, struct gs_req_entry *req); -static int gs_alloc_ports(struct gs_dev *dev, int kmalloc_flags); +static int gs_alloc_ports(struct gs_dev *dev, unsigned kmalloc_flags); static void gs_free_ports(struct gs_dev *dev); /* circular buffer */ -static struct gs_buf *gs_buf_alloc(unsigned int size, int kmalloc_flags); +static struct gs_buf *gs_buf_alloc(unsigned int size, unsigned kmalloc_flags); static void gs_buf_free(struct gs_buf *gb); static void gs_buf_clear(struct gs_buf *gb); static unsigned int gs_buf_data_avail(struct gs_buf *gb); @@ -1607,9 +1607,9 @@ static int gs_setup(struct usb_gadget *gadget, int ret = -EOPNOTSUPP; struct gs_dev *dev = 
get_gadget_data(gadget); struct usb_request *req = dev->dev_ctrl_req; - u16 wIndex = ctrl->wIndex; - u16 wValue = ctrl->wValue; - u16 wLength = ctrl->wLength; + u16 wIndex = le16_to_cpu(ctrl->wIndex); + u16 wValue = le16_to_cpu(ctrl->wValue); + u16 wLength = le16_to_cpu(ctrl->wLength); switch (ctrl->bRequestType & USB_TYPE_MASK) { case USB_TYPE_STANDARD: @@ -1651,9 +1651,9 @@ static int gs_setup_standard(struct usb_gadget *gadget, int ret = -EOPNOTSUPP; struct gs_dev *dev = get_gadget_data(gadget); struct usb_request *req = dev->dev_ctrl_req; - u16 wIndex = ctrl->wIndex; - u16 wValue = ctrl->wValue; - u16 wLength = ctrl->wLength; + u16 wIndex = le16_to_cpu(ctrl->wIndex); + u16 wValue = le16_to_cpu(ctrl->wValue); + u16 wLength = le16_to_cpu(ctrl->wLength); switch (ctrl->bRequest) { case USB_REQ_GET_DESCRIPTOR: @@ -1782,9 +1782,9 @@ static int gs_setup_class(struct usb_gadget *gadget, struct gs_dev *dev = get_gadget_data(gadget); struct gs_port *port = dev->dev_port[0]; /* ACM only has one port */ struct usb_request *req = dev->dev_ctrl_req; - u16 wIndex = ctrl->wIndex; - u16 wValue = ctrl->wValue; - u16 wLength = ctrl->wLength; + u16 wIndex = le16_to_cpu(ctrl->wIndex); + u16 wValue = le16_to_cpu(ctrl->wValue); + u16 wLength = le16_to_cpu(ctrl->wLength); switch (ctrl->bRequest) { case USB_CDC_REQ_SET_LINE_CODING: @@ -2119,7 +2119,8 @@ static int gs_build_config_buf(u8 *buf, enum usb_device_speed speed, * Allocate a usb_request and its buffer. Returns a pointer to the * usb_request or NULL if there is an error. */ -static struct usb_request *gs_alloc_req(struct usb_ep *ep, unsigned int len, int kmalloc_flags) +static struct usb_request * +gs_alloc_req(struct usb_ep *ep, unsigned int len, unsigned kmalloc_flags) { struct usb_request *req; @@ -2159,7 +2160,8 @@ static void gs_free_req(struct usb_ep *ep, struct usb_request *req) * Allocates a request and its buffer, using the given * endpoint, buffer len, and kmalloc flags. */ -static struct gs_req_entry *gs_alloc_req_entry(struct usb_ep *ep, unsigned len, int kmalloc_flags) +static struct gs_req_entry * +gs_alloc_req_entry(struct usb_ep *ep, unsigned len, unsigned kmalloc_flags) { struct gs_req_entry *req; @@ -2200,7 +2202,7 @@ static void gs_free_req_entry(struct usb_ep *ep, struct gs_req_entry *req) * * The device lock is normally held when calling this function. */ -static int gs_alloc_ports(struct gs_dev *dev, int kmalloc_flags) +static int gs_alloc_ports(struct gs_dev *dev, unsigned kmalloc_flags) { int i; struct gs_port *port; @@ -2282,7 +2284,7 @@ static void gs_free_ports(struct gs_dev *dev) * * Allocate a circular buffer and all associated memory. */ -static struct gs_buf *gs_buf_alloc(unsigned int size, int kmalloc_flags) +static struct gs_buf *gs_buf_alloc(unsigned int size, unsigned kmalloc_flags) { struct gs_buf *gb; diff --git a/drivers/usb/gadget/zero.c b/drivers/usb/gadget/zero.c index 6e49432071a..a6e035e2447 100644 --- a/drivers/usb/gadget/zero.c +++ b/drivers/usb/gadget/zero.c @@ -919,9 +919,9 @@ zero_setup (struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) struct zero_dev *dev = get_gadget_data (gadget); struct usb_request *req = dev->req; int value = -EOPNOTSUPP; - u16 w_index = ctrl->wIndex; - u16 w_value = ctrl->wValue; - u16 w_length = ctrl->wLength; + u16 w_index = le16_to_cpu(ctrl->wIndex); + u16 w_value = le16_to_cpu(ctrl->wValue); + u16 w_length = le16_to_cpu(ctrl->wLength); /* usually this stores reply data in the pre-allocated ep0 buffer, * but config change events will reconfigure hardware. 
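The pattern above repeats across every converted driver. As a minimal sketch (the handle_get_descriptor() helper is hypothetical, not from the patch): after this change a setup() callback converts the three 16-bit wire-order fields exactly once, up front, while bRequestType and bRequest are single bytes and need no conversion:

	static int example_setup(struct usb_gadget *gadget,
			const struct usb_ctrlrequest *ctrl)
	{
		u16 w_value  = le16_to_cpu(ctrl->wValue);
		u16 w_index  = le16_to_cpu(ctrl->wIndex);
		u16 w_length = le16_to_cpu(ctrl->wLength);

		switch (ctrl->bRequest) {
		case USB_REQ_GET_DESCRIPTOR:
			/* descriptor type in the high byte of wValue,
			 * index in the low byte, buffer size in wLength */
			return handle_get_descriptor(w_value >> 8, w_value & 0xff,
						     w_index, w_length);
		default:
			return -EOPNOTSUPP;	/* gadget stack stalls ep0 */
		}
	}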
diff --git a/include/linux/usb_gadget.h b/include/linux/usb_gadget.h index 9bba9997947..b00f127cb44 100644 --- a/include/linux/usb_gadget.h +++ b/include/linux/usb_gadget.h @@ -711,7 +711,7 @@ usb_gadget_disconnect (struct usb_gadget *gadget) * the hardware level driver. Most calls must be handled by * the gadget driver, including descriptor and configuration * management. The 16 bit members of the setup data are in - * cpu order. Called in_interrupt; this may not sleep. Driver + * USB byte order. Called in_interrupt; this may not sleep. Driver * queues a response to ep0, or returns negative to stall. * @disconnect: Invoked after all transfers have been stopped, * when the host is disconnected. May be called in_interrupt; this -- cgit v1.2.3-70-g09d2 From 5da0106f0b9b13afa4a902c01d4c98b002df55ff Mon Sep 17 00:00:00 2001 From: David Brownell Date: Tue, 31 May 2005 10:21:11 -0700 Subject: [PATCH] USB: wireless usb declarations This provides declarations for new requests, descriptors, and bitfields as defined in the Wireless USB 1.0 spec. Device support will involve a new "Wire Adapter" device class, connecting a USB Host to a cluster of wireless USB devices. There will be two adapter types: * Host Wireless Adapter (HWA): the downstream link is wireless, which connects a wireless USB host to wireless USB devices (not unlike like a hub) including to the second type of adapter. * Device Wireless Adapter (DWA): the upstream link is wireless, for connecting existing USB devices through wired links into the cluser. All wireless USB devices will need persistent (and secure!) key storage, and it's probable that Linux -- or device firmware -- will need to be involved with that to bootstrap the initial secure key exchange. Some user interface is required in that initial key exchange, and since the most "hands-off" one is a wired USB link, I suspect wireless operation will usually not be the only mode for wireless USB devices. (Plus, devices can recharge batteries using wired USB...) All other key exchange protocols need error prone user interactions, like copying and/or verifying keys. It'll likely be a while before we have commercial Wireless USB hardware, much less Linux implementations that know how to use it. Signed-off-by: David Brownell Signed-off-by: Greg Kroah-Hartman --- include/linux/usb_ch9.h | 183 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 176 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb_ch9.h b/include/linux/usb_ch9.h index f5fe94e09a0..39e7ff4ffd2 100644 --- a/include/linux/usb_ch9.h +++ b/include/linux/usb_ch9.h @@ -6,11 +6,14 @@ * * - the master/host side Linux-USB kernel driver API; * - the "usbfs" user space API; and - * - (eventually) a Linux "gadget" slave/device side driver API. + * - the Linux "gadget" slave/device/peripheral side driver API. * * USB 2.0 adds an additional "On The Go" (OTG) mode, which lets systems * act either as a USB master/host or as a USB slave/device. That means - * the master and slave side APIs will benefit from working well together. + * the master and slave side APIs benefit from working well together. + * + * There's also "Wireless USB", using low power short range radios for + * peripheral interconnection but otherwise building on the USB framework. 
*/ #ifndef __LINUX_USB_CH9_H @@ -68,6 +71,18 @@ #define USB_REQ_SET_INTERFACE 0x0B #define USB_REQ_SYNCH_FRAME 0x0C +#define USB_REQ_SET_ENCRYPTION 0x0D /* Wireless USB */ +#define USB_REQ_GET_ENCRYPTION 0x0E +#define USB_REQ_SET_HANDSHAKE 0x0F +#define USB_REQ_GET_HANDSHAKE 0x10 +#define USB_REQ_SET_CONNECTION 0x11 +#define USB_REQ_SET_SECURITY_DATA 0x12 +#define USB_REQ_GET_SECURITY_DATA 0x13 +#define USB_REQ_SET_WUSB_DATA 0x14 +#define USB_REQ_LOOPBACK_DATA_WRITE 0x15 +#define USB_REQ_LOOPBACK_DATA_READ 0x16 +#define USB_REQ_SET_INTERFACE_DS 0x17 + /* * USB feature flags are written using USB_REQ_{CLEAR,SET}_FEATURE, and * are read as a bit array returned by USB_REQ_GET_STATUS. (So there @@ -75,10 +90,12 @@ */ #define USB_DEVICE_SELF_POWERED 0 /* (read only) */ #define USB_DEVICE_REMOTE_WAKEUP 1 /* dev may initiate wakeup */ -#define USB_DEVICE_TEST_MODE 2 /* (high speed only) */ -#define USB_DEVICE_B_HNP_ENABLE 3 /* dev may initiate HNP */ -#define USB_DEVICE_A_HNP_SUPPORT 4 /* RH port supports HNP */ -#define USB_DEVICE_A_ALT_HNP_SUPPORT 5 /* other RH port does */ +#define USB_DEVICE_TEST_MODE 2 /* (wired high speed only) */ +#define USB_DEVICE_BATTERY 2 /* (wireless) */ +#define USB_DEVICE_B_HNP_ENABLE 3 /* (otg) dev may initiate HNP */ +#define USB_DEVICE_WUSB_DEVICE 3 /* (wireless)*/ +#define USB_DEVICE_A_HNP_SUPPORT 4 /* (otg) RH port supports HNP */ +#define USB_DEVICE_A_ALT_HNP_SUPPORT 5 /* (otg) other RH port does */ #define USB_DEVICE_DEBUG_MODE 6 /* (special devices only) */ #define USB_ENDPOINT_HALT 0 /* IN/OUT will STALL */ @@ -135,6 +152,13 @@ struct usb_ctrlrequest { #define USB_DT_OTG 0x09 #define USB_DT_DEBUG 0x0a #define USB_DT_INTERFACE_ASSOCIATION 0x0b +/* these are from the Wireless USB spec */ +#define USB_DT_SECURITY 0x0c +#define USB_DT_KEY 0x0d +#define USB_DT_ENCRYPTION_TYPE 0x0e +#define USB_DT_BOS 0x0f +#define USB_DT_DEVICE_CAPABILITY 0x10 +#define USB_DT_WIRELESS_ENDPOINT_COMP 0x11 /* conventional codes for class-specific descriptors */ #define USB_DT_CS_DEVICE 0x21 @@ -192,6 +216,7 @@ struct usb_device_descriptor { #define USB_CLASS_CSCID 0x0b /* chip+ smart card */ #define USB_CLASS_CONTENT_SEC 0x0d /* content security */ #define USB_CLASS_VIDEO 0x0e +#define USB_CLASS_WIRELESS_CONTROLLER 0xe0 #define USB_CLASS_APP_SPEC 0xfe #define USB_CLASS_VENDOR_SPEC 0xff @@ -223,6 +248,7 @@ struct usb_config_descriptor { #define USB_CONFIG_ATT_ONE (1 << 7) /* must be set */ #define USB_CONFIG_ATT_SELFPOWER (1 << 6) /* self powered */ #define USB_CONFIG_ATT_WAKEUP (1 << 5) /* can wakeup */ +#define USB_CONFIG_ATT_BATTERY (1 << 4) /* battery powered */ /*-------------------------------------------------------------------------*/ @@ -289,6 +315,7 @@ struct usb_endpoint_descriptor { #define USB_ENDPOINT_XFER_ISOC 1 #define USB_ENDPOINT_XFER_BULK 2 #define USB_ENDPOINT_XFER_INT 3 +#define USB_ENDPOINT_MAX_ADJUSTABLE 0x80 /*-------------------------------------------------------------------------*/ @@ -350,6 +377,147 @@ struct usb_interface_assoc_descriptor { } __attribute__ ((packed)); +/*-------------------------------------------------------------------------*/ + +/* USB_DT_SECURITY: group of wireless security descriptors, including + * encryption types available for setting up a CC/association. 
+ */ +struct usb_security_descriptor { + __u8 bLength; + __u8 bDescriptorType; + + __le16 wTotalLength; + __u8 bNumEncryptionTypes; +}; + +/*-------------------------------------------------------------------------*/ + +/* USB_DT_KEY: used with {GET,SET}_SECURITY_DATA; only public keys + * may be retrieved. + */ +struct usb_key_descriptor { + __u8 bLength; + __u8 bDescriptorType; + + __u8 tTKID[3]; + __u8 bReserved; + __u8 bKeyData[0]; +}; + +/*-------------------------------------------------------------------------*/ + +/* USB_DT_ENCRYPTION_TYPE: bundled in DT_SECURITY groups */ +struct usb_encryption_descriptor { + __u8 bLength; + __u8 bDescriptorType; + + __u8 bEncryptionType; +#define USB_ENC_TYPE_UNSECURE 0 +#define USB_ENC_TYPE_WIRED 1 /* non-wireless mode */ +#define USB_ENC_TYPE_CCM_1 2 /* aes128/cbc session */ +#define USB_ENC_TYPE_RSA_1 3 /* rsa3072/sha1 auth */ + __u8 bEncryptionValue; /* use in SET_ENCRYPTION */ + __u8 bAuthKeyIndex; +}; + + +/*-------------------------------------------------------------------------*/ + +/* USB_DT_BOS: group of wireless capabilities */ +struct usb_bos_descriptor { + __u8 bLength; + __u8 bDescriptorType; + + __le16 wTotalLength; + __u8 bNumDeviceCaps; +}; + +/*-------------------------------------------------------------------------*/ + +/* USB_DT_DEVICE_CAPABILITY: grouped with BOS */ +struct usb_dev_cap_header { + __u8 bLength; + __u8 bDescriptorType; + __u8 bDevCapabilityType; +}; + +#define USB_CAP_TYPE_WIRELESS_USB 1 + +struct usb_wireless_cap_descriptor { /* Ultra Wide Band */ + __u8 bLength; + __u8 bDescriptorType; + __u8 bDevCapabilityType; + + __u8 bmAttributes; +#define USB_WIRELESS_P2P_DRD (1 << 1) +#define USB_WIRELESS_BEACON_MASK (3 << 2) +#define USB_WIRELESS_BEACON_SELF (1 << 2) +#define USB_WIRELESS_BEACON_DIRECTED (2 << 2) +#define USB_WIRELESS_BEACON_NONE (3 << 2) + __le16 wPHYRates; /* bit rates, Mbps */ +#define USB_WIRELESS_PHY_53 (1 << 0) /* always set */ +#define USB_WIRELESS_PHY_80 (1 << 1) +#define USB_WIRELESS_PHY_107 (1 << 2) /* always set */ +#define USB_WIRELESS_PHY_160 (1 << 3) +#define USB_WIRELESS_PHY_200 (1 << 4) /* always set */ +#define USB_WIRELESS_PHY_320 (1 << 5) +#define USB_WIRELESS_PHY_400 (1 << 6) +#define USB_WIRELESS_PHY_480 (1 << 7) + __u8 bmTFITXPowerInfo; /* TFI power levels */ + __u8 bmFFITXPowerInfo; /* FFI power levels */ + __le16 bmBandGroup; + __u8 bReserved; +}; + +/*-------------------------------------------------------------------------*/ + +/* USB_DT_WIRELESS_ENDPOINT_COMP: companion descriptor associated with + * each endpoint descriptor for a wireless device + */ +struct usb_wireless_ep_comp_descriptor { + __u8 bLength; + __u8 bDescriptorType; + + __u8 bMaxBurst; + __u8 bMaxSequence; + __le16 wMaxStreamDelay; + __le16 wOverTheAirPacketSize; + __u8 bOverTheAirInterval; + __u8 bmCompAttributes; +#define USB_ENDPOINT_SWITCH_MASK 0x03 /* in bmCompAttributes */ +#define USB_ENDPOINT_SWITCH_NO 0 +#define USB_ENDPOINT_SWITCH_SWITCH 1 +#define USB_ENDPOINT_SWITCH_SCALE 2 +}; + +/*-------------------------------------------------------------------------*/ + +/* USB_REQ_SET_HANDSHAKE is a four-way handshake used between a wireless + * host and a device for connection set up, mutual authentication, and + * exchanging short lived session keys. The handshake depends on a CC. 
+ */ +struct usb_handshake { + __u8 bMessageNumber; + __u8 bStatus; + __u8 tTKID[3]; + __u8 bReserved; + __u8 CDID[16]; + __u8 nonce[16]; + __u8 MIC[8]; +}; + +/*-------------------------------------------------------------------------*/ + +/* USB_REQ_SET_CONNECTION modifies or revokes a connection context (CC). + * A CC may also be set up using non-wireless secure channels (including + * wired USB!), and some devices may support CCs with multiple hosts. + */ +struct usb_connection_context { + __u8 CHID[16]; /* persistent host id */ + __u8 CDID[16]; /* device id (unique w/in host context) */ + __u8 CK[16]; /* connection key */ +}; + /*-------------------------------------------------------------------------*/ /* USB 2.0 defines three speeds, here's how Linux identifies them */ @@ -357,7 +525,8 @@ struct usb_interface_assoc_descriptor { enum usb_device_speed { USB_SPEED_UNKNOWN = 0, /* enumerating */ USB_SPEED_LOW, USB_SPEED_FULL, /* usb 1.1 */ - USB_SPEED_HIGH /* usb 2.0 */ + USB_SPEED_HIGH, /* usb 2.0 */ + USB_SPEED_VARIABLE, /* wireless (usb 2.5) */ }; enum usb_device_state { -- cgit v1.2.3-70-g09d2 From 8c8709334cec803368a432a33e0f2e116d48fe07 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 27 Jun 2005 14:36:34 -0700 Subject: [PATCH] ppc32: Remove CONFIG_PMAC_PBOOK This patch removes CONFIG_PMAC_PBOOK (PowerBook support). This is now split into CONFIG_PMAC_MEDIABAY for the actual hotswap bay that some powerbooks have, CONFIG_PM for power management related code, and just left out of any CONFIG_* option for some generally useful stuff that can be used on non-laptops as well. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ppc/platforms/pmac_sleep.S | 4 +- arch/ppc/platforms/pmac_time.c | 8 +- drivers/block/swim3.c | 10 +- drivers/char/misc.c | 3 - drivers/ide/ppc/pmac.c | 8 +- drivers/ieee1394/ohci1394.c | 10 +- drivers/macintosh/Kconfig | 35 ++----- drivers/macintosh/Makefile | 2 +- drivers/macintosh/adb.c | 10 +- drivers/macintosh/via-pmu.c | 70 ++++++-------- drivers/usb/host/ohci-pci.c | 13 +-- drivers/video/aty/aty128fb.c | 14 --- drivers/video/chipsfb.c | 176 +++++++++++++++++++----------------- include/linux/pmu.h | 6 +- sound/oss/dmasound/dmasound_awacs.c | 14 +-- sound/ppc/awacs.c | 8 +- sound/ppc/daca.c | 6 +- sound/ppc/pmac.c | 11 ++- sound/ppc/pmac.h | 2 +- sound/ppc/tumbler.c | 4 +- 20 files changed, 193 insertions(+), 221 deletions(-) (limited to 'include/linux') diff --git a/arch/ppc/platforms/pmac_sleep.S b/arch/ppc/platforms/pmac_sleep.S index f459ade1bd6..016a7464915 100644 --- a/arch/ppc/platforms/pmac_sleep.S +++ b/arch/ppc/platforms/pmac_sleep.S @@ -46,7 +46,7 @@ .section .text .align 5 -#if defined(CONFIG_PMAC_PBOOK) || defined(CONFIG_CPU_FREQ_PMAC) +#if defined(CONFIG_PM) || defined(CONFIG_CPU_FREQ_PMAC) /* This gets called by via-pmu.c late during the sleep process. * The PMU was already send the sleep command and will shut us down @@ -382,7 +382,7 @@ turn_on_mmu: isync rfi -#endif /* defined(CONFIG_PMAC_PBOOK) || defined(CONFIG_CPU_FREQ) */ +#endif /* defined(CONFIG_PM) || defined(CONFIG_CPU_FREQ) */ .section .data .balign L1_CACHE_LINE_SIZE diff --git a/arch/ppc/platforms/pmac_time.c b/arch/ppc/platforms/pmac_time.c index de60ccc7db9..778ce4fec36 100644 --- a/arch/ppc/platforms/pmac_time.c +++ b/arch/ppc/platforms/pmac_time.c @@ -206,7 +206,7 @@ via_calibrate_decr(void) return 1; } -#ifdef CONFIG_PMAC_PBOOK +#ifdef CONFIG_PM /* * Reset the time after a sleep. 
*/ @@ -238,7 +238,7 @@ time_sleep_notify(struct pmu_sleep_notifier *self, int when) static struct pmu_sleep_notifier time_sleep_notifier __pmacdata = { time_sleep_notify, SLEEP_LEVEL_MISC, }; -#endif /* CONFIG_PMAC_PBOOK */ +#endif /* CONFIG_PM */ /* * Query the OF and get the decr frequency. @@ -251,9 +251,9 @@ pmac_calibrate_decr(void) struct device_node *cpu; unsigned int freq, *fp; -#ifdef CONFIG_PMAC_PBOOK +#ifdef CONFIG_PM pmu_register_sleep_notifier(&time_sleep_notifier); -#endif /* CONFIG_PMAC_PBOOK */ +#endif /* CONFIG_PM */ /* We assume MacRISC2 machines have correct device-tree * calibration. That's better since the VIA itself seems diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index 5b09cf154ac..e5f7494c00e 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -253,7 +253,7 @@ static int floppy_revalidate(struct gendisk *disk); static int swim3_add_device(struct device_node *swims); int swim3_init(void); -#ifndef CONFIG_PMAC_PBOOK +#ifndef CONFIG_PMAC_MEDIABAY #define check_media_bay(which, what) 1 #endif @@ -297,9 +297,11 @@ static void do_fd_request(request_queue_t * q) int i; for(i=0;imedia_bay && check_media_bay(fs->media_bay, MB_FD)) return -ENXIO; +#endif switch (cmd) { case FDEJECT: @@ -881,8 +885,10 @@ static int floppy_open(struct inode *inode, struct file *filp) int n, err = 0; if (fs->ref_count == 0) { +#ifdef CONFIG_PMAC_MEDIABAY if (fs->media_bay && check_media_bay(fs->media_bay, MB_FD)) return -ENXIO; +#endif out_8(&sw->setup, S_IBM_DRIVE | S_FCLK_DIV2); out_8(&sw->control_bic, 0xff); out_8(&sw->mode, 0x95); @@ -967,8 +973,10 @@ static int floppy_revalidate(struct gendisk *disk) struct swim3 __iomem *sw; int ret, n; +#ifdef CONFIG_PMAC_MEDIABAY if (fs->media_bay && check_media_bay(fs->media_bay, MB_FD)) return -ENXIO; +#endif sw = fs->swim3; grab_drive(fs, revalidating, 0); diff --git a/drivers/char/misc.c b/drivers/char/misc.c index 31cf84d6902..931efd58f87 100644 --- a/drivers/char/misc.c +++ b/drivers/char/misc.c @@ -308,9 +308,6 @@ static int __init misc_init(void) #endif #ifdef CONFIG_BVME6000 rtc_DP8570A_init(); -#endif -#ifdef CONFIG_PMAC_PBOOK - pmu_device_init(); #endif if (register_chrdev(MISC_MAJOR,"misc",&misc_fops)) { printk("unable to get major %d for misc devices\n", diff --git a/drivers/ide/ppc/pmac.c b/drivers/ide/ppc/pmac.c index 569f1676744..818380b5fd2 100644 --- a/drivers/ide/ppc/pmac.c +++ b/drivers/ide/ppc/pmac.c @@ -1324,9 +1324,9 @@ pmac_ide_setup_device(pmac_ide_hwif_t *pmif, ide_hwif_t *hwif) /* XXX FIXME: Media bay stuff need re-organizing */ if (np->parent && np->parent->name && strcasecmp(np->parent->name, "media-bay") == 0) { -#ifdef CONFIG_PMAC_PBOOK +#ifdef CONFIG_PMAC_MEDIABAY media_bay_set_ide_infos(np->parent, pmif->regbase, pmif->irq, hwif->index); -#endif /* CONFIG_PMAC_PBOOK */ +#endif /* CONFIG_PMAC_MEDIABAY */ pmif->mediabay = 1; if (!bidp) pmif->aapl_bus_id = 1; @@ -1382,10 +1382,10 @@ pmac_ide_setup_device(pmac_ide_hwif_t *pmif, ide_hwif_t *hwif) hwif->index, model_name[pmif->kind], pmif->aapl_bus_id, pmif->mediabay ? 
" (mediabay)" : "", hwif->irq); -#ifdef CONFIG_PMAC_PBOOK +#ifdef CONFIG_PMAC_MEDIABAY if (pmif->mediabay && check_media_bay_by_base(pmif->regbase, MB_CD) == 0) hwif->noprobe = 0; -#endif /* CONFIG_PMAC_PBOOK */ +#endif /* CONFIG_PMAC_MEDIABAY */ hwif->sg_max_nents = MAX_DCMDS; diff --git a/drivers/ieee1394/ohci1394.c b/drivers/ieee1394/ohci1394.c index 36e25ac823d..b3d3d22fde6 100644 --- a/drivers/ieee1394/ohci1394.c +++ b/drivers/ieee1394/ohci1394.c @@ -3538,8 +3538,8 @@ static void ohci1394_pci_remove(struct pci_dev *pdev) static int ohci1394_pci_resume (struct pci_dev *pdev) { -#ifdef CONFIG_PMAC_PBOOK - { +#ifdef CONFIG_PPC_PMAC + if (_machine == _MACH_Pmac) { struct device_node *of_node; /* Re-enable 1394 */ @@ -3547,7 +3547,7 @@ static int ohci1394_pci_resume (struct pci_dev *pdev) if (of_node) pmac_call_feature (PMAC_FTR_1394_ENABLE, of_node, 0, 1); } -#endif +#endif /* CONFIG_PPC_PMAC */ pci_enable_device(pdev); @@ -3557,8 +3557,8 @@ static int ohci1394_pci_resume (struct pci_dev *pdev) static int ohci1394_pci_suspend (struct pci_dev *pdev, pm_message_t state) { -#ifdef CONFIG_PMAC_PBOOK - { +#ifdef CONFIG_PPC_PMAC + if (_machine == _MACH_Pmac) { struct device_node *of_node; /* Disable 1394 */ diff --git a/drivers/macintosh/Kconfig b/drivers/macintosh/Kconfig index b0ace5bc950..91691a6c004 100644 --- a/drivers/macintosh/Kconfig +++ b/drivers/macintosh/Kconfig @@ -86,33 +86,18 @@ config PMAC_SMU on the "SMU" system control chip which replaces the old PMU. If you don't know, say Y. -config PMAC_PBOOK - bool "Power management support for PowerBooks" - depends on ADB_PMU - ---help--- - This provides support for putting a PowerBook to sleep; it also - enables media bay support. Power management works on the - PB2400/3400/3500, Wallstreet, Lombard, and Bronze PowerBook G3 and - the Titanium Powerbook G4, as well as the iBooks. You should get - the power management daemon, pmud, to make it work and you must have - the /dev/pmu device (see the pmud README). - - Get pmud from . - - If you have a PowerBook, you should say Y here. - - You may also want to compile the dma sound driver as a module and - have it autoloaded. The act of removing the module shuts down the - sound hardware for more power savings. - -config PM - bool - depends on PPC_PMAC && ADB_PMU && PMAC_PBOOK - default y - config PMAC_APM_EMU tristate "APM emulation" - depends on PMAC_PBOOK + depends on PPC_PMAC && PPC32 && PM + +config PMAC_MEDIABAY + bool "Support PowerBook hotswap media bay" + depends on PPC_PMAC && PPC32 + help + This option adds support for older PowerBook's hotswap media bay + that can contains batteries, floppy drives, or IDE devices. 
PCI + devices are not fully supported in the bay as I never had one to + try with # made a separate option since backlight may end up beeing used # on non-powerbook machines (but only on PMU based ones AFAIK) diff --git a/drivers/macintosh/Makefile b/drivers/macintosh/Makefile index b3f88a4fcef..f5ae171dbfe 100644 --- a/drivers/macintosh/Makefile +++ b/drivers/macintosh/Makefile @@ -6,7 +6,7 @@ obj-$(CONFIG_PPC_PMAC) += macio_asic.o -obj-$(CONFIG_PMAC_PBOOK) += mediabay.o +obj-$(CONFIG_PMAC_MEDIABAY) += mediabay.o obj-$(CONFIG_MAC_EMUMOUSEBTN) += mac_hid.o obj-$(CONFIG_INPUT_ADBHID) += adbhid.o obj-$(CONFIG_ANSLCD) += ans-lcd.o diff --git a/drivers/macintosh/adb.c b/drivers/macintosh/adb.c index 493e2afa191..c0dc1e3fa58 100644 --- a/drivers/macintosh/adb.c +++ b/drivers/macintosh/adb.c @@ -90,7 +90,7 @@ static int sleepy_trackpad; static int autopoll_devs; int __adb_probe_sync; -#ifdef CONFIG_PMAC_PBOOK +#ifdef CONFIG_PM static int adb_notify_sleep(struct pmu_sleep_notifier *self, int when); static struct pmu_sleep_notifier adb_sleep_notifier = { adb_notify_sleep, @@ -320,9 +320,9 @@ int __init adb_init(void) printk(KERN_WARNING "Warning: no ADB interface detected\n"); adb_controller = NULL; } else { -#ifdef CONFIG_PMAC_PBOOK +#ifdef CONFIG_PM pmu_register_sleep_notifier(&adb_sleep_notifier); -#endif /* CONFIG_PMAC_PBOOK */ +#endif /* CONFIG_PM */ #ifdef CONFIG_PPC if (machine_is_compatible("AAPL,PowerBook1998") || machine_is_compatible("PowerBook1,1")) @@ -337,7 +337,7 @@ int __init adb_init(void) __initcall(adb_init); -#ifdef CONFIG_PMAC_PBOOK +#ifdef CONFIG_PM /* * notify clients before sleep and reset bus afterwards */ @@ -378,7 +378,7 @@ adb_notify_sleep(struct pmu_sleep_notifier *self, int when) } return PBOOK_SLEEP_OK; } -#endif /* CONFIG_PMAC_PBOOK */ +#endif /* CONFIG_PM */ static int do_adb_reset_bus(void) diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c index 5375df03c6f..4a0a0ad2d03 100644 --- a/drivers/macintosh/via-pmu.c +++ b/drivers/macintosh/via-pmu.c @@ -155,10 +155,10 @@ static spinlock_t pmu_lock; static u8 pmu_intr_mask; static int pmu_version; static int drop_interrupts; -#ifdef CONFIG_PMAC_PBOOK +#ifdef CONFIG_PM static int option_lid_wakeup = 1; static int sleep_in_progress; -#endif /* CONFIG_PMAC_PBOOK */ +#endif /* CONFIG_PM */ static unsigned long async_req_locks; static unsigned int pmu_irq_stats[11]; @@ -168,7 +168,6 @@ static struct proc_dir_entry *proc_pmu_irqstats; static struct proc_dir_entry *proc_pmu_options; static int option_server_mode; -#ifdef CONFIG_PMAC_PBOOK int pmu_battery_count; int pmu_cur_battery; unsigned int pmu_power_flags; @@ -176,7 +175,6 @@ struct pmu_battery_info pmu_batteries[PMU_MAX_BATTERIES]; static int query_batt_timer = BATTERY_POLLING_COUNT; static struct adb_request batt_req; static struct proc_dir_entry *proc_pmu_batt[PMU_MAX_BATTERIES]; -#endif /* CONFIG_PMAC_PBOOK */ #if defined(CONFIG_INPUT_ADBHID) && defined(CONFIG_PMAC_BACKLIGHT) extern int disable_kernel_backlight; @@ -210,11 +208,9 @@ static int proc_get_irqstats(char *page, char **start, off_t off, static int pmu_set_backlight_level(int level, void* data); static int pmu_set_backlight_enable(int on, int level, void* data); #endif /* CONFIG_PMAC_BACKLIGHT */ -#ifdef CONFIG_PMAC_PBOOK static void pmu_pass_intr(unsigned char *data, int len); static int proc_get_batt(char *page, char **start, off_t off, int count, int *eof, void *data); -#endif /* CONFIG_PMAC_PBOOK */ static int proc_read_options(char *page, char **start, off_t off, int count, int *eof, 
void *data); static int proc_write_options(struct file *file, const char __user *buffer, @@ -407,9 +403,7 @@ static int __init via_pmu_start(void) bright_req_1.complete = 1; bright_req_2.complete = 1; -#ifdef CONFIG_PMAC_PBOOK batt_req.complete = 1; -#endif #ifdef CONFIG_PPC32 if (pmu_kind == PMU_KEYLARGO_BASED) @@ -468,7 +462,7 @@ static int __init via_pmu_dev_init(void) register_backlight_controller(&pmu_backlight_controller, NULL, "pmu"); #endif /* CONFIG_PMAC_BACKLIGHT */ -#ifdef CONFIG_PMAC_PBOOK +#ifdef CONFIG_PPC32 if (machine_is_compatible("AAPL,3400/2400") || machine_is_compatible("AAPL,3500")) { int mb = pmac_call_feature(PMAC_FTR_GET_MB_INFO, @@ -496,20 +490,19 @@ static int __init via_pmu_dev_init(void) pmu_batteries[1].flags |= PMU_BATT_TYPE_SMART; } } -#endif /* CONFIG_PMAC_PBOOK */ +#endif /* CONFIG_PPC32 */ + /* Create /proc/pmu */ proc_pmu_root = proc_mkdir("pmu", NULL); if (proc_pmu_root) { -#ifdef CONFIG_PMAC_PBOOK - int i; + long i; for (i=0; i= 0) p += sprintf(p, "lid_wakeup=%d\n", option_lid_wakeup); -#endif /* CONFIG_PMAC_PBOOK */ +#endif if (pmu_kind == PMU_KEYLARGO_BASED) p += sprintf(p, "server_mode=%d\n", option_server_mode); @@ -932,12 +917,12 @@ proc_write_options(struct file *file, const char __user *buffer, *(val++) = 0; while(*val == ' ') val++; -#ifdef CONFIG_PMAC_PBOOK +#ifdef CONFIG_PM if (pmu_kind == PMU_KEYLARGO_BASED && pmac_call_feature(PMAC_FTR_SLEEP_STATE,NULL,0,-1) >= 0) if (!strcmp(label, "lid_wakeup")) option_lid_wakeup = ((*val) == '1'); -#endif /* CONFIG_PMAC_PBOOK */ +#endif if (pmu_kind == PMU_KEYLARGO_BASED && !strcmp(label, "server_mode")) { int new_value; new_value = ((*val) == '1'); @@ -1432,7 +1417,6 @@ next: } /* Tick interrupt */ else if ((1 << pirq) & PMU_INT_TICK) { -#ifdef CONFIG_PMAC_PBOOK /* Environement or tick interrupt, query batteries */ if (pmu_battery_count) { if ((--query_batt_timer) == 0) { @@ -1447,7 +1431,6 @@ next: pmu_pass_intr(data, len); } else { pmu_pass_intr(data, len); -#endif /* CONFIG_PMAC_PBOOK */ } goto next; } @@ -2062,7 +2045,7 @@ pmu_i2c_simple_write(int bus, int addr, u8* data, int len) return -1; } -#ifdef CONFIG_PMAC_PBOOK +#ifdef CONFIG_PM static LIST_HEAD(sleep_notifiers); @@ -2715,6 +2698,8 @@ powerbook_sleep_3400(void) return 0; } +#endif /* CONFIG_PM */ + /* * Support for /dev/pmu device */ @@ -2894,11 +2879,11 @@ static int __pmac pmu_ioctl(struct inode * inode, struct file *filp, u_int cmd, u_long arg) { - struct pmu_private *pp = filp->private_data; __u32 __user *argp = (__u32 __user *)arg; - int error; + int error = -EINVAL; switch (cmd) { +#ifdef CONFIG_PM case PMU_IOC_SLEEP: if (!capable(CAP_SYS_ADMIN)) return -EACCES; @@ -2920,12 +2905,13 @@ pmu_ioctl(struct inode * inode, struct file *filp, error = -ENOSYS; } sleep_in_progress = 0; - return error; + break; case PMU_IOC_CAN_SLEEP: if (pmac_call_feature(PMAC_FTR_SLEEP_STATE,NULL,0,-1) < 0) return put_user(0, argp); else return put_user(1, argp); +#endif /* CONFIG_PM */ #ifdef CONFIG_PMAC_BACKLIGHT /* Backlight should have its own device or go via @@ -2946,11 +2932,13 @@ pmu_ioctl(struct inode * inode, struct file *filp, error = get_user(value, argp); if (!error) error = set_backlight_level(value); - return error; + break; } #ifdef CONFIG_INPUT_ADBHID case PMU_IOC_GRAB_BACKLIGHT: { + struct pmu_private *pp = filp->private_data; unsigned long flags; + if (pp->backlight_locker) return 0; pp->backlight_locker = 1; @@ -2966,7 +2954,7 @@ pmu_ioctl(struct inode * inode, struct file *filp, case PMU_IOC_HAS_ADB: return put_user(pmu_has_adb, argp); } - 
return -EINVAL; + return error; } static struct file_operations pmu_device_fops __pmacdata = { @@ -2982,14 +2970,16 @@ static struct miscdevice pmu_device __pmacdata = { PMU_MINOR, "pmu", &pmu_device_fops }; -void pmu_device_init(void) +static int pmu_device_init(void) { if (!via) - return; + return 0; if (misc_register(&pmu_device) < 0) printk(KERN_ERR "via-pmu: cannot register misc device.\n"); + return 0; } -#endif /* CONFIG_PMAC_PBOOK */ +device_initcall(pmu_device_init); + #ifdef DEBUG_SLEEP static inline void __pmac @@ -3157,12 +3147,12 @@ EXPORT_SYMBOL(pmu_i2c_combined_read); EXPORT_SYMBOL(pmu_i2c_stdsub_write); EXPORT_SYMBOL(pmu_i2c_simple_read); EXPORT_SYMBOL(pmu_i2c_simple_write); -#ifdef CONFIG_PMAC_PBOOK +#ifdef CONFIG_PM EXPORT_SYMBOL(pmu_register_sleep_notifier); EXPORT_SYMBOL(pmu_unregister_sleep_notifier); EXPORT_SYMBOL(pmu_enable_irled); EXPORT_SYMBOL(pmu_battery_count); EXPORT_SYMBOL(pmu_batteries); EXPORT_SYMBOL(pmu_power_flags); -#endif /* CONFIG_PMAC_PBOOK */ +#endif /* CONFIG_PM */ diff --git a/drivers/usb/host/ohci-pci.c b/drivers/usb/host/ohci-pci.c index 57fd07d0054..eede6be098d 100644 --- a/drivers/usb/host/ohci-pci.c +++ b/drivers/usb/host/ohci-pci.c @@ -14,14 +14,11 @@ * This file is licenced under the GPL. */ -#ifdef CONFIG_PMAC_PBOOK +#ifdef CONFIG_PPC_PMAC #include #include #include #include -#ifndef CONFIG_PM -# define CONFIG_PM -#endif #endif #ifndef CONFIG_PCI @@ -132,7 +129,7 @@ static int ohci_pci_suspend (struct usb_hcd *hcd, pm_message_t message) /* let things settle down a bit */ msleep (100); -#ifdef CONFIG_PMAC_PBOOK +#ifdef CONFIG_PPC_PMAC if (_machine == _MACH_Pmac) { struct device_node *of_node; @@ -141,7 +138,7 @@ static int ohci_pci_suspend (struct usb_hcd *hcd, pm_message_t message) if (of_node) pmac_call_feature(PMAC_FTR_USB_ENABLE, of_node, 0, 0); } -#endif /* CONFIG_PMAC_PBOOK */ +#endif /* CONFIG_PPC_PMAC */ return 0; } @@ -151,7 +148,7 @@ static int ohci_pci_resume (struct usb_hcd *hcd) struct ohci_hcd *ohci = hcd_to_ohci (hcd); int retval = 0; -#ifdef CONFIG_PMAC_PBOOK +#ifdef CONFIG_PPC_PMAC if (_machine == _MACH_Pmac) { struct device_node *of_node; @@ -160,7 +157,7 @@ static int ohci_pci_resume (struct usb_hcd *hcd) if (of_node) pmac_call_feature (PMAC_FTR_USB_ENABLE, of_node, 0, 1); } -#endif /* CONFIG_PMAC_PBOOK */ +#endif /* CONFIG_PPC_PMAC */ /* resume root hub */ if (time_before (jiffies, ohci->next_statechange)) diff --git a/drivers/video/aty/aty128fb.c b/drivers/video/aty/aty128fb.c index 9789115980a..7bc1d44d881 100644 --- a/drivers/video/aty/aty128fb.c +++ b/drivers/video/aty/aty128fb.c @@ -350,10 +350,8 @@ static int default_vmode __initdata = VMODE_1024_768_60; static int default_cmode __initdata = CMODE_8; #endif -#ifdef CONFIG_PMAC_PBOOK static int default_crt_on __initdata = 0; static int default_lcd_on __initdata = 1; -#endif #ifdef CONFIG_MTRR static int mtrr = 1; @@ -1249,7 +1247,6 @@ static int aty128_crtc_to_var(const struct aty128_crtc *crtc, return 0; } -#ifdef CONFIG_PMAC_PBOOK static void aty128_set_crt_enable(struct aty128fb_par *par, int on) { if (on) { @@ -1284,7 +1281,6 @@ static void aty128_set_lcd_enable(struct aty128fb_par *par, int on) aty_st_le32(LVDS_GEN_CNTL, reg); } } -#endif /* CONFIG_PMAC_PBOOK */ static void aty128_set_pll(struct aty128_pll *pll, const struct aty128fb_par *par) { @@ -1491,12 +1487,10 @@ static int aty128fb_set_par(struct fb_info *info) info->fix.visual = par->crtc.bpp == 8 ? 
FB_VISUAL_PSEUDOCOLOR : FB_VISUAL_DIRECTCOLOR; -#ifdef CONFIG_PMAC_PBOOK if (par->chip_gen == rage_M3) { aty128_set_crt_enable(par, par->crt_on); aty128_set_lcd_enable(par, par->lcd_on); } -#endif if (par->accel_flags & FB_ACCELF_TEXT) aty128_init_engine(par); @@ -1652,7 +1646,6 @@ static int __init aty128fb_setup(char *options) return 0; while ((this_opt = strsep(&options, ",")) != NULL) { -#ifdef CONFIG_PMAC_PBOOK if (!strncmp(this_opt, "lcd:", 4)) { default_lcd_on = simple_strtoul(this_opt+4, NULL, 0); continue; @@ -1660,7 +1653,6 @@ static int __init aty128fb_setup(char *options) default_crt_on = simple_strtoul(this_opt+4, NULL, 0); continue; } -#endif #ifdef CONFIG_MTRR if(!strncmp(this_opt, "nomtrr", 6)) { mtrr = 0; @@ -1752,10 +1744,8 @@ static int __init aty128_init(struct pci_dev *pdev, const struct pci_device_id * info->fbops = &aty128fb_ops; info->flags = FBINFO_FLAG_DEFAULT; -#ifdef CONFIG_PMAC_PBOOK par->lcd_on = default_lcd_on; par->crt_on = default_crt_on; -#endif var = default_var; #ifdef CONFIG_PPC_PMAC @@ -2035,12 +2025,10 @@ static int aty128fb_blank(int blank, struct fb_info *fb) aty_st_8(CRTC_EXT_CNTL+1, state); -#ifdef CONFIG_PMAC_PBOOK if (par->chip_gen == rage_M3) { aty128_set_crt_enable(par, par->crt_on && !blank); aty128_set_lcd_enable(par, par->lcd_on && !blank); } -#endif #ifdef CONFIG_PMAC_BACKLIGHT if ((_machine == _MACH_Pmac) && !blank) set_backlight_enable(1); @@ -2124,7 +2112,6 @@ static int aty128fb_setcolreg(u_int regno, u_int red, u_int green, u_int blue, static int aty128fb_ioctl(struct inode *inode, struct file *file, u_int cmd, u_long arg, struct fb_info *info) { -#ifdef CONFIG_PMAC_PBOOK struct aty128fb_par *par = info->par; u32 value; int rc; @@ -2149,7 +2136,6 @@ static int aty128fb_ioctl(struct inode *inode, struct file *file, u_int cmd, value = (par->crt_on << 1) | par->lcd_on; return put_user(value, (__u32 __user *)arg); } -#endif return -EINVAL; } diff --git a/drivers/video/chipsfb.c b/drivers/video/chipsfb.c index 95e72550d43..e75a965ec76 100644 --- a/drivers/video/chipsfb.c +++ b/drivers/video/chipsfb.c @@ -28,22 +28,17 @@ #include #include #include +#include #include #ifdef CONFIG_PMAC_BACKLIGHT #include #endif -#ifdef CONFIG_PMAC_PBOOK -#include -#include -#endif /* * Since we access the display with inb/outb to fixed port numbers, * we can only handle one 6555x chip. 
-- paulus */ -static struct fb_info chipsfb_info; - #define write_ind(num, val, ap, dp) do { \ outb((num), (ap)); outb((val), (dp)); \ } while (0) @@ -74,14 +69,6 @@ static struct fb_info chipsfb_info; inb(0x3da); read_ind(num, var, 0x3c0, 0x3c1); \ } while (0) -#ifdef CONFIG_PMAC_PBOOK -static unsigned char *save_framebuffer; -int chips_sleep_notify(struct pmu_sleep_notifier *self, int when); -static struct pmu_sleep_notifier chips_sleep_notifier = { - chips_sleep_notify, SLEEP_LEVEL_VIDEO, -}; -#endif - /* * Exported functions */ @@ -356,6 +343,8 @@ static struct fb_var_screeninfo chipsfb_var __initdata = { static void __init init_chips(struct fb_info *p, unsigned long addr) { + memset(p->screen_base, 0, 0x100000); + p->fix = chipsfb_fix; p->fix.smem_start = addr; @@ -366,34 +355,41 @@ static void __init init_chips(struct fb_info *p, unsigned long addr) fb_alloc_cmap(&p->cmap, 256, 0); - if (register_framebuffer(p) < 0) { - printk(KERN_ERR "C&T 65550 framebuffer failed to register\n"); - return; - } - - printk(KERN_INFO "fb%d: Chips 65550 frame buffer (%dK RAM detected)\n", - p->node, p->fix.smem_len / 1024); - chips_hw_init(); } static int __devinit chipsfb_pci_init(struct pci_dev *dp, const struct pci_device_id *ent) { - struct fb_info *p = &chipsfb_info; + struct fb_info *p; unsigned long addr, size; unsigned short cmd; + int rc = -ENODEV; + + if (pci_enable_device(dp) < 0) { + dev_err(&dp->dev, "Cannot enable PCI device\n"); + goto err_out; + } if ((dp->resource[0].flags & IORESOURCE_MEM) == 0) - return -ENODEV; + goto err_disable; addr = pci_resource_start(dp, 0); size = pci_resource_len(dp, 0); if (addr == 0) - return -ENODEV; - if (p->screen_base != 0) - return -EBUSY; - if (!request_mem_region(addr, size, "chipsfb")) - return -EBUSY; + goto err_disable; + + p = framebuffer_alloc(0, &dp->dev); + if (p == NULL) { + dev_err(&dp->dev, "Cannot allocate framebuffer structure\n"); + rc = -ENOMEM; + goto err_disable; + } + + if (pci_request_region(dp, 0, "chipsfb") != 0) { + dev_err(&dp->dev, "Cannot request framebuffer\n"); + rc = -EBUSY; + goto err_release_fb; + } #ifdef __BIG_ENDIAN addr += 0x800000; // Use big-endian aperture @@ -411,37 +407,89 @@ chipsfb_pci_init(struct pci_dev *dp, const struct pci_device_id *ent) set_backlight_enable(1); #endif /* CONFIG_PMAC_BACKLIGHT */ +#ifdef CONFIG_PPC p->screen_base = __ioremap(addr, 0x200000, _PAGE_NO_CACHE); +#else + p->screen_base = ioremap(addr, 0x200000); +#endif if (p->screen_base == NULL) { - release_mem_region(addr, size); - return -ENOMEM; + dev_err(&dp->dev, "Cannot map framebuffer\n"); + rc = -ENOMEM; + goto err_release_pci; } + + pci_set_drvdata(dp, p); p->device = &dp->dev; + init_chips(p, addr); -#ifdef CONFIG_PMAC_PBOOK - pmu_register_sleep_notifier(&chips_sleep_notifier); -#endif /* CONFIG_PMAC_PBOOK */ + if (register_framebuffer(p) < 0) { + dev_err(&dp->dev,"C&T 65550 framebuffer failed to register\n"); + goto err_unmap; + } + + dev_info(&dp->dev,"fb%d: Chips 65550 frame buffer" + " (%dK RAM detected)\n", + p->node, p->fix.smem_len / 1024); - pci_set_drvdata(dp, p); return 0; + + err_unmap: + iounmap(p->screen_base); + err_release_pci: + pci_release_region(dp, 0); + err_release_fb: + framebuffer_release(p); + err_disable: + err_out: + return rc; } static void __devexit chipsfb_remove(struct pci_dev *dp) { struct fb_info *p = pci_get_drvdata(dp); - if (p != &chipsfb_info || p->screen_base == NULL) + if (p->screen_base == NULL) return; unregister_framebuffer(p); iounmap(p->screen_base); p->screen_base = NULL; - 
release_mem_region(pci_resource_start(dp, 0), pci_resource_len(dp, 0)); + pci_release_region(dp, 0); +} + +#ifdef CONFIG_PM +static int chipsfb_pci_suspend(struct pci_dev *pdev, pm_message_t state) +{ + struct fb_info *p = pci_get_drvdata(pdev); + + if (state == pdev->dev.power.power_state) + return 0; + if (state != PM_SUSPEND_MEM) + goto done; + + acquire_console_sem(); + chipsfb_blank(1, p); + fb_set_suspend(p, 1); + release_console_sem(); + done: + pdev->dev.power.power_state = state; + return 0; +} + +static int chipsfb_pci_resume(struct pci_dev *pdev) +{ + struct fb_info *p = pci_get_drvdata(pdev); -#ifdef CONFIG_PMAC_PBOOK - pmu_unregister_sleep_notifier(&chips_sleep_notifier); -#endif /* CONFIG_PMAC_PBOOK */ + acquire_console_sem(); + fb_set_suspend(p, 0); + chipsfb_blank(0, p); + release_console_sem(); + + pdev->dev.power.power_state = PMSG_ON; + return 0; } +#endif /* CONFIG_PM */ + static struct pci_device_id chipsfb_pci_tbl[] = { { PCI_VENDOR_ID_CT, PCI_DEVICE_ID_CT_65550, PCI_ANY_ID, PCI_ANY_ID }, @@ -455,6 +503,10 @@ static struct pci_driver chipsfb_driver = { .id_table = chipsfb_pci_tbl, .probe = chipsfb_pci_init, .remove = __devexit_p(chipsfb_remove), +#ifdef CONFIG_PM + .suspend = chipsfb_pci_suspend, + .resume = chipsfb_pci_resume, +#endif }; int __init chips_init(void) @@ -472,48 +524,4 @@ static void __exit chipsfb_exit(void) pci_unregister_driver(&chipsfb_driver); } -#ifdef CONFIG_PMAC_PBOOK -/* - * Save the contents of the frame buffer when we go to sleep, - * and restore it when we wake up again. - */ -int -chips_sleep_notify(struct pmu_sleep_notifier *self, int when) -{ - struct fb_info *p = &chipsfb_info; - int nb = p->var.yres * p->fix.line_length; - - if (p->screen_base == NULL) - return PBOOK_SLEEP_OK; - - switch (when) { - case PBOOK_SLEEP_REQUEST: - save_framebuffer = vmalloc(nb); - if (save_framebuffer == NULL) - return PBOOK_SLEEP_REFUSE; - break; - case PBOOK_SLEEP_REJECT: - if (save_framebuffer) { - vfree(save_framebuffer); - save_framebuffer = NULL; - } - break; - case PBOOK_SLEEP_NOW: - chipsfb_blank(1, p); - if (save_framebuffer) - memcpy(save_framebuffer, p->screen_base, nb); - break; - case PBOOK_WAKE: - if (save_framebuffer) { - memcpy(p->screen_base, save_framebuffer, nb); - vfree(save_framebuffer); - save_framebuffer = NULL; - } - chipsfb_blank(0, p); - break; - } - return PBOOK_SLEEP_OK; -} -#endif /* CONFIG_PMAC_PBOOK */ - MODULE_LICENSE("GPL"); diff --git a/include/linux/pmu.h b/include/linux/pmu.h index 6d73eada277..373bd3b9b33 100644 --- a/include/linux/pmu.h +++ b/include/linux/pmu.h @@ -166,7 +166,7 @@ extern int pmu_i2c_simple_read(int bus, int addr, u8* data, int len); extern int pmu_i2c_simple_write(int bus, int addr, u8* data, int len); -#ifdef CONFIG_PMAC_PBOOK +#ifdef CONFIG_PM /* * Stuff for putting the powerbook to sleep and waking it again. 
* @@ -208,6 +208,8 @@ struct pmu_sleep_notifier int pmu_register_sleep_notifier(struct pmu_sleep_notifier* notifier); int pmu_unregister_sleep_notifier(struct pmu_sleep_notifier* notifier); +#endif /* CONFIG_PM */ + #define PMU_MAX_BATTERIES 2 /* values for pmu_power_flags */ @@ -235,6 +237,4 @@ extern int pmu_battery_count; extern struct pmu_battery_info pmu_batteries[PMU_MAX_BATTERIES]; extern unsigned int pmu_power_flags; -#endif /* CONFIG_PMAC_PBOOK */ - #endif /* __KERNEL__ */ diff --git a/sound/oss/dmasound/dmasound_awacs.c b/sound/oss/dmasound/dmasound_awacs.c index 33108661e67..2704e1598ad 100644 --- a/sound/oss/dmasound/dmasound_awacs.c +++ b/sound/oss/dmasound/dmasound_awacs.c @@ -255,7 +255,7 @@ static int awacs_burgundy_read_mvolume(unsigned address); static volatile struct dbdma_cmd *emergency_dbdma_cmd; -#ifdef CONFIG_PMAC_PBOOK +#ifdef CONFIG_PM /* * Stuff for restoring after a sleep. */ @@ -263,7 +263,7 @@ static int awacs_sleep_notify(struct pmu_sleep_notifier *self, int when); struct pmu_sleep_notifier awacs_sleep_notifier = { awacs_sleep_notify, SLEEP_LEVEL_SOUND, }; -#endif /* CONFIG_PMAC_PBOOK */ +#endif /* CONFIG_PM */ /* for (soft) sample rate translations */ int expand_bal; /* Balance factor for expanding (not volume!) */ @@ -675,7 +675,7 @@ static void PMacIrqCleanup(void) kfree(awacs_rx_cmd_space); kfree(beep_dbdma_cmd_space); kfree(beep_buf); -#ifdef CONFIG_PMAC_PBOOK +#ifdef CONFIG_PM pmu_unregister_sleep_notifier(&awacs_sleep_notifier); #endif } @@ -1415,7 +1415,7 @@ load_awacs(void) } } -#ifdef CONFIG_PMAC_PBOOK +#ifdef CONFIG_PM /* * Save state when going to sleep, restore it afterwards. */ @@ -1551,7 +1551,7 @@ static int awacs_sleep_notify(struct pmu_sleep_notifier *self, int when) } return PBOOK_SLEEP_OK; } -#endif /* CONFIG_PMAC_PBOOK */ +#endif /* CONFIG_PM */ /* All the burgundy functions: */ @@ -3053,9 +3053,9 @@ printk("dmasound_pmac: Awacs/Screamer Codec Mfct: %d Rev %d\n", mfg, rev); if ((res=setup_beep())) return res ; -#ifdef CONFIG_PMAC_PBOOK +#ifdef CONFIG_PM pmu_register_sleep_notifier(&awacs_sleep_notifier); -#endif /* CONFIG_PMAC_PBOOK */ +#endif /* CONFIG_PM */ /* Powerbooks have odd ways of enabling inputs such as an expansion-bay CD or sound from an internal modem diff --git a/sound/ppc/awacs.c b/sound/ppc/awacs.c index e052bd071e5..061e52d3d77 100644 --- a/sound/ppc/awacs.c +++ b/sound/ppc/awacs.c @@ -90,7 +90,7 @@ snd_pmac_awacs_write_noreg(pmac_t *chip, int reg, int val) snd_pmac_awacs_write(chip, val | (reg << 12)); } -#ifdef CONFIG_PMAC_PBOOK +#ifdef CONFIG_PM /* Recalibrate chip */ static void screamer_recalibrate(pmac_t *chip) { @@ -642,7 +642,7 @@ static void awacs_restore_all_regs(pmac_t *chip) } } -#ifdef CONFIG_PMAC_PBOOK +#ifdef CONFIG_PM static void snd_pmac_awacs_suspend(pmac_t *chip) { snd_pmac_awacs_write_noreg(chip, 1, (chip->awacs_reg[1] @@ -676,7 +676,7 @@ static void snd_pmac_awacs_resume(pmac_t *chip) } #endif } -#endif /* CONFIG_PMAC_PBOOK */ +#endif /* CONFIG_PM */ #ifdef PMAC_SUPPORT_AUTOMUTE /* @@ -883,7 +883,7 @@ snd_pmac_awacs_init(pmac_t *chip) * set lowlevel callbacks */ chip->set_format = snd_pmac_awacs_set_format; -#ifdef CONFIG_PMAC_PBOOK +#ifdef CONFIG_PM chip->suspend = snd_pmac_awacs_suspend; chip->resume = snd_pmac_awacs_resume; #endif diff --git a/sound/ppc/daca.c b/sound/ppc/daca.c index f24a9169361..a737f298e77 100644 --- a/sound/ppc/daca.c +++ b/sound/ppc/daca.c @@ -218,7 +218,7 @@ static snd_kcontrol_new_t daca_mixers[] = { }; -#ifdef CONFIG_PMAC_PBOOK +#ifdef CONFIG_PM static void 
daca_resume(pmac_t *chip) { pmac_daca_t *mix = chip->mixer_data; @@ -227,7 +227,7 @@ static void daca_resume(pmac_t *chip) mix->amp_on ? 0x05 : 0x04); daca_set_volume(mix); } -#endif /* CONFIG_PMAC_PBOOK */ +#endif /* CONFIG_PM */ static void daca_cleanup(pmac_t *chip) @@ -275,7 +275,7 @@ int __init snd_pmac_daca_init(pmac_t *chip) return err; } -#ifdef CONFIG_PMAC_PBOOK +#ifdef CONFIG_PM chip->resume = daca_resume; #endif diff --git a/sound/ppc/pmac.c b/sound/ppc/pmac.c index 080ef392846..75b8b742303 100644 --- a/sound/ppc/pmac.c +++ b/sound/ppc/pmac.c @@ -36,7 +36,7 @@ #include -#if defined(CONFIG_PM) && defined(CONFIG_PMAC_PBOOK) +#ifdef CONFIG_PM static int snd_pmac_register_sleep_notifier(pmac_t *chip); static int snd_pmac_unregister_sleep_notifier(pmac_t *chip); static int snd_pmac_suspend(snd_card_t *card, pm_message_t state); @@ -782,7 +782,7 @@ static int snd_pmac_free(pmac_t *chip) } snd_pmac_sound_feature(chip, 0); -#if defined(CONFIG_PM) && defined(CONFIG_PMAC_PBOOK) +#ifdef CONFIG_PM snd_pmac_unregister_sleep_notifier(chip); #endif @@ -1292,7 +1292,7 @@ int __init snd_pmac_new(snd_card_t *card, pmac_t **chip_return) /* Reset dbdma channels */ snd_pmac_dbdma_reset(chip); -#if defined(CONFIG_PM) && defined(CONFIG_PMAC_PBOOK) +#ifdef CONFIG_PM /* add sleep notifier */ if (! snd_pmac_register_sleep_notifier(chip)) snd_card_set_pm_callback(chip->card, snd_pmac_suspend, snd_pmac_resume, chip); @@ -1316,7 +1316,7 @@ int __init snd_pmac_new(snd_card_t *card, pmac_t **chip_return) * sleep notify for powerbook */ -#if defined(CONFIG_PM) && defined(CONFIG_PMAC_PBOOK) +#ifdef CONFIG_PM /* * Save state when going to sleep, restore it afterwards. @@ -1414,4 +1414,5 @@ static int snd_pmac_unregister_sleep_notifier(pmac_t *chip) return 0; } -#endif /* CONFIG_PM && CONFIG_PMAC_PBOOK */ +#endif /* CONFIG_PM */ + diff --git a/sound/ppc/pmac.h b/sound/ppc/pmac.h index 0a84c05f714..582db522011 100644 --- a/sound/ppc/pmac.h +++ b/sound/ppc/pmac.h @@ -167,7 +167,7 @@ struct snd_pmac { void (*set_format)(pmac_t *chip); void (*update_automute)(pmac_t *chip, int do_notify); int (*detect_headphone)(pmac_t *chip); -#ifdef CONFIG_PMAC_PBOOK +#ifdef CONFIG_PM void (*suspend)(pmac_t *chip); void (*resume)(pmac_t *chip); #endif diff --git a/sound/ppc/tumbler.c b/sound/ppc/tumbler.c index 9332237cb6a..36c5d5d45bb 100644 --- a/sound/ppc/tumbler.c +++ b/sound/ppc/tumbler.c @@ -1128,7 +1128,7 @@ static void tumbler_reset_audio(pmac_t *chip) } } -#ifdef CONFIG_PMAC_PBOOK +#ifdef CONFIG_PM /* suspend mixer */ static void tumbler_suspend(pmac_t *chip) { @@ -1370,7 +1370,7 @@ int __init snd_pmac_tumbler_init(pmac_t *chip) if ((err = snd_ctl_add(chip->card, chip->drc_sw_ctl)) < 0) return err; -#ifdef CONFIG_PMAC_PBOOK +#ifdef CONFIG_PM chip->suspend = tumbler_suspend; chip->resume = tumbler_resume; #endif -- cgit v1.2.3-70-g09d2 From ffaa8bd6c904d1ab79b677905067349a5ff51d84 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Mon, 27 Jun 2005 14:36:36 -0700 Subject: [PATCH] seccomp: tsc disable I believe that, at least for seccomp, it is worth turning off the TSC, not just for HT but for the L2 cache too. So it's up to you: either turn it off completely (which isn't very nice IMHO) or apply the patch below, which is what I recommend. This has been tested successfully on x86-64 against the current cogito repository (i686 compiles, so I didn't bother testing it ;). People selling CPU time through cpushare may appreciate this bit for peace of mind.
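
To make the effect concrete, here is a small illustrative userspace test, not part of the patch. It assumes the 2.6.12-era enable interface of writing "1" to /proc/self/seccomp (the prctl-based interface did not exist yet); with this patch applied, the second RDTSC executes with CR4.TSD set and kills the task, where previously it would have succeeded and leaked timing information:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static inline unsigned long long rdtsc(void)
{
	unsigned int lo, hi;

	/* Once CR4.TSD is set, RDTSC at CPL 3 raises #GP, delivered
	 * to the task as SIGSEGV. */
	__asm__ __volatile__("rdtsc" : "=a" (lo), "=d" (hi));
	return ((unsigned long long)hi << 32) | lo;
}

int main(void)
{
	int fd;

	printf("tsc before seccomp: %llu\n", rdtsc());

	/* Assumed enable path for this kernel generation. */
	fd = open("/proc/self/seccomp", O_WRONLY);
	if (fd < 0 || write(fd, "1", 1) != 1) {
		perror("enabling seccomp");
		return 1;
	}

	/* Strict mode leaves only read/write/exit/sigreturn usable;
	 * with this patch the RDTSC below additionally dies with
	 * SIGSEGV instead of returning a cycle count. */
	printf("tsc after seccomp: %llu\n", rdtsc());
	return 0;
}
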
There is no way to get any timing info anymore with this applied (gettimeofday is forbidden, of course). The seccomp environment has to be completely deterministic, so it cannot be allowed to obtain timing information; that determinism is what will let me, in the future, enable a computing mode that runs each task in parallel, with server-side transparent checkpointing and verification that the output is the same from all the 2/3 seller computers for each task, without the buyer even noticing. (For now the verification is left to the buyer's client side and there is no checkpointing, since that would require more kernel changes to track the dirty bits, but it will be easy to extend once the basic mode is finished.) Eliminating a cold-cache read of the mmu_cr4_features global variable also saves one cacheline during the TLB flush, while making the code per-CPU-safe at the same time. Thanks to Mikael Pettersson for noticing that the TLB flush wasn't per-CPU-safe. The global TLB flush can run from IRQ context (an IPI calling do_flush_tlb_all), but this is transparent to the switch_to code: the IPI makes no change to the cr4 contents from the point of view of the interrupted code, and since it is now all per-CPU state, it cannot race. So there is no need to disable IRQs in the switch_to slow path. Signed-off-by: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/process.c | 29 +++++++++++++++++++++++++++++ arch/x86_64/kernel/process.c | 29 +++++++++++++++++++++++++++++ include/asm-i386/tlbflush.h | 12 +++++++----- include/asm-x86_64/tlbflush.h | 12 +++++++----- include/linux/seccomp.h | 10 ++++++++++ 5 files changed, 82 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index 5f8cfa6b794..ba243a4cc11 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -616,6 +616,33 @@ handle_io_bitmap(struct thread_struct *next, struct tss_struct *tss) tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY; } +/* + * This function selects if the context switch from prev to next + * has to tweak the TSC disable bit in the cr4. + */ +static inline void disable_tsc(struct task_struct *prev_p, + struct task_struct *next_p) +{ + struct thread_info *prev, *next; + + /* + * gcc should eliminate the ->thread_info dereference if + * has_secure_computing returns 0 at compile time (SECCOMP=n). + */ + prev = prev_p->thread_info; + next = next_p->thread_info; + + if (has_secure_computing(prev) || has_secure_computing(next)) { + /* slow path here */ + if (has_secure_computing(prev) && + !has_secure_computing(next)) { + write_cr4(read_cr4() & ~X86_CR4_TSD); + } else if (!has_secure_computing(prev) && + has_secure_computing(next)) + write_cr4(read_cr4() | X86_CR4_TSD); + } +} + /* * switch_to(x,yn) should switch tasks from x to y. * @@ -695,6 +722,8 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) handle_io_bitmap(next, tss); + disable_tsc(prev_p, next_p); + return prev_p; } diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index 1d91271796e..7577f9d7a75 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c @@ -481,6 +481,33 @@ out: return err; } +/* + * This function selects if the context switch from prev to next + * has to tweak the TSC disable bit in the cr4.
+ */ +static inline void disable_tsc(struct task_struct *prev_p, + struct task_struct *next_p) +{ + struct thread_info *prev, *next; + + /* + * gcc should eliminate the ->thread_info dereference if + * has_secure_computing returns 0 at compile time (SECCOMP=n). + */ + prev = prev_p->thread_info; + next = next_p->thread_info; + + if (has_secure_computing(prev) || has_secure_computing(next)) { + /* slow path here */ + if (has_secure_computing(prev) && + !has_secure_computing(next)) { + write_cr4(read_cr4() & ~X86_CR4_TSD); + } else if (!has_secure_computing(prev) && + has_secure_computing(next)) + write_cr4(read_cr4() | X86_CR4_TSD); + } +} + /* * This special macro can be used to load a debugging register */ @@ -599,6 +626,8 @@ struct task_struct *__switch_to(struct task_struct *prev_p, struct task_struct * } } + disable_tsc(prev_p, next_p); + return prev_p; } diff --git a/include/asm-i386/tlbflush.h b/include/asm-i386/tlbflush.h index f22fab0cea2..ab216e1370e 100644 --- a/include/asm-i386/tlbflush.h +++ b/include/asm-i386/tlbflush.h @@ -22,16 +22,18 @@ */ #define __flush_tlb_global() \ do { \ - unsigned int tmpreg; \ + unsigned int tmpreg, cr4, cr4_orig; \ \ __asm__ __volatile__( \ - "movl %1, %%cr4; # turn off PGE \n" \ + "movl %%cr4, %2; # turn off PGE \n" \ + "movl %2, %1; \n" \ + "andl %3, %1; \n" \ + "movl %1, %%cr4; \n" \ "movl %%cr3, %0; \n" \ "movl %0, %%cr3; # flush TLB \n" \ "movl %2, %%cr4; # turn PGE back on \n" \ - : "=&r" (tmpreg) \ - : "r" (mmu_cr4_features & ~X86_CR4_PGE), \ - "r" (mmu_cr4_features) \ + : "=&r" (tmpreg), "=&r" (cr4), "=&r" (cr4_orig) \ + : "i" (~X86_CR4_PGE) \ : "memory"); \ } while (0) diff --git a/include/asm-x86_64/tlbflush.h b/include/asm-x86_64/tlbflush.h index 2e811ac262a..06174238252 100644 --- a/include/asm-x86_64/tlbflush.h +++ b/include/asm-x86_64/tlbflush.h @@ -22,16 +22,18 @@ */ #define __flush_tlb_global() \ do { \ - unsigned long tmpreg; \ + unsigned long tmpreg, cr4, cr4_orig; \ \ __asm__ __volatile__( \ - "movq %1, %%cr4; # turn off PGE \n" \ + "movq %%cr4, %2; # turn off PGE \n" \ + "movq %2, %1; \n" \ + "andq %3, %1; \n" \ + "movq %1, %%cr4; \n" \ "movq %%cr3, %0; # flush TLB \n" \ "movq %0, %%cr3; \n" \ "movq %2, %%cr4; # turn PGE back on \n" \ - : "=&r" (tmpreg) \ - : "r" (mmu_cr4_features & ~X86_CR4_PGE), \ - "r" (mmu_cr4_features) \ + : "=&r" (tmpreg), "=&r" (cr4), "=&r" (cr4_orig) \ + : "i" (~X86_CR4_PGE) \ : "memory"); \ } while (0) diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index 3a2702bbb1d..dc89116bb1c 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -19,6 +19,11 @@ static inline void secure_computing(int this_syscall) __secure_computing(this_syscall); } +static inline int has_secure_computing(struct thread_info *ti) +{ + return unlikely(test_ti_thread_flag(ti, TIF_SECCOMP)); +} + #else /* CONFIG_SECCOMP */ #if (__GNUC__ > 2) @@ -28,6 +33,11 @@ static inline void secure_computing(int this_syscall) #endif #define secure_computing(x) do { } while (0) +/* static inline to preserve typechecking */ +static inline int has_secure_computing(struct thread_info *ti) +{ + return 0; +} #endif /* CONFIG_SECCOMP */ -- cgit v1.2.3-70-g09d2 From 3de0a70bd926ff974adb27a38d4fd1049f05e54e Mon Sep 17 00:00:00 2001 From: Mike Miller Date: Mon, 27 Jun 2005 14:36:48 -0700 Subject: [PATCH] cciss: pci id fix This patch fixes a PCI ID I got wrong before. It also adds support for another new SAS controller due out this summer. I didn't have a marketing name prior to my last submission. 
Also modifies the copyright date range. Signed-off-by: Mike Miller Acked-by: Jeff Garzik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/cciss.txt | 1 + drivers/block/cciss.c | 9 ++++++--- include/linux/pci_ids.h | 3 ++- 3 files changed, 9 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/Documentation/cciss.txt b/Documentation/cciss.txt index d599beb9df8..c8f9a73111d 100644 --- a/Documentation/cciss.txt +++ b/Documentation/cciss.txt @@ -17,6 +17,7 @@ This driver is known to work with the following cards: * SA P600 * SA P800 * SA E400 + * SA E300 If nodes are not already created in the /dev/cciss directory, run as root: diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index abde27027c0..0cd606ce222 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -1,6 +1,6 @@ /* * Disk Array driver for HP SA 5xxx and 6xxx Controllers - * Copyright 2000, 2002 Hewlett-Packard Development Company, L.P. + * Copyright 2000, 2005 Hewlett-Packard Development Company, L.P. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -54,7 +54,7 @@ MODULE_AUTHOR("Hewlett-Packard Company"); MODULE_DESCRIPTION("Driver for HP Controller SA5xxx SA6xxx version 2.6.6"); MODULE_SUPPORTED_DEVICE("HP SA5i SA5i+ SA532 SA5300 SA5312 SA641 SA642 SA6400" - " SA6i P600 P800 E400"); + " SA6i P600 P800 E400 E300"); MODULE_LICENSE("GPL"); #include "cciss_cmd.h" @@ -85,8 +85,10 @@ static const struct pci_device_id cciss_pci_device_id[] = { 0x103C, 0x3225, 0, 0, 0}, { PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSB, 0x103c, 0x3223, 0, 0, 0}, - { PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSB, + { PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSC, 0x103c, 0x3231, 0, 0, 0}, + { PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSC, + 0x103c, 0x3233, 0, 0, 0}, {0,} }; MODULE_DEVICE_TABLE(pci, cciss_pci_device_id); @@ -110,6 +112,7 @@ static struct board_type products[] = { { 0x3225103C, "Smart Array P600", &SA5_access}, { 0x3223103C, "Smart Array P800", &SA5_access}, { 0x3231103C, "Smart Array E400", &SA5_access}, + { 0x3233103C, "Smart Array E300", &SA5_access}, }; /* How long to wait (in millesconds) for board to go into simple mode */ diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 3af7450278b..a3961e1d518 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -712,8 +712,9 @@ #define PCI_DEVICE_ID_HP_DIVA_AUX 0x1290 #define PCI_DEVICE_ID_HP_DIVA_RMP3 0x1301 #define PCI_DEVICE_ID_HP_CISSA 0x3220 -#define PCI_DEVICE_ID_HP_CISSB 0x3230 +#define PCI_DEVICE_ID_HP_CISSB 0x3222 #define PCI_DEVICE_ID_HP_ZX2_IOC 0x4031 +#define PCI_DEVICE_ID_HP_CISSC 0x3230 #define PCI_VENDOR_ID_PCTECH 0x1042 #define PCI_DEVICE_ID_PCTECH_RZ1000 0x1000 -- cgit v1.2.3-70-g09d2 From cd6fb584cf7f18ec6b221192b57d712ecc8c1859 Mon Sep 17 00:00:00 2001 From: Mike Miller Date: Mon, 27 Jun 2005 14:36:49 -0700 Subject: [PATCH] cciss: pci domain info pass 2 This is pass 2 of my patch to add pci domain info to an existing ioctl. This time I insert the domain between dev_fn and board_id as Willy suggested and change the var to unsigned short to ease Christoph's concerns. Although I thought unsigned int was the correct var type for this. I also thought it didn't matter where I inserted it in the structure. 
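
For reference, here is a sketch of how a userspace tool would consume the new field. It assumes the CCISS_GETPCIINFO ioctl that cciss_ioctl.h wraps around this structure and an existing /dev/cciss/c0d0 node; treat both names as illustrative rather than verified here:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/cciss_ioctl.h>

int main(void)
{
	cciss_pci_info_struct info;
	int fd = open("/dev/cciss/c0d0", O_RDONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (ioctl(fd, CCISS_GETPCIINFO, &info) < 0) {
		perror("CCISS_GETPCIINFO");
		close(fd);
		return 1;
	}
	/* domain is the new field; bus, dev_fn and board_id were
	 * already reported before this patch. */
	printf("%04x:%02x:%02x.%x board 0x%08x\n",
	       info.domain, info.bus, info.dev_fn >> 3, info.dev_fn & 7,
	       info.board_id);
	close(fd);
	return 0;
}
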
Signed-off-by: Mike Miller Acked-by: Jeff Garzik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/cciss.c | 1 + include/linux/cciss_ioctl.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 0cd606ce222..d5d0fa538f1 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -638,6 +638,7 @@ static int cciss_ioctl(struct inode *inode, struct file *filep, cciss_pci_info_struct pciinfo; if (!arg) return -EINVAL; + pciinfo.domain = pci_domain_nr(host->pdev->bus); pciinfo.bus = host->pdev->bus->number; pciinfo.dev_fn = host->pdev->devfn; pciinfo.board_id = host->board_id; diff --git a/include/linux/cciss_ioctl.h b/include/linux/cciss_ioctl.h index ee0c6e8995d..424d5e622b4 100644 --- a/include/linux/cciss_ioctl.h +++ b/include/linux/cciss_ioctl.h @@ -10,6 +10,7 @@ typedef struct _cciss_pci_info_struct { unsigned char bus; + unsigned short domain; unsigned char dev_fn; __u32 board_id; } cciss_pci_info_struct; -- cgit v1.2.3-70-g09d2 From 9ec4b1f356b3bad928ae8e2aa9caebfa737d52df Mon Sep 17 00:00:00 2001 From: Ananth N Mavinakayanahalli Date: Mon, 27 Jun 2005 15:17:01 -0700 Subject: [PATCH] kprobes: fix single-step out of line - take2 Now that PPC64 has no-execute support, here is a second try to fix the single step out of line during kprobe execution. Kprobes on x86_64 already solved this problem by allocating an executable page and using it as the scratch area for stepping out of line. Reuse that. Signed-off-by: Ananth N Mavinakayanahalli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ppc64/kernel/kprobes.c | 26 ++++++++-- arch/x86_64/kernel/kprobes.c | 113 +------------------------------------------ include/asm-ia64/kprobes.h | 1 + include/asm-ppc64/kprobes.h | 2 +- include/linux/kprobes.h | 2 + kernel/kprobes.c | 101 ++++++++++++++++++++++++++++++++++++++ 6 files changed, 128 insertions(+), 117 deletions(-) (limited to 'include/linux') diff --git a/arch/ppc64/kernel/kprobes.c b/arch/ppc64/kernel/kprobes.c index 782ce3efa2c..86cc5496db9 100644 --- a/arch/ppc64/kernel/kprobes.c +++ b/arch/ppc64/kernel/kprobes.c @@ -36,6 +36,8 @@ #include #include +static DECLARE_MUTEX(kprobe_mutex); + static struct kprobe *current_kprobe; static unsigned long kprobe_status, kprobe_saved_msr; static struct kprobe *kprobe_prev; @@ -54,6 +56,15 @@ int arch_prepare_kprobe(struct kprobe *p) printk("Cannot register a kprobe on rfid or mtmsrd\n"); ret = -EINVAL; } + + /* insn must be on a special executable page on ppc64 */ + if (!ret) { + up(&kprobe_mutex); + p->ainsn.insn = get_insn_slot(); + down(&kprobe_mutex); + if (!p->ainsn.insn) + ret = -ENOMEM; + } return ret; } @@ -79,16 +90,22 @@ void arch_disarm_kprobe(struct kprobe *p) void arch_remove_kprobe(struct kprobe *p) { + up(&kprobe_mutex); + free_insn_slot(p->ainsn.insn); + down(&kprobe_mutex); } static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) { + kprobe_opcode_t insn = *p->ainsn.insn; + regs->msr |= MSR_SE; - /*single step inline if it a breakpoint instruction*/ - if (p->opcode == BREAKPOINT_INSTRUCTION) + + /* single step inline if it is a trap variant */ + if (IS_TW(insn) || IS_TD(insn) || IS_TWI(insn) || IS_TDI(insn)) regs->nip = (unsigned long)p->addr; else - regs->nip = (unsigned long)&p->ainsn.insn; + regs->nip = (unsigned long)p->ainsn.insn; } static inline void save_previous_kprobe(void) @@ -205,9 +222,10 @@ no_kprobe: static void resume_execution(struct kprobe *p, struct pt_regs *regs) { 
int ret; + unsigned int insn = *p->ainsn.insn; regs->nip = (unsigned long)p->addr; - ret = emulate_step(regs, p->ainsn.insn[0]); + ret = emulate_step(regs, insn); if (ret == 0) regs->nip = (unsigned long)p->addr + 4; } diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c index 4e680f87a75..6a1c88376be 100644 --- a/arch/x86_64/kernel/kprobes.c +++ b/arch/x86_64/kernel/kprobes.c @@ -38,7 +38,7 @@ #include #include #include -#include + #include #include #include @@ -51,8 +51,6 @@ static struct kprobe *kprobe_prev; static unsigned long kprobe_status_prev, kprobe_old_rflags_prev, kprobe_saved_rflags_prev; static struct pt_regs jprobe_saved_regs; static long *jprobe_saved_rsp; -static kprobe_opcode_t *get_insn_slot(void); -static void free_insn_slot(kprobe_opcode_t *slot); void jprobe_return_end(void); /* copy of the kernel stack at the probe fire time */ @@ -681,112 +679,3 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) } return 0; } - -/* - * kprobe->ainsn.insn points to the copy of the instruction to be single-stepped. - * By default on x86_64, pages we get from kmalloc or vmalloc are not - * executable. Single-stepping an instruction on such a page yields an - * oops. So instead of storing the instruction copies in their respective - * kprobe objects, we allocate a page, map it executable, and store all the - * instruction copies there. (We can allocate additional pages if somebody - * inserts a huge number of probes.) Each page can hold up to INSNS_PER_PAGE - * instruction slots, each of which is MAX_INSN_SIZE*sizeof(kprobe_opcode_t) - * bytes. - */ -#define INSNS_PER_PAGE (PAGE_SIZE/(MAX_INSN_SIZE*sizeof(kprobe_opcode_t))) -struct kprobe_insn_page { - struct hlist_node hlist; - kprobe_opcode_t *insns; /* page of instruction slots */ - char slot_used[INSNS_PER_PAGE]; - int nused; -}; - -static struct hlist_head kprobe_insn_pages; - -/** - * get_insn_slot() - Find a slot on an executable page for an instruction. - * We allocate an executable page if there's no room on existing ones. - */ -static kprobe_opcode_t *get_insn_slot(void) -{ - struct kprobe_insn_page *kip; - struct hlist_node *pos; - - hlist_for_each(pos, &kprobe_insn_pages) { - kip = hlist_entry(pos, struct kprobe_insn_page, hlist); - if (kip->nused < INSNS_PER_PAGE) { - int i; - for (i = 0; i < INSNS_PER_PAGE; i++) { - if (!kip->slot_used[i]) { - kip->slot_used[i] = 1; - kip->nused++; - return kip->insns + (i*MAX_INSN_SIZE); - } - } - /* Surprise! No unused slots. Fix kip->nused. */ - kip->nused = INSNS_PER_PAGE; - } - } - - /* All out of space. Need to allocate a new page. Use slot 0.*/ - kip = kmalloc(sizeof(struct kprobe_insn_page), GFP_KERNEL); - if (!kip) { - return NULL; - } - - /* - * For the %rip-relative displacement fixups to be doable, we - * need our instruction copy to be within +/- 2GB of any data it - * might access via %rip. That is, within 2GB of where the - * kernel image and loaded module images reside. So we allocate - * a page in the module loading area. - */ - kip->insns = module_alloc(PAGE_SIZE); - if (!kip->insns) { - kfree(kip); - return NULL; - } - INIT_HLIST_NODE(&kip->hlist); - hlist_add_head(&kip->hlist, &kprobe_insn_pages); - memset(kip->slot_used, 0, INSNS_PER_PAGE); - kip->slot_used[0] = 1; - kip->nused = 1; - return kip->insns; -} - -/** - * free_insn_slot() - Free instruction slot obtained from get_insn_slot(). 
- */ -static void free_insn_slot(kprobe_opcode_t *slot) -{ - struct kprobe_insn_page *kip; - struct hlist_node *pos; - - hlist_for_each(pos, &kprobe_insn_pages) { - kip = hlist_entry(pos, struct kprobe_insn_page, hlist); - if (kip->insns <= slot - && slot < kip->insns+(INSNS_PER_PAGE*MAX_INSN_SIZE)) { - int i = (slot - kip->insns) / MAX_INSN_SIZE; - kip->slot_used[i] = 0; - kip->nused--; - if (kip->nused == 0) { - /* - * Page is no longer in use. Free it unless - * it's the last one. We keep the last one - * so as not to have to set it up again the - * next time somebody inserts a probe. - */ - hlist_del(&kip->hlist); - if (hlist_empty(&kprobe_insn_pages)) { - INIT_HLIST_NODE(&kip->hlist); - hlist_add_head(&kip->hlist, - &kprobe_insn_pages); - } else { - module_free(NULL, kip->insns); - kfree(kip); - } - } - return; - } - } -} diff --git a/include/asm-ia64/kprobes.h b/include/asm-ia64/kprobes.h index 7b700035e36..25d8b1edfcb 100644 --- a/include/asm-ia64/kprobes.h +++ b/include/asm-ia64/kprobes.h @@ -28,6 +28,7 @@ #include #include +#define MAX_INSN_SIZE 16 #define BREAK_INST (long)(__IA64_BREAK_KPROBE << 6) typedef union cmp_inst { diff --git a/include/asm-ppc64/kprobes.h b/include/asm-ppc64/kprobes.h index 19b468bed05..790cf7c5277 100644 --- a/include/asm-ppc64/kprobes.h +++ b/include/asm-ppc64/kprobes.h @@ -45,7 +45,7 @@ typedef unsigned int kprobe_opcode_t; /* Architecture specific copy of original instruction */ struct arch_specific_insn { /* copy of original instruction */ - kprobe_opcode_t insn[MAX_INSN_SIZE]; + kprobe_opcode_t *insn; }; #ifdef CONFIG_KPROBES diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 5e1a7b0d7b3..d304d457985 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -177,6 +177,8 @@ extern void arch_arm_kprobe(struct kprobe *p); extern void arch_disarm_kprobe(struct kprobe *p); extern void arch_remove_kprobe(struct kprobe *p); extern void show_registers(struct pt_regs *regs); +extern kprobe_opcode_t *get_insn_slot(void); +extern void free_insn_slot(kprobe_opcode_t *slot); /* Get the kprobe at this addr (if any). Must have called lock_kprobes */ struct kprobe *get_kprobe(void *addr); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 334f37472c5..65242529a75 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -50,6 +51,106 @@ unsigned int kprobe_cpu = NR_CPUS; static DEFINE_SPINLOCK(kprobe_lock); static struct kprobe *curr_kprobe; +/* + * kprobe->ainsn.insn points to the copy of the instruction to be + * single-stepped. x86_64, POWER4 and above have no-exec support and + * stepping on the instruction on a vmalloced/kmalloced/data page + * is a recipe for disaster + */ +#define INSNS_PER_PAGE (PAGE_SIZE/(MAX_INSN_SIZE * sizeof(kprobe_opcode_t))) + +struct kprobe_insn_page { + struct hlist_node hlist; + kprobe_opcode_t *insns; /* Page of instruction slots */ + char slot_used[INSNS_PER_PAGE]; + int nused; +}; + +static struct hlist_head kprobe_insn_pages; + +/** + * get_insn_slot() - Find a slot on an executable page for an instruction. + * We allocate an executable page if there's no room on existing ones. 
+ */ +kprobe_opcode_t *get_insn_slot(void) +{ + struct kprobe_insn_page *kip; + struct hlist_node *pos; + + hlist_for_each(pos, &kprobe_insn_pages) { + kip = hlist_entry(pos, struct kprobe_insn_page, hlist); + if (kip->nused < INSNS_PER_PAGE) { + int i; + for (i = 0; i < INSNS_PER_PAGE; i++) { + if (!kip->slot_used[i]) { + kip->slot_used[i] = 1; + kip->nused++; + return kip->insns + (i * MAX_INSN_SIZE); + } + } + /* Surprise! No unused slots. Fix kip->nused. */ + kip->nused = INSNS_PER_PAGE; + } + } + + /* All out of space. Need to allocate a new page. Use slot 0.*/ + kip = kmalloc(sizeof(struct kprobe_insn_page), GFP_KERNEL); + if (!kip) { + return NULL; + } + + /* + * Use module_alloc so this page is within +/- 2GB of where the + * kernel image and loaded module images reside. This is required + * so x86_64 can correctly handle the %rip-relative fixups. + */ + kip->insns = module_alloc(PAGE_SIZE); + if (!kip->insns) { + kfree(kip); + return NULL; + } + INIT_HLIST_NODE(&kip->hlist); + hlist_add_head(&kip->hlist, &kprobe_insn_pages); + memset(kip->slot_used, 0, INSNS_PER_PAGE); + kip->slot_used[0] = 1; + kip->nused = 1; + return kip->insns; +} + +void free_insn_slot(kprobe_opcode_t *slot) +{ + struct kprobe_insn_page *kip; + struct hlist_node *pos; + + hlist_for_each(pos, &kprobe_insn_pages) { + kip = hlist_entry(pos, struct kprobe_insn_page, hlist); + if (kip->insns <= slot && + slot < kip->insns + (INSNS_PER_PAGE * MAX_INSN_SIZE)) { + int i = (slot - kip->insns) / MAX_INSN_SIZE; + kip->slot_used[i] = 0; + kip->nused--; + if (kip->nused == 0) { + /* + * Page is no longer in use. Free it unless + * it's the last one. We keep the last one + * so as not to have to set it up again the + * next time somebody inserts a probe. + */ + hlist_del(&kip->hlist); + if (hlist_empty(&kprobe_insn_pages)) { + INIT_HLIST_NODE(&kip->hlist); + hlist_add_head(&kip->hlist, + &kprobe_insn_pages); + } else { + module_free(NULL, kip->insns); + kfree(kip); + } + } + return; + } + } +} + /* Locks kprobe: irqs must be disabled */ void lock_kprobes(void) { -- cgit v1.2.3-70-g09d2 From 802eae7c800fb7f583e6c06afa363585af2bef00 Mon Sep 17 00:00:00 2001 From: Rusty Lynch Date: Mon, 27 Jun 2005 15:17:08 -0700 Subject: [PATCH] Return probe redesign: architecture independent changes The following is the second version of the function return probe patches I sent out earlier this week. Changes since my last submission include: * Fix in ppc64 code removing an unneeded call to re-enable preemption * Fix a build problem in ia64 when kprobes was turned off * Added another BUG_ON check to each of the architecture trampoline handlers My initial patch description ==> From my experiences with adding return probes to x86_64 and ia64, and the feedback on LKML to those patches, I think we can simplify the design for return probes. The following patch tweaks the original design such that: * Instead of storing the stack address in the return probe instance, the task pointer is stored. This gives us all we need in order to: - find the correct return probe instance when we enter the trampoline (even if we are recursing) - find all left-over return probe instances when the task is going away This has the side effect of simplifying the implementation since more work can be done in kernel/kprobes.c since architecture specific knowledge of the stack layout is no longer required. 
Specifically, we no longer have: - arch_get_kprobe_task() - arch_kprobe_flush_task() - get_rp_inst_tsk() - get_rp_inst() - trampoline_post_handler() * Instead of splitting the return probe handling and cleanup logic across the pre and post trampoline handlers, all the work is pushed into the pre function (trampoline_probe_handler), and then we skip single stepping the original function. In this case the original instruction to be single stepped was just a NOP, and we can do without the extra interruption. The new flow of events for having a return probe handler execute when a target function exits is: * At system initialization time, a kprobe is inserted at the beginning of kretprobe_trampoline. kernel/kprobes.c used to handle this on its own, but ia64 needed to do this a little differently (on ia64 a function pointer is really a pointer to a structure containing the instruction pointer and a global pointer), so I added the notion of arch_init(), so that kernel/kprobes.c:init_kprobes() now allows architecture-specific initialization by calling arch_init() before exiting. Each architecture now registers a kprobe on its own trampoline function. * register_kretprobe() will insert a kprobe at the beginning of the targeted function with the kprobe pre_handler set to arch_prepare_kretprobe (still no change) * When the target function is entered, the kprobe is fired, calling arch_prepare_kretprobe (still no change) * In arch_prepare_kretprobe() we try to get a free instance and, if one is available, we fill out the instance with a pointer to the return probe, the original return address, and a pointer to the task structure (instead of the stack address). Just like before, we change the return address to the trampoline function and mark the instance as used. If multiple return probes are registered for a given target function, then arch_prepare_kretprobe() will get called multiple times for the same task (since our kprobe implementation is able to handle multiple kprobes at the same address). Past the first call to arch_prepare_kretprobe(), we end up with the original address stored in the return probe instance pointing to our trampoline function. (This is a significant difference from the original arch_prepare_kretprobe design.) * The target function executes as normal and then returns to kretprobe_trampoline. * The kprobe inserted on the first instruction of kretprobe_trampoline is fired and calls trampoline_probe_handler() (no change here) * trampoline_probe_handler() consumes each of the instances associated with the current task by calling the registered handler function and marking the instance as unused, until an instance is found that has a return address different from the trampoline function. (change similar to my previous ia64 RFC) * If the task is killed with some left-over return probe instances (meaning that a target function was entered, but never returned), then we just free any instances associated with the task. (Not much different, other than that we can now handle this without calling architecture-specific functions.) There is a known problem that this patch does not yet solve: registering a return probe on flush_old_exec or flush_thread will put us in a bad state. Most likely the best way to handle this is to disallow registering return probes on these two functions. (Significant change)
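
To see the resulting interface from a user's perspective, here is a minimal hypothetical module written against it. It is a sketch only: it assumes the kretprobe fields visible in this patch (kp, handler, maxactive) and that kallsyms_lookup_name() is usable from modules to resolve the probed address; neither assumption is guaranteed by the patch itself:

#include <linux/module.h>
#include <linux/kprobes.h>
#include <linux/kallsyms.h>

static int ret_count;	/* racy counter, good enough for a sketch */

static int fork_ret_handler(struct kretprobe_instance *ri,
			    struct pt_regs *regs)
{
	/* ri->task now identifies the returning task directly,
	 * replacing the old stack_addr-based lookup. */
	ret_count++;
	return 0;
}

static struct kretprobe fork_rp = {
	.handler   = fork_ret_handler,
	.maxactive = 20,	/* instances preallocated for concurrency */
};

static int __init rp_init(void)
{
	fork_rp.kp.addr = (kprobe_opcode_t *)kallsyms_lookup_name("do_fork");
	if (!fork_rp.kp.addr)
		return -ENOENT;
	return register_kretprobe(&fork_rp);
}

static void __exit rp_exit(void)
{
	unregister_kretprobe(&fork_rp);
	printk(KERN_INFO "do_fork returned %d times\n", ret_count);
}

module_init(rp_init);
module_exit(rp_exit);
MODULE_LICENSE("GPL");
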
This patch series applies to the 2.6.12-rc6-mm1 kernel, and provides: * kernel/kprobes.c changes * i386 patch of the existing return probes implementation * x86_64 patch of the existing return probe implementation * ia64 implementation * ppc64 implementation (provided by Ananth) This patch implements the architecture-independent changes for a reworking of the kprobes-based function return probes design. Changes include: * Removing functions for querying a return probe instance off a stack address * Removing the stack_addr field from the kretprobe_instance definition, and adding a task pointer * Adding architecture-specific initialization via arch_init() * Removing extern definitions for the architecture trampoline functions (these aren't needed anymore, since each architecture now handles the initialization of the kprobe in its return probe trampoline function) Signed-off-by: Rusty Lynch Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kprobes.h | 28 +++----------------- kernel/kprobes.c | 69 ++++++++++++++----------------------------------- 2 files changed, 22 insertions(+), 75 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index d304d457985..b7a194c4362 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -104,33 +104,12 @@ struct jprobe { }; #ifdef ARCH_SUPPORTS_KRETPROBES -extern int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs); -extern void trampoline_post_handler(struct kprobe *p, struct pt_regs *regs, - unsigned long flags); -extern struct task_struct *arch_get_kprobe_task(void *ptr); extern void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs); -extern void arch_kprobe_flush_task(struct task_struct *tk); #else /* ARCH_SUPPORTS_KRETPROBES */ -static inline void kretprobe_trampoline(void) -{ -} -static inline int trampoline_probe_handler(struct kprobe *p, - struct pt_regs *regs) -{ - return 0; -} -static inline void trampoline_post_handler(struct kprobe *p, - struct pt_regs *regs, unsigned long flags) -{ -} static inline void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs) { } -static inline void arch_kprobe_flush_task(struct task_struct *tk) -{ -} -#define arch_get_kprobe_task(ptr) ((struct task_struct *)NULL) #endif /* ARCH_SUPPORTS_KRETPROBES */ /* * Function-return probe - @@ -155,8 +134,8 @@ struct kretprobe_instance { struct hlist_node uflist; /* either on free list or used list */ struct hlist_node hlist; struct kretprobe *rp; - void *ret_addr; - void *stack_addr; + kprobe_opcode_t *ret_addr; + struct task_struct *task; }; #ifdef CONFIG_KPROBES @@ -176,6 +155,7 @@ extern void arch_copy_kprobe(struct kprobe *p); extern void arch_arm_kprobe(struct kprobe *p); extern void arch_disarm_kprobe(struct kprobe *p); extern void arch_remove_kprobe(struct kprobe *p); +extern int arch_init(void); extern void show_registers(struct pt_regs *regs); extern kprobe_opcode_t *get_insn_slot(void); extern void free_insn_slot(kprobe_opcode_t *slot); @@ -196,8 +176,6 @@ int register_kretprobe(struct kretprobe *rp); void unregister_kretprobe(struct kretprobe *rp); struct kretprobe_instance *get_free_rp_inst(struct kretprobe *rp); -struct kretprobe_instance *get_rp_inst(void *sara); -struct kretprobe_instance *get_rp_inst_tsk(struct task_struct *tk); void add_rp_inst(struct kretprobe_instance *ri); void kprobe_flush_task(struct task_struct *tk); void recycle_rp_inst(struct kretprobe_instance *ri); diff --git a/kernel/kprobes.c
b/kernel/kprobes.c index 65242529a75..90c0e82b650 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -240,12 +240,6 @@ static int aggr_break_handler(struct kprobe *p, struct pt_regs *regs) return 0; } -struct kprobe trampoline_p = { - .addr = (kprobe_opcode_t *) &kretprobe_trampoline, - .pre_handler = trampoline_probe_handler, - .post_handler = trampoline_post_handler -}; - struct kretprobe_instance *get_free_rp_inst(struct kretprobe *rp) { struct hlist_node *node; @@ -264,35 +258,18 @@ static struct kretprobe_instance *get_used_rp_inst(struct kretprobe *rp) return NULL; } -struct kretprobe_instance *get_rp_inst(void *sara) -{ - struct hlist_head *head; - struct hlist_node *node; - struct task_struct *tsk; - struct kretprobe_instance *ri; - - tsk = arch_get_kprobe_task(sara); - head = &kretprobe_inst_table[hash_ptr(tsk, KPROBE_HASH_BITS)]; - hlist_for_each_entry(ri, node, head, hlist) { - if (ri->stack_addr == sara) - return ri; - } - return NULL; -} - void add_rp_inst(struct kretprobe_instance *ri) { - struct task_struct *tsk; /* * Remove rp inst off the free list - * Add it back when probed function returns */ hlist_del(&ri->uflist); - tsk = arch_get_kprobe_task(ri->stack_addr); + /* Add rp inst onto table */ INIT_HLIST_NODE(&ri->hlist); hlist_add_head(&ri->hlist, - &kretprobe_inst_table[hash_ptr(tsk, KPROBE_HASH_BITS)]); + &kretprobe_inst_table[hash_ptr(ri->task, KPROBE_HASH_BITS)]); /* Also add this rp inst to the used list. */ INIT_HLIST_NODE(&ri->uflist); @@ -319,34 +296,25 @@ struct hlist_head * kretprobe_inst_table_head(struct task_struct *tsk) return &kretprobe_inst_table[hash_ptr(tsk, KPROBE_HASH_BITS)]; } -struct kretprobe_instance *get_rp_inst_tsk(struct task_struct *tk) -{ - struct task_struct *tsk; - struct hlist_head *head; - struct hlist_node *node; - struct kretprobe_instance *ri; - - head = &kretprobe_inst_table[hash_ptr(tk, KPROBE_HASH_BITS)]; - - hlist_for_each_entry(ri, node, head, hlist) { - tsk = arch_get_kprobe_task(ri->stack_addr); - if (tsk == tk) - return ri; - } - return NULL; -} - /* - * This function is called from do_exit or do_execv when task tk's stack is - * about to be recycled. Recycle any function-return probe instances - * associated with this task. These represent probed functions that have - * been called but may never return. + * This function is called from exit_thread or flush_thread when task tk's + * stack is being recycled so that we can recycle any function-return probe + * instances associated with this task. These left over instances represent + * probed functions that have been called but will never return. */ void kprobe_flush_task(struct task_struct *tk) { + struct kretprobe_instance *ri; + struct hlist_head *head; + struct hlist_node *node, *tmp; unsigned long flags = 0; + spin_lock_irqsave(&kprobe_lock, flags); - arch_kprobe_flush_task(tk); + head = kretprobe_inst_table_head(current); + hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { + if (ri->task == tk) + recycle_rp_inst(ri); + } spin_unlock_irqrestore(&kprobe_lock, flags); } @@ -606,9 +574,10 @@ static int __init init_kprobes(void) INIT_HLIST_HEAD(&kretprobe_inst_table[i]); } - err = register_die_notifier(&kprobe_exceptions_nb); - /* Register the trampoline probe for return probe */ - register_kprobe(&trampoline_p); + err = arch_init(); + if (!err) + err = register_die_notifier(&kprobe_exceptions_nb); + return err; } -- cgit v1.2.3-70-g09d2
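
Taken together with the earlier insn-slot patch, the usage pattern expected of an architecture port is roughly the following sketch, condensed from the ppc64 hunks above; the locking around the allocator calls and the exact copy length are the port's responsibility and are assumptions here:

#include <linux/kprobes.h>
#include <linux/string.h>
#include <linux/errno.h>

int arch_prepare_kprobe(struct kprobe *p)
{
	/* Reserve an executable scratch slot instead of keeping the
	 * instruction copy inside the kprobe object. */
	p->ainsn.insn = get_insn_slot();
	if (!p->ainsn.insn)
		return -ENOMEM;
	return 0;
}

void arch_copy_kprobe(struct kprobe *p)
{
	/* Copy the original instruction into the executable page so
	 * it can be single-stepped out of line on no-exec hardware. */
	memcpy(p->ainsn.insn, p->addr,
	       MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
}

void arch_remove_kprobe(struct kprobe *p)
{
	/* Return the slot; the shared allocator frees whole pages
	 * once all of their slots are unused. */
	free_insn_slot(p->ainsn.insn);
}
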