diff options
Diffstat (limited to 'drivers')
111 files changed, 5557 insertions, 2912 deletions
diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 7e1d077874d..58801d718cc 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -49,12 +49,12 @@ static struct kset_uevent_ops memory_uevent_ops = { static struct notifier_block *memory_chain; -static int register_memory_notifier(struct notifier_block *nb) +int register_memory_notifier(struct notifier_block *nb) { return notifier_chain_register(&memory_chain, nb); } -static void unregister_memory_notifier(struct notifier_block *nb) +void unregister_memory_notifier(struct notifier_block *nb) { notifier_chain_unregister(&memory_chain, nb); } @@ -62,8 +62,7 @@ static void unregister_memory_notifier(struct notifier_block *nb) /* * register_memory - Setup a sysfs device for a memory block */ -static int -register_memory(struct memory_block *memory, struct mem_section *section, +int register_memory(struct memory_block *memory, struct mem_section *section, struct node *root) { int error; diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index c4b9d2adfc0..139cbba7618 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -117,7 +117,7 @@ config BLK_DEV_XD config PARIDE tristate "Parallel port IDE device support" - depends on PARPORT + depends on PARPORT_PC ---help--- There are many external CD-ROM and disk devices that connect through your computer's parallel port. Most of them are actually IDE devices diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 485345c8e63..33d6f237b2e 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -54,11 +54,15 @@ #include <linux/errno.h> #include <linux/file.h> #include <linux/ioctl.h> +#include <linux/compiler.h> +#include <linux/err.h> +#include <linux/kernel.h> #include <net/sock.h> #include <linux/devfs_fs_kernel.h> #include <asm/uaccess.h> +#include <asm/system.h> #include <asm/types.h> #include <linux/nbd.h> @@ -230,14 +234,6 @@ static int nbd_send_req(struct nbd_device *lo, struct request *req) request.len = htonl(size); memcpy(request.handle, &req, sizeof(req)); - down(&lo->tx_lock); - - if (!sock || !lo->sock) { - printk(KERN_ERR "%s: Attempted send on closed socket\n", - lo->disk->disk_name); - goto error_out; - } - dprintk(DBG_TX, "%s: request %p: sending control (%s@%llu,%luB)\n", lo->disk->disk_name, req, nbdcmd_to_ascii(nbd_cmd(req)), @@ -276,11 +272,9 @@ static int nbd_send_req(struct nbd_device *lo, struct request *req) } } } - up(&lo->tx_lock); return 0; error_out: - up(&lo->tx_lock); return 1; } @@ -289,9 +283,14 @@ static struct request *nbd_find_request(struct nbd_device *lo, char *handle) struct request *req; struct list_head *tmp; struct request *xreq; + int err; memcpy(&xreq, handle, sizeof(xreq)); + err = wait_event_interruptible(lo->active_wq, lo->active_req != xreq); + if (unlikely(err)) + goto out; + spin_lock(&lo->queue_lock); list_for_each(tmp, &lo->queue_head) { req = list_entry(tmp, struct request, queuelist); @@ -302,7 +301,11 @@ static struct request *nbd_find_request(struct nbd_device *lo, char *handle) return req; } spin_unlock(&lo->queue_lock); - return NULL; + + err = -ENOENT; + +out: + return ERR_PTR(err); } static inline int sock_recv_bvec(struct socket *sock, struct bio_vec *bvec) @@ -331,7 +334,11 @@ static struct request *nbd_read_stat(struct nbd_device *lo) goto harderror; } req = nbd_find_request(lo, reply.handle); - if (req == NULL) { + if (unlikely(IS_ERR(req))) { + result = PTR_ERR(req); + if (result != -ENOENT) + goto harderror; + printk(KERN_ERR "%s: Unexpected reply (%p)\n", lo->disk->disk_name, reply.handle); result = -EBADR; @@ -395,19 +402,24 @@ static void nbd_clear_que(struct nbd_device *lo) BUG_ON(lo->magic != LO_MAGIC); - do { - req = NULL; - spin_lock(&lo->queue_lock); - if (!list_empty(&lo->queue_head)) { - req = list_entry(lo->queue_head.next, struct request, queuelist); - list_del_init(&req->queuelist); - } - spin_unlock(&lo->queue_lock); - if (req) { - req->errors++; - nbd_end_request(req); - } - } while (req); + /* + * Because we have set lo->sock to NULL under the tx_lock, all + * modifications to the list must have completed by now. For + * the same reason, the active_req must be NULL. + * + * As a consequence, we don't need to take the spin lock while + * purging the list here. + */ + BUG_ON(lo->sock); + BUG_ON(lo->active_req); + + while (!list_empty(&lo->queue_head)) { + req = list_entry(lo->queue_head.next, struct request, + queuelist); + list_del_init(&req->queuelist); + req->errors++; + nbd_end_request(req); + } } /* @@ -435,11 +447,6 @@ static void do_nbd_request(request_queue_t * q) BUG_ON(lo->magic != LO_MAGIC); - if (!lo->file) { - printk(KERN_ERR "%s: Request when not-ready\n", - lo->disk->disk_name); - goto error_out; - } nbd_cmd(req) = NBD_CMD_READ; if (rq_data_dir(req) == WRITE) { nbd_cmd(req) = NBD_CMD_WRITE; @@ -453,32 +460,34 @@ static void do_nbd_request(request_queue_t * q) req->errors = 0; spin_unlock_irq(q->queue_lock); - spin_lock(&lo->queue_lock); - - if (!lo->file) { - spin_unlock(&lo->queue_lock); - printk(KERN_ERR "%s: failed between accept and semaphore, file lost\n", - lo->disk->disk_name); + down(&lo->tx_lock); + if (unlikely(!lo->sock)) { + up(&lo->tx_lock); + printk(KERN_ERR "%s: Attempted send on closed socket\n", + lo->disk->disk_name); req->errors++; nbd_end_request(req); spin_lock_irq(q->queue_lock); continue; } - list_add(&req->queuelist, &lo->queue_head); - spin_unlock(&lo->queue_lock); + lo->active_req = req; if (nbd_send_req(lo, req) != 0) { printk(KERN_ERR "%s: Request send failed\n", lo->disk->disk_name); - if (nbd_find_request(lo, (char *)&req) != NULL) { - /* we still own req */ - req->errors++; - nbd_end_request(req); - } else /* we're racing with nbd_clear_que */ - printk(KERN_DEBUG "nbd: can't find req\n"); + req->errors++; + nbd_end_request(req); + } else { + spin_lock(&lo->queue_lock); + list_add(&req->queuelist, &lo->queue_head); + spin_unlock(&lo->queue_lock); } + lo->active_req = NULL; + up(&lo->tx_lock); + wake_up_all(&lo->active_wq); + spin_lock_irq(q->queue_lock); continue; @@ -529,17 +538,10 @@ static int nbd_ioctl(struct inode *inode, struct file *file, down(&lo->tx_lock); lo->sock = NULL; up(&lo->tx_lock); - spin_lock(&lo->queue_lock); file = lo->file; lo->file = NULL; - spin_unlock(&lo->queue_lock); nbd_clear_que(lo); - spin_lock(&lo->queue_lock); - if (!list_empty(&lo->queue_head)) { - printk(KERN_ERR "nbd: disconnect: some requests are in progress -> please try again.\n"); - error = -EBUSY; - } - spin_unlock(&lo->queue_lock); + BUG_ON(!list_empty(&lo->queue_head)); if (file) fput(file); return error; @@ -598,24 +600,19 @@ static int nbd_ioctl(struct inode *inode, struct file *file, lo->sock = NULL; } up(&lo->tx_lock); - spin_lock(&lo->queue_lock); file = lo->file; lo->file = NULL; - spin_unlock(&lo->queue_lock); nbd_clear_que(lo); printk(KERN_WARNING "%s: queue cleared\n", lo->disk->disk_name); if (file) fput(file); return lo->harderror; case NBD_CLEAR_QUE: - down(&lo->tx_lock); - if (lo->sock) { - up(&lo->tx_lock); - return 0; /* probably should be error, but that would - * break "nbd-client -d", so just return 0 */ - } - up(&lo->tx_lock); - nbd_clear_que(lo); + /* + * This is for compatibility only. The queue is always cleared + * by NBD_DO_IT or NBD_CLEAR_SOCK. + */ + BUG_ON(!lo->sock && !list_empty(&lo->queue_head)); return 0; case NBD_PRINT_DEBUG: printk(KERN_INFO "%s: next = %p, prev = %p, head = %p\n", @@ -688,6 +685,7 @@ static int __init nbd_init(void) spin_lock_init(&nbd_dev[i].queue_lock); INIT_LIST_HEAD(&nbd_dev[i].queue_head); init_MUTEX(&nbd_dev[i].tx_lock); + init_waitqueue_head(&nbd_dev[i].active_wq); nbd_dev[i].blksize = 1024; nbd_dev[i].bytesize = 0x7ffffc00ULL << 10; /* 2TB */ disk->major = NBD_MAJOR; diff --git a/drivers/block/paride/Kconfig b/drivers/block/paride/Kconfig index 17ff4056125..c0d2854dd09 100644 --- a/drivers/block/paride/Kconfig +++ b/drivers/block/paride/Kconfig @@ -4,11 +4,12 @@ # PARIDE doesn't need PARPORT, but if PARPORT is configured as a module, # PARIDE must also be a module. The bogus CONFIG_PARIDE_PARPORT option # controls the choices given to the user ... +# PARIDE only supports PC style parports. Tough for USB or other parports... config PARIDE_PARPORT tristate depends on PARIDE!=n - default m if PARPORT=m - default y if PARPORT!=m + default m if PARPORT_PC=m + default y if PARPORT_PC!=m comment "Parallel IDE high-level drivers" depends on PARIDE diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 84e68cdd451..5ebd06b1b4c 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -985,7 +985,7 @@ config HPET_MMAP config HANGCHECK_TIMER tristate "Hangcheck timer" - depends on X86 || IA64 || PPC64 || ARCH_S390 + depends on X86 || IA64 || PPC64 || S390 help The hangcheck-timer module detects when the system has gone out to lunch past a certain margin. It can reboot the system diff --git a/drivers/char/hangcheck-timer.c b/drivers/char/hangcheck-timer.c index 66e53dd450f..40a67c86420 100644 --- a/drivers/char/hangcheck-timer.c +++ b/drivers/char/hangcheck-timer.c @@ -120,7 +120,7 @@ __setup("hcheck_dump_tasks", hangcheck_parse_dump_tasks); #if defined(CONFIG_X86) # define HAVE_MONOTONIC # define TIMER_FREQ 1000000000ULL -#elif defined(CONFIG_ARCH_S390) +#elif defined(CONFIG_S390) /* FA240000 is 1 Second in the IBM time universe (Page 4-38 Principles of Op for zSeries */ # define TIMER_FREQ 0xFA240000ULL #elif defined(CONFIG_IA64) diff --git a/drivers/char/hw_random.c b/drivers/char/hw_random.c index 6f673d2de0b..49769f59ea1 100644 --- a/drivers/char/hw_random.c +++ b/drivers/char/hw_random.c @@ -1,4 +1,9 @@ /* + Added support for the AMD Geode LX RNG + (c) Copyright 2004-2005 Advanced Micro Devices, Inc. + + derived from + Hardware driver for the Intel/AMD/VIA Random Number Generators (RNG) (c) Copyright 2003 Red Hat Inc <jgarzik@redhat.com> @@ -95,6 +100,11 @@ static unsigned int via_data_present (void); static u32 via_data_read (void); #endif +static int __init geode_init(struct pci_dev *dev); +static void geode_cleanup(void); +static unsigned int geode_data_present (void); +static u32 geode_data_read (void); + struct rng_operations { int (*init) (struct pci_dev *dev); void (*cleanup) (void); @@ -122,6 +132,7 @@ enum { rng_hw_intel, rng_hw_amd, rng_hw_via, + rng_hw_geode, }; static struct rng_operations rng_vendor_ops[] = { @@ -139,6 +150,9 @@ static struct rng_operations rng_vendor_ops[] = { /* rng_hw_via */ { via_init, via_cleanup, via_data_present, via_data_read, 1 }, #endif + + /* rng_hw_geode */ + { geode_init, geode_cleanup, geode_data_present, geode_data_read, 4 } }; /* @@ -159,6 +173,9 @@ static struct pci_device_id rng_pci_tbl[] = { { 0x8086, 0x244e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, rng_hw_intel }, { 0x8086, 0x245e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, rng_hw_intel }, + { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_LX_AES, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, rng_hw_geode }, + { 0, }, /* terminate list */ }; MODULE_DEVICE_TABLE (pci, rng_pci_tbl); @@ -460,6 +477,57 @@ static void via_cleanup(void) } #endif +/*********************************************************************** + * + * AMD Geode RNG operations + * + */ + +static void __iomem *geode_rng_base = NULL; + +#define GEODE_RNG_DATA_REG 0x50 +#define GEODE_RNG_STATUS_REG 0x54 + +static u32 geode_data_read(void) +{ + u32 val; + + assert(geode_rng_base != NULL); + val = readl(geode_rng_base + GEODE_RNG_DATA_REG); + return val; +} + +static unsigned int geode_data_present(void) +{ + u32 val; + + assert(geode_rng_base != NULL); + val = readl(geode_rng_base + GEODE_RNG_STATUS_REG); + return val; +} + +static void geode_cleanup(void) +{ + iounmap(geode_rng_base); + geode_rng_base = NULL; +} + +static int geode_init(struct pci_dev *dev) +{ + unsigned long rng_base = pci_resource_start(dev, 0); + + if (rng_base == 0) + return 1; + + geode_rng_base = ioremap(rng_base, 0x58); + + if (geode_rng_base == NULL) { + printk(KERN_ERR PFX "Cannot ioremap RNG memory\n"); + return -EBUSY; + } + + return 0; +} /*********************************************************************** * @@ -574,7 +642,7 @@ static int __init rng_init (void) DPRINTK ("ENTER\n"); - /* Probe for Intel, AMD RNGs */ + /* Probe for Intel, AMD, Geode RNGs */ for_each_pci_dev(pdev) { ent = pci_match_id(rng_pci_tbl, pdev); if (ent) { diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index 1f56b4cf0f5..561430ed94a 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -787,7 +787,6 @@ int ipmi_destroy_user(ipmi_user_t user) int i; unsigned long flags; struct cmd_rcvr *rcvr; - struct list_head *entry1, *entry2; struct cmd_rcvr *rcvrs = NULL; user->valid = 1; @@ -812,8 +811,7 @@ int ipmi_destroy_user(ipmi_user_t user) * synchronize_rcu()) then free everything in that list. */ down(&intf->cmd_rcvrs_lock); - list_for_each_safe_rcu(entry1, entry2, &intf->cmd_rcvrs) { - rcvr = list_entry(entry1, struct cmd_rcvr, link); + list_for_each_entry_rcu(rcvr, &intf->cmd_rcvrs, link) { if (rcvr->user == user) { list_del_rcu(&rcvr->link); rcvr->next = rcvrs; diff --git a/drivers/char/watchdog/Kconfig b/drivers/char/watchdog/Kconfig index 344001b45af..a6544790af6 100644 --- a/drivers/char/watchdog/Kconfig +++ b/drivers/char/watchdog/Kconfig @@ -438,7 +438,7 @@ config INDYDOG config ZVM_WATCHDOG tristate "z/VM Watchdog Timer" - depends on WATCHDOG && ARCH_S390 + depends on WATCHDOG && S390 help IBM s/390 and zSeries machines running under z/VM 5.1 or later provide a virtual watchdog timer to their guest that cause a diff --git a/drivers/ieee1394/ieee1394_core.c b/drivers/ieee1394/ieee1394_core.c index 64fbbb01d52..25ef5a86f5f 100644 --- a/drivers/ieee1394/ieee1394_core.c +++ b/drivers/ieee1394/ieee1394_core.c @@ -1027,10 +1027,10 @@ static int hpsbpkt_thread(void *__hi) daemonize("khpsbpkt"); + current->flags |= PF_NOFREEZE; + while (1) { if (down_interruptible(&khpsbpkt_sig)) { - if (try_to_freeze()) - continue; printk("khpsbpkt: received unexpected signal?!\n" ); break; } diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c index 9f2352bd834..a1e660e3531 100644 --- a/drivers/input/evdev.c +++ b/drivers/input/evdev.c @@ -157,7 +157,7 @@ struct input_event_compat { # define COMPAT_TEST test_thread_flag(TIF_IA32) #elif defined(CONFIG_IA64) # define COMPAT_TEST IS_IA32_PROCESS(ia64_task_regs(current)) -#elif defined(CONFIG_ARCH_S390) +#elif defined(CONFIG_S390) # define COMPAT_TEST test_thread_flag(TIF_31BIT) #elif defined(CONFIG_MIPS) # define COMPAT_TEST (current->thread.mflags & MF_32BIT_ADDR) diff --git a/drivers/macintosh/therm_adt746x.c b/drivers/macintosh/therm_adt746x.c index f38696622eb..5e1f5e9653c 100644 --- a/drivers/macintosh/therm_adt746x.c +++ b/drivers/macintosh/therm_adt746x.c @@ -52,6 +52,7 @@ static char *sensor_location[3] = {NULL, NULL, NULL}; static int limit_adjust = 0; static int fan_speed = -1; +static int verbose = 0; MODULE_AUTHOR("Colin Leroy <colin@colino.net>"); MODULE_DESCRIPTION("Driver for ADT746x thermostat in iBook G4 and " @@ -66,6 +67,10 @@ module_param(fan_speed, int, 0644); MODULE_PARM_DESC(fan_speed,"Specify starting fan speed (0-255) " "(default 64)"); +module_param(verbose, bool, 0); +MODULE_PARM_DESC(verbose,"Verbose log operations " + "(default 0)"); + struct thermostat { struct i2c_client clt; u8 temps[3]; @@ -149,13 +154,13 @@ detach_thermostat(struct i2c_adapter *adapter) if (thread_therm != NULL) { kthread_stop(thread_therm); } - + printk(KERN_INFO "adt746x: Putting max temperatures back from " "%d, %d, %d to %d, %d, %d\n", th->limits[0], th->limits[1], th->limits[2], th->initial_limits[0], th->initial_limits[1], th->initial_limits[2]); - + for (i = 0; i < 3; i++) write_reg(th, LIMIT_REG[i], th->initial_limits[i]); @@ -212,12 +217,14 @@ static void write_fan_speed(struct thermostat *th, int speed, int fan) return; if (th->last_speed[fan] != speed) { - if (speed == -1) - printk(KERN_DEBUG "adt746x: Setting speed to automatic " - "for %s fan.\n", sensor_location[fan+1]); - else - printk(KERN_DEBUG "adt746x: Setting speed to %d " - "for %s fan.\n", speed, sensor_location[fan+1]); + if (verbose) { + if (speed == -1) + printk(KERN_DEBUG "adt746x: Setting speed to automatic " + "for %s fan.\n", sensor_location[fan+1]); + else + printk(KERN_DEBUG "adt746x: Setting speed to %d " + "for %s fan.\n", speed, sensor_location[fan+1]); + } } else return; @@ -298,10 +305,11 @@ static void update_fans_speed (struct thermostat *th) if (new_speed > 255) new_speed = 255; - printk(KERN_DEBUG "adt746x: setting fans speed to %d " - "(limit exceeded by %d on %s) \n", - new_speed, var, - sensor_location[fan_number+1]); + if (verbose) + printk(KERN_DEBUG "adt746x: Setting fans speed to %d " + "(limit exceeded by %d on %s) \n", + new_speed, var, + sensor_location[fan_number+1]); write_both_fan_speed(th, new_speed); th->last_var[fan_number] = var; } else if (var < -2) { @@ -309,8 +317,9 @@ static void update_fans_speed (struct thermostat *th) * so cold (lastvar >= -1) */ if (i == 2 && lastvar < -1) { if (th->last_speed[fan_number] != 0) - printk(KERN_DEBUG "adt746x: Stopping " - "fans.\n"); + if (verbose) + printk(KERN_DEBUG "adt746x: Stopping " + "fans.\n"); write_both_fan_speed(th, 0); } } @@ -406,7 +415,7 @@ static int attach_one_thermostat(struct i2c_adapter *adapter, int addr, th->initial_limits[i] = read_reg(th, LIMIT_REG[i]); set_limit(th, i); } - + printk(KERN_INFO "adt746x: Lowering max temperatures from %d, %d, %d" " to %d, %d, %d\n", th->initial_limits[0], th->initial_limits[1], diff --git a/drivers/macintosh/therm_pm72.c b/drivers/macintosh/therm_pm72.c index 190878eef99..435427daed7 100644 --- a/drivers/macintosh/therm_pm72.c +++ b/drivers/macintosh/therm_pm72.c @@ -1988,18 +1988,13 @@ static void fcu_lookup_fans(struct device_node *fcu_node) static int fcu_of_probe(struct of_device* dev, const struct of_device_id *match) { - int rc; - state = state_detached; /* Lookup the fans in the device tree */ fcu_lookup_fans(dev->node); /* Add the driver */ - rc = i2c_add_driver(&therm_pm72_driver); - if (rc < 0) - return rc; - return 0; + return i2c_add_driver(&therm_pm72_driver); } static int fcu_of_remove(struct of_device* dev) diff --git a/drivers/macintosh/windfarm_lm75_sensor.c b/drivers/macintosh/windfarm_lm75_sensor.c index a0a41ad0f2b..c62ed68a313 100644 --- a/drivers/macintosh/windfarm_lm75_sensor.c +++ b/drivers/macintosh/windfarm_lm75_sensor.c @@ -240,12 +240,7 @@ static int wf_lm75_detach(struct i2c_client *client) static int __init wf_lm75_sensor_init(void) { - int rc; - - rc = i2c_add_driver(&wf_lm75_driver); - if (rc < 0) - return rc; - return 0; + return i2c_add_driver(&wf_lm75_driver); } static void __exit wf_lm75_sensor_exit(void) diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 252d55df964..76a189ceb52 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -315,6 +315,8 @@ static int write_page(struct bitmap *bitmap, struct page *page, int wait) if (bitmap->file == NULL) return write_sb_page(bitmap->mddev, bitmap->offset, page, wait); + flush_dcache_page(page); /* make sure visible to anyone reading the file */ + if (wait) lock_page(page); else { @@ -341,7 +343,7 @@ static int write_page(struct bitmap *bitmap, struct page *page, int wait) /* add to list to be waited for by daemon */ struct page_list *item = mempool_alloc(bitmap->write_pool, GFP_NOIO); item->page = page; - page_cache_get(page); + get_page(page); spin_lock(&bitmap->write_lock); list_add(&item->list, &bitmap->complete_pages); spin_unlock(&bitmap->write_lock); @@ -357,10 +359,10 @@ static struct page *read_page(struct file *file, unsigned long index, struct inode *inode = file->f_mapping->host; struct page *page = NULL; loff_t isize = i_size_read(inode); - unsigned long end_index = isize >> PAGE_CACHE_SHIFT; + unsigned long end_index = isize >> PAGE_SHIFT; - PRINTK("read bitmap file (%dB @ %Lu)\n", (int)PAGE_CACHE_SIZE, - (unsigned long long)index << PAGE_CACHE_SHIFT); + PRINTK("read bitmap file (%dB @ %Lu)\n", (int)PAGE_SIZE, + (unsigned long long)index << PAGE_SHIFT); page = read_cache_page(inode->i_mapping, index, (filler_t *)inode->i_mapping->a_ops->readpage, file); @@ -368,7 +370,7 @@ static struct page *read_page(struct file *file, unsigned long index, goto out; wait_on_page_locked(page); if (!PageUptodate(page) || PageError(page)) { - page_cache_release(page); + put_page(page); page = ERR_PTR(-EIO); goto out; } @@ -376,14 +378,14 @@ static struct page *read_page(struct file *file, unsigned long index, if (index > end_index) /* we have read beyond EOF */ *bytes_read = 0; else if (index == end_index) /* possible short read */ - *bytes_read = isize & ~PAGE_CACHE_MASK; + *bytes_read = isize & ~PAGE_MASK; else - *bytes_read = PAGE_CACHE_SIZE; /* got a full page */ + *bytes_read = PAGE_SIZE; /* got a full page */ out: if (IS_ERR(page)) printk(KERN_ALERT "md: bitmap read error: (%dB @ %Lu): %ld\n", - (int)PAGE_CACHE_SIZE, - (unsigned long long)index << PAGE_CACHE_SHIFT, + (int)PAGE_SIZE, + (unsigned long long)index << PAGE_SHIFT, PTR_ERR(page)); return page; } @@ -406,11 +408,11 @@ int bitmap_update_sb(struct bitmap *bitmap) return 0; } spin_unlock_irqrestore(&bitmap->lock, flags); - sb = (bitmap_super_t *)kmap(bitmap->sb_page); + sb = (bitmap_super_t *)kmap_atomic(bitmap->sb_page, KM_USER0); sb->events = cpu_to_le64(bitmap->mddev->events); if (!bitmap->mddev->degraded) sb->events_cleared = cpu_to_le64(bitmap->mddev->events); - kunmap(bitmap->sb_page); + kunmap_atomic(sb, KM_USER0); return write_page(bitmap, bitmap->sb_page, 1); } @@ -421,7 +423,7 @@ void bitmap_print_sb(struct bitmap *bitmap) if (!bitmap || !bitmap->sb_page) return; - sb = (bitmap_super_t *)kmap(bitmap->sb_page); + sb = (bitmap_super_t *)kmap_atomic(bitmap->sb_page, KM_USER0); printk(KERN_DEBUG "%s: bitmap file superblock:\n", bmname(bitmap)); printk(KERN_DEBUG " magic: %08x\n", le32_to_cpu(sb->magic)); printk(KERN_DEBUG " version: %d\n", le32_to_cpu(sb->version)); @@ -440,7 +442,7 @@ void bitmap_print_sb(struct bitmap *bitmap) printk(KERN_DEBUG " sync size: %llu KB\n", (unsigned long long)le64_to_cpu(sb->sync_size)/2); printk(KERN_DEBUG "max write behind: %d\n", le32_to_cpu(sb->write_behind)); - kunmap(bitmap->sb_page); + kunmap_atomic(sb, KM_USER0); } /* read the superblock from the bitmap file and initialize some bitmap fields */ @@ -466,7 +468,7 @@ static int bitmap_read_sb(struct bitmap *bitmap) return err; } - sb = (bitmap_super_t *)kmap(bitmap->sb_page); + sb = (bitmap_super_t *)kmap_atomic(bitmap->sb_page, KM_USER0); if (bytes_read < sizeof(*sb)) { /* short read */ printk(KERN_INFO "%s: bitmap file superblock truncated\n", @@ -485,12 +487,12 @@ static int bitmap_read_sb(struct bitmap *bitmap) else if (le32_to_cpu(sb->version) < BITMAP_MAJOR_LO || le32_to_cpu(sb->version) > BITMAP_MAJOR_HI) reason = "unrecognized superblock version"; - else if (chunksize < 512 || chunksize > (1024 * 1024 * 4)) - reason = "bitmap chunksize out of range (512B - 4MB)"; + else if (chunksize < PAGE_SIZE) + reason = "bitmap chunksize too small"; else if ((1 << ffz(~chunksize)) != chunksize) reason = "bitmap chunksize not a power of 2"; - else if (daemon_sleep < 1 || daemon_sleep > 15) - reason = "daemon sleep period out of range (1-15s)"; + else if (daemon_sleep < 1 || daemon_sleep > MAX_SCHEDULE_TIMEOUT / HZ) + reason = "daemon sleep period out of range"; else if (write_behind > COUNTER_MAX) reason = "write-behind limit out of range (0 - 16383)"; if (reason) { @@ -535,7 +537,7 @@ success: bitmap->events_cleared = bitmap->mddev->events; err = 0; out: - kunmap(bitmap->sb_page); + kunmap_atomic(sb, KM_USER0); if (err) bitmap_print_sb(bitmap); return err; @@ -558,9 +560,9 @@ static void bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits, spin_unlock_irqrestore(&bitmap->lock, flags); return; } - page_cache_get(bitmap->sb_page); + get_page(bitmap->sb_page); spin_unlock_irqrestore(&bitmap->lock, flags); - sb = (bitmap_super_t *)kmap(bitmap->sb_page); + sb = (bitmap_super_t *)kmap_atomic(bitmap->sb_page, KM_USER0); switch (op) { case MASK_SET: sb->state |= bits; break; @@ -568,8 +570,8 @@ static void bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits, break; default: BUG(); } - kunmap(bitmap->sb_page); - page_cache_release(bitmap->sb_page); + kunmap_atomic(sb, KM_USER0); + put_page(bitmap->sb_page); } /* @@ -622,12 +624,11 @@ static void bitmap_file_unmap(struct bitmap *bitmap) while (pages--) if (map[pages]->index != 0) /* 0 is sb_page, release it below */ - page_cache_release(map[pages]); + put_page(map[pages]); kfree(map); kfree(attr); - if (sb_page) - page_cache_release(sb_page); + safe_put_page(sb_page); } static void bitmap_stop_daemon(struct bitmap *bitmap); @@ -654,7 +655,7 @@ static void drain_write_queues(struct bitmap *bitmap) while ((item = dequeue_page(bitmap))) { /* don't bother to wait */ - page_cache_release(item->page); + put_page(item->page); mempool_free(item, bitmap->write_pool); } @@ -763,7 +764,7 @@ static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block) /* make sure the page stays cached until it gets written out */ if (! (get_page_attr(bitmap, page) & BITMAP_PAGE_DIRTY)) - page_cache_get(page); + get_page(page); /* set the bit */ kaddr = kmap_atomic(page, KM_USER0); @@ -854,6 +855,7 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) unsigned long bytes, offset, dummy; int outofdate; int ret = -ENOSPC; + void *paddr; chunks = bitmap->chunks; file = bitmap->file; @@ -887,12 +889,10 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) if (!bitmap->filemap) goto out; - bitmap->filemap_attr = kmalloc(sizeof(long) * num_pages, GFP_KERNEL); + bitmap->filemap_attr = kzalloc(sizeof(long) * num_pages, GFP_KERNEL); if (!bitmap->filemap_attr) goto out; - memset(bitmap->filemap_attr, 0, sizeof(long) * num_pages); - oldindex = ~0L; for (i = 0; i < chunks; i++) { @@ -901,8 +901,6 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) bit = file_page_offset(i); if (index != oldindex) { /* this is a new page, read it in */ /* unmap the old page, we're done with it */ - if (oldpage != NULL) - kunmap(oldpage); if (index == 0) { /* * if we're here then the superblock page @@ -925,30 +923,32 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) oldindex = index; oldpage = page; - kmap(page); if (outofdate) { /* * if bitmap is out of date, dirty the * whole page and write it out */ - memset(page_address(page) + offset, 0xff, + paddr = kmap_atomic(page, KM_USER0); + memset(paddr + offset, 0xff, PAGE_SIZE - offset); + kunmap_atomic(paddr, KM_USER0); ret = write_page(bitmap, page, 1); if (ret) { - kunmap(page); /* release, page not in filemap yet */ - page_cache_release(page); + put_page(page); goto out; } } bitmap->filemap[bitmap->file_pages++] = page; } + paddr = kmap_atomic(page, KM_USER0); if (bitmap->flags & BITMAP_HOSTENDIAN) - b = test_bit(bit, page_address(page)); + b = test_bit(bit, paddr); else - b = ext2_test_bit(bit, page_address(page)); + b = ext2_test_bit(bit, paddr); + kunmap_atomic(paddr, KM_USER0); if (b) { /* if the disk bit is set, set the memory bit */ bitmap_set_memory_bits(bitmap, i << CHUNK_BLOCK_SHIFT(bitmap), @@ -963,9 +963,6 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) ret = 0; bitmap_mask_state(bitmap, BITMAP_STALE, MASK_UNSET); - if (page) /* unmap the last page */ - kunmap(page); - if (bit_cnt) { /* Kick recovery if any bits were set */ set_bit(MD_RECOVERY_NEEDED, &bitmap->mddev->recovery); md_wakeup_thread(bitmap->mddev->thread); @@ -1021,6 +1018,7 @@ int bitmap_daemon_work(struct bitmap *bitmap) int err = 0; int blocks; int attr; + void *paddr; if (bitmap == NULL) return 0; @@ -1043,7 +1041,7 @@ int bitmap_daemon_work(struct bitmap *bitmap) /* skip this page unless it's marked as needing cleaning */ if (!((attr=get_page_attr(bitmap, page)) & BITMAP_PAGE_CLEAN)) { if (attr & BITMAP_PAGE_NEEDWRITE) { - page_cache_get(page); + get_page(page); clear_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE); } spin_unlock_irqrestore(&bitmap->lock, flags); @@ -1057,13 +1055,13 @@ int bitmap_daemon_work(struct bitmap *bitmap) default: bitmap_file_kick(bitmap); } - page_cache_release(page); + put_page(page); } continue; } /* grab the new page, sync and release the old */ - page_cache_get(page); + get_page(page); if (lastpage != NULL) { if (get_page_attr(bitmap, lastpage) & BITMAP_PAGE_NEEDWRITE) { clear_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE); @@ -1077,14 +1075,12 @@ int bitmap_daemon_work(struct bitmap *bitmap) set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE); spin_unlock_irqrestore(&bitmap->lock, flags); } - kunmap(lastpage); - page_cache_release(lastpage); + put_page(lastpage); if (err) bitmap_file_kick(bitmap); } else spin_unlock_irqrestore(&bitmap->lock, flags); lastpage = page; - kmap(page); /* printk("bitmap clean at page %lu\n", j); */ @@ -1107,10 +1103,12 @@ int bitmap_daemon_work(struct bitmap *bitmap) -1); /* clear the bit */ + paddr = kmap_atomic(page, KM_USER0); if (bitmap->flags & BITMAP_HOSTENDIAN) - clear_bit(file_page_offset(j), page_address(page)); + clear_bit(file_page_offset(j), paddr); else - ext2_clear_bit(file_page_offset(j), page_address(page)); + ext2_clear_bit(file_page_offset(j), paddr); + kunmap_atomic(paddr, KM_USER0); } } spin_unlock_irqrestore(&bitmap->lock, flags); @@ -1118,7 +1116,6 @@ int bitmap_daemon_work(struct bitmap *bitmap) /* now sync the final page */ if (lastpage != NULL) { - kunmap(lastpage); spin_lock_irqsave(&bitmap->lock, flags); if (get_page_attr(bitmap, lastpage) &BITMAP_PAGE_NEEDWRITE) { clear_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE); @@ -1133,7 +1130,7 @@ int bitmap_daemon_work(struct bitmap *bitmap) spin_unlock_irqrestore(&bitmap->lock, flags); } - page_cache_release(lastpage); + put_page(lastpage); } return err; @@ -1184,7 +1181,7 @@ static void bitmap_writeback_daemon(mddev_t *mddev) PRINTK("finished page writeback: %p\n", page); err = PageError(page); - page_cache_release(page); + put_page(page); if (err) { printk(KERN_WARNING "%s: bitmap file writeback " "failed (page %lu): %d\n", @@ -1530,6 +1527,8 @@ void bitmap_destroy(mddev_t *mddev) return; mddev->bitmap = NULL; /* disconnect from the md device */ + if (mddev->thread) + mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT; bitmap_free(bitmap); } @@ -1555,12 +1554,10 @@ int bitmap_create(mddev_t *mddev) BUG_ON(file && mddev->bitmap_offset); - bitmap = kmalloc(sizeof(*bitmap), GFP_KERNEL); + bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL); if (!bitmap) return -ENOMEM; - memset(bitmap, 0, sizeof(*bitmap)); - spin_lock_init(&bitmap->lock); bitmap->mddev = mddev; @@ -1601,12 +1598,11 @@ int bitmap_create(mddev_t *mddev) #ifdef INJECT_FATAL_FAULT_1 bitmap->bp = NULL; #else - bitmap->bp = kmalloc(pages * sizeof(*bitmap->bp), GFP_KERNEL); + bitmap->bp = kzalloc(pages * sizeof(*bitmap->bp), GFP_KERNEL); #endif err = -ENOMEM; if (!bitmap->bp) goto error; - memset(bitmap->bp, 0, pages * sizeof(*bitmap->bp)); bitmap->flags |= BITMAP_ACTIVE; @@ -1636,6 +1632,8 @@ int bitmap_create(mddev_t *mddev) if (IS_ERR(bitmap->writeback_daemon)) return PTR_ERR(bitmap->writeback_daemon); + mddev->thread->timeout = bitmap->daemon_sleep * HZ; + return bitmap_update_sb(bitmap); error: diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index cf663105668..a601a427885 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -690,6 +690,8 @@ bad3: bad2: crypto_free_tfm(tfm); bad1: + /* Must zero key material before freeing */ + memset(cc, 0, sizeof(*cc) + cc->key_size * sizeof(u8)); kfree(cc); return -EINVAL; } @@ -706,6 +708,9 @@ static void crypt_dtr(struct dm_target *ti) cc->iv_gen_ops->dtr(cc); crypto_free_tfm(cc->tfm); dm_put_device(ti, cc->dev); + + /* Must zero key material before freeing */ + memset(cc, 0, sizeof(*cc) + cc->key_size * sizeof(u8)); kfree(cc); } diff --git a/drivers/md/dm-io.h b/drivers/md/dm-io.h index 1a77f326570..f9035bfd1a9 100644 --- a/drivers/md/dm-io.h +++ b/drivers/md/dm-io.h @@ -9,9 +9,6 @@ #include "dm.h" -/* FIXME make this configurable */ -#define DM_MAX_IO_REGIONS 8 - struct io_region { struct block_device *bdev; sector_t sector; diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 07d44e19536..561bda5011e 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -270,6 +270,7 @@ static int dm_hash_rename(const char *old, const char *new) { char *new_name, *old_name; struct hash_cell *hc; + struct dm_table *table; /* * duplicate new. @@ -317,6 +318,15 @@ static int dm_hash_rename(const char *old, const char *new) /* rename the device node in devfs */ register_with_devfs(hc); + /* + * Wake up any dm event waiters. + */ + table = dm_get_table(hc->md); + if (table) { + dm_table_event(table); + dm_table_put(table); + } + up_write(&_hash_lock); kfree(old_name); return 0; @@ -683,14 +693,18 @@ static int dev_rename(struct dm_ioctl *param, size_t param_size) static int do_suspend(struct dm_ioctl *param) { int r = 0; + int do_lockfs = 1; struct mapped_device *md; md = find_device(param); if (!md) return -ENXIO; + if (param->flags & DM_SKIP_LOCKFS_FLAG) + do_lockfs = 0; + if (!dm_suspended(md)) - r = dm_suspend(md); + r = dm_suspend(md, do_lockfs); if (!r) r = __dev_status(md, param); @@ -702,6 +716,7 @@ static int do_suspend(struct dm_ioctl *param) static int do_resume(struct dm_ioctl *param) { int r = 0; + int do_lockfs = 1; struct hash_cell *hc; struct mapped_device *md; struct dm_table *new_map; @@ -727,8 +742,10 @@ static int do_resume(struct dm_ioctl *param) /* Do we need to load a new map ? */ if (new_map) { /* Suspend if it isn't already suspended */ + if (param->flags & DM_SKIP_LOCKFS_FLAG) + do_lockfs = 0; if (!dm_suspended(md)) - dm_suspend(md); + dm_suspend(md, do_lockfs); r = dm_swap_table(md, new_map); if (r) { diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index a76349cb10a..efe4adf7853 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -573,7 +573,7 @@ static int core_get_resync_work(struct dirty_log *log, region_t *region) lc->sync_search); lc->sync_search = *region + 1; - if (*region == lc->region_count) + if (*region >= lc->region_count) return 0; } while (log_test_bit(lc->recovering_bits, *region)); diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 6b0fc167092..6cfa8d435d5 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -562,6 +562,8 @@ struct mirror_set { region_t nr_regions; int in_sync; + struct mirror *default_mirror; /* Default mirror */ + unsigned int nr_mirrors; struct mirror mirror[0]; }; @@ -611,7 +613,7 @@ static int recover(struct mirror_set *ms, struct region *reg) unsigned long flags = 0; /* fill in the source */ - m = ms->mirror + DEFAULT_MIRROR; + m = ms->default_mirror; from.bdev = m->dev->bdev; from.sector = m->offset + region_to_sector(reg->rh, reg->key); if (reg->key == (ms->nr_regions - 1)) { @@ -627,7 +629,7 @@ static int recover(struct mirror_set *ms, struct region *reg) /* fill in the destinations */ for (i = 0, dest = to; i < ms->nr_mirrors; i++) { - if (i == DEFAULT_MIRROR) + if (&ms->mirror[i] == ms->default_mirror) continue; m = ms->mirror + i; @@ -682,7 +684,7 @@ static void do_recovery(struct mirror_set *ms) static struct mirror *choose_mirror(struct mirror_set *ms, sector_t sector) { /* FIXME: add read balancing */ - return ms->mirror + DEFAULT_MIRROR; + return ms->default_mirror; } /* @@ -709,7 +711,7 @@ static void do_reads(struct mirror_set *ms, struct bio_list *reads) if (rh_in_sync(&ms->rh, region, 0)) m = choose_mirror(ms, bio->bi_sector); else - m = ms->mirror + DEFAULT_MIRROR; + m = ms->default_mirror; map_bio(ms, m, bio); generic_make_request(bio); @@ -833,7 +835,7 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes) rh_delay(&ms->rh, bio); while ((bio = bio_list_pop(&nosync))) { - map_bio(ms, ms->mirror + DEFAULT_MIRROR, bio); + map_bio(ms, ms->default_mirror, bio); generic_make_request(bio); } } @@ -900,6 +902,7 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors, ms->nr_mirrors = nr_mirrors; ms->nr_regions = dm_sector_div_up(ti->len, region_size); ms->in_sync = 0; + ms->default_mirror = &ms->mirror[DEFAULT_MIRROR]; if (rh_init(&ms->rh, ms, dl, region_size, ms->nr_regions)) { ti->error = "dm-mirror: Error creating dirty region hash"; diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index ab54f99b7c3..4b9dd8fb1e5 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -371,6 +371,20 @@ static inline ulong round_up(ulong n, ulong size) return (n + size) & ~size; } +static void read_snapshot_metadata(struct dm_snapshot *s) +{ + if (s->have_metadata) + return; + + if (s->store.read_metadata(&s->store)) { + down_write(&s->lock); + s->valid = 0; + up_write(&s->lock); + } + + s->have_metadata = 1; +} + /* * Construct a snapshot mapping: <origin_dev> <COW-dev> <p/n> <chunk-size> */ @@ -848,16 +862,7 @@ static void snapshot_resume(struct dm_target *ti) { struct dm_snapshot *s = (struct dm_snapshot *) ti->private; - if (s->have_metadata) - return; - - if (s->store.read_metadata(&s->store)) { - down_write(&s->lock); - s->valid = 0; - up_write(&s->lock); - } - - s->have_metadata = 1; + read_snapshot_metadata(s); } static int snapshot_status(struct dm_target *ti, status_type_t type, diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 930b9fc2795..0e481512f91 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -55,6 +55,7 @@ union map_info *dm_get_mapinfo(struct bio *bio) */ #define DMF_BLOCK_IO 0 #define DMF_SUSPENDED 1 +#define DMF_FROZEN 2 struct mapped_device { struct rw_semaphore io_lock; @@ -97,7 +98,7 @@ struct mapped_device { * freeze/thaw support require holding onto a super block */ struct super_block *frozen_sb; - struct block_device *frozen_bdev; + struct block_device *suspended_bdev; }; #define MIN_IOS 256 @@ -836,9 +837,9 @@ static void __set_size(struct mapped_device *md, sector_t size) { set_capacity(md->disk, size); - down(&md->frozen_bdev->bd_inode->i_sem); - i_size_write(md->frozen_bdev->bd_inode, (loff_t)size << SECTOR_SHIFT); - up(&md->frozen_bdev->bd_inode->i_sem); + down(&md->suspended_bdev->bd_inode->i_sem); + i_size_write(md->suspended_bdev->bd_inode, (loff_t)size << SECTOR_SHIFT); + up(&md->suspended_bdev->bd_inode->i_sem); } static int __bind(struct mapped_device *md, struct dm_table *t) @@ -902,10 +903,9 @@ int dm_create_with_minor(unsigned int minor, struct mapped_device **result) return create_aux(minor, 1, result); } -void *dm_get_mdptr(dev_t dev) +static struct mapped_device *dm_find_md(dev_t dev) { struct mapped_device *md; - void *mdptr = NULL; unsigned minor = MINOR(dev); if (MAJOR(dev) != _major || minor >= (1 << MINORBITS)) @@ -914,12 +914,32 @@ void *dm_get_mdptr(dev_t dev) down(&_minor_lock); md = idr_find(&_minor_idr, minor); - - if (md && (dm_disk(md)->first_minor == minor)) - mdptr = md->interface_ptr; + if (!md || (dm_disk(md)->first_minor != minor)) + md = NULL; up(&_minor_lock); + return md; +} + +struct mapped_device *dm_get_md(dev_t dev) +{ + struct mapped_device *md = dm_find_md(dev); + + if (md) + dm_get(md); + + return md; +} + +void *dm_get_mdptr(dev_t dev) +{ + struct mapped_device *md; + void *mdptr = NULL; + + md = dm_find_md(dev); + if (md) + mdptr = md->interface_ptr; return mdptr; } @@ -991,43 +1011,33 @@ out: */ static int lock_fs(struct mapped_device *md) { - int r = -ENOMEM; - - md->frozen_bdev = bdget_disk(md->disk, 0); - if (!md->frozen_bdev) { - DMWARN("bdget failed in lock_fs"); - goto out; - } + int r; WARN_ON(md->frozen_sb); - md->frozen_sb = freeze_bdev(md->frozen_bdev); + md->frozen_sb = freeze_bdev(md->suspended_bdev); if (IS_ERR(md->frozen_sb)) { r = PTR_ERR(md->frozen_sb); - goto out_bdput; + md->frozen_sb = NULL; + return r; } + set_bit(DMF_FROZEN, &md->flags); + /* don't bdput right now, we don't want the bdev - * to go away while it is locked. We'll bdput - * in unlock_fs + * to go away while it is locked. */ return 0; - -out_bdput: - bdput(md->frozen_bdev); - md->frozen_sb = NULL; - md->frozen_bdev = NULL; -out: - return r; } static void unlock_fs(struct mapped_device *md) { - thaw_bdev(md->frozen_bdev, md->frozen_sb); - bdput(md->frozen_bdev); + if (!test_bit(DMF_FROZEN, &md->flags)) + return; + thaw_bdev(md->suspended_bdev, md->frozen_sb); md->frozen_sb = NULL; - md->frozen_bdev = NULL; + clear_bit(DMF_FROZEN, &md->flags); } /* @@ -1037,7 +1047,7 @@ static void unlock_fs(struct mapped_device *md) * dm_bind_table, dm_suspend must be called to flush any in * flight bios and ensure that any further io gets deferred. */ -int dm_suspend(struct mapped_device *md) +int dm_suspend(struct mapped_device *md, int do_lockfs) { struct dm_table *map = NULL; DECLARE_WAITQUEUE(wait, current); @@ -1053,10 +1063,19 @@ int dm_suspend(struct mapped_device *md) /* This does not get reverted if there's an error later. */ dm_table_presuspend_targets(map); - /* Flush I/O to the device. */ - r = lock_fs(md); - if (r) + md->suspended_bdev = bdget_disk(md->disk, 0); + if (!md->suspended_bdev) { + DMWARN("bdget failed in dm_suspend"); + r = -ENOMEM; goto out; + } + + /* Flush I/O to the device. */ + if (do_lockfs) { + r = lock_fs(md); + if (r) + goto out; + } /* * First we set the BLOCK_IO flag so no more ios will be mapped. @@ -1105,6 +1124,11 @@ int dm_suspend(struct mapped_device *md) r = 0; out: + if (r && md->suspended_bdev) { + bdput(md->suspended_bdev); + md->suspended_bdev = NULL; + } + dm_table_put(map); up(&md->suspend_lock); return r; @@ -1135,6 +1159,9 @@ int dm_resume(struct mapped_device *md) unlock_fs(md); + bdput(md->suspended_bdev); + md->suspended_bdev = NULL; + clear_bit(DMF_SUSPENDED, &md->flags); dm_table_unplug_all(map); diff --git a/drivers/md/dm.h b/drivers/md/dm.h index e38c3fc1a1d..4eaf075da21 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -28,7 +28,7 @@ * in types.h. */ #ifdef CONFIG_LBD -#define SECTOR_FORMAT "%Lu" +#define SECTOR_FORMAT "%llu" #else #define SECTOR_FORMAT "%lu" #endif @@ -58,6 +58,7 @@ int dm_create(struct mapped_device **md); int dm_create_with_minor(unsigned int minor, struct mapped_device **md); void dm_set_mdptr(struct mapped_device *md, void *ptr); void *dm_get_mdptr(dev_t dev); +struct mapped_device *dm_get_md(dev_t dev); /* * Reference counting for md. @@ -68,7 +69,7 @@ void dm_put(struct mapped_device *md); /* * A device can still be used while suspended, but I/O is deferred. */ -int dm_suspend(struct mapped_device *md); +int dm_suspend(struct mapped_device *md, int with_lockfs); int dm_resume(struct mapped_device *md); /* diff --git a/drivers/md/faulty.c b/drivers/md/faulty.c index 0248f8e7eac..a7a5ab55433 100644 --- a/drivers/md/faulty.c +++ b/drivers/md/faulty.c @@ -316,9 +316,10 @@ static int stop(mddev_t *mddev) return 0; } -static mdk_personality_t faulty_personality = +static struct mdk_personality faulty_personality = { .name = "faulty", + .level = LEVEL_FAULTY, .owner = THIS_MODULE, .make_request = make_request, .run = run, @@ -329,15 +330,17 @@ static mdk_personality_t faulty_personality = static int __init raid_init(void) { - return register_md_personality(FAULTY, &faulty_personality); + return register_md_personality(&faulty_personality); } static void raid_exit(void) { - unregister_md_personality(FAULTY); + unregister_md_personality(&faulty_personality); } module_init(raid_init); module_exit(raid_exit); MODULE_LICENSE("GPL"); MODULE_ALIAS("md-personality-10"); /* faulty */ +MODULE_ALIAS("md-faulty"); +MODULE_ALIAS("md-level--5"); diff --git a/drivers/md/kcopyd.c b/drivers/md/kcopyd.c index eb703648597..ca99979c868 100644 --- a/drivers/md/kcopyd.c +++ b/drivers/md/kcopyd.c @@ -561,11 +561,13 @@ int kcopyd_copy(struct kcopyd_client *kc, struct io_region *from, * Cancels a kcopyd job, eg. someone might be deactivating a * mirror. */ +#if 0 int kcopyd_cancel(struct kcopyd_job *job, int block) { /* FIXME: finish */ return -1; } +#endif /* 0 */ /*----------------------------------------------------------------- * Unit setup @@ -684,4 +686,3 @@ void kcopyd_client_destroy(struct kcopyd_client *kc) EXPORT_SYMBOL(kcopyd_client_create); EXPORT_SYMBOL(kcopyd_client_destroy); EXPORT_SYMBOL(kcopyd_copy); -EXPORT_SYMBOL(kcopyd_cancel); diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 946efef3a8f..777585458c8 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -121,11 +121,10 @@ static int linear_run (mddev_t *mddev) sector_t curr_offset; struct list_head *tmp; - conf = kmalloc (sizeof (*conf) + mddev->raid_disks*sizeof(dev_info_t), + conf = kzalloc (sizeof (*conf) + mddev->raid_disks*sizeof(dev_info_t), GFP_KERNEL); if (!conf) goto out; - memset(conf, 0, sizeof(*conf) + mddev->raid_disks*sizeof(dev_info_t)); mddev->private = conf; cnt = 0; @@ -352,9 +351,10 @@ static void linear_status (struct seq_file *seq, mddev_t *mddev) } -static mdk_personality_t linear_personality= +static struct mdk_personality linear_personality = { .name = "linear", + .level = LEVEL_LINEAR, .owner = THIS_MODULE, .make_request = linear_make_request, .run = linear_run, @@ -364,16 +364,18 @@ static mdk_personality_t linear_personality= static int __init linear_init (void) { - return register_md_personality (LINEAR, &linear_personality); + return register_md_personality (&linear_personality); } static void linear_exit (void) { - unregister_md_personality (LINEAR); + unregister_md_personality (&linear_personality); } module_init(linear_init); module_exit(linear_exit); MODULE_LICENSE("GPL"); -MODULE_ALIAS("md-personality-1"); /* LINEAR */ +MODULE_ALIAS("md-personality-1"); /* LINEAR - deprecated*/ +MODULE_ALIAS("md-linear"); +MODULE_ALIAS("md-level--1"); diff --git a/drivers/md/md.c b/drivers/md/md.c index 8175a2a222d..1b76fb29fb7 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -42,6 +42,7 @@ #include <linux/devfs_fs_kernel.h> #include <linux/buffer_head.h> /* for invalidate_bdev */ #include <linux/suspend.h> +#include <linux/poll.h> #include <linux/init.h> @@ -67,7 +68,7 @@ static void autostart_arrays (int part); #endif -static mdk_personality_t *pers[MAX_PERSONALITY]; +static LIST_HEAD(pers_list); static DEFINE_SPINLOCK(pers_lock); /* @@ -80,10 +81,22 @@ static DEFINE_SPINLOCK(pers_lock); * idle IO detection. * * you can change it via /proc/sys/dev/raid/speed_limit_min and _max. + * or /sys/block/mdX/md/sync_speed_{min,max} */ static int sysctl_speed_limit_min = 1000; static int sysctl_speed_limit_max = 200000; +static inline int speed_min(mddev_t *mddev) +{ + return mddev->sync_speed_min ? + mddev->sync_speed_min : sysctl_speed_limit_min; +} + +static inline int speed_max(mddev_t *mddev) +{ + return mddev->sync_speed_max ? + mddev->sync_speed_max : sysctl_speed_limit_max; +} static struct ctl_table_header *raid_table_header; @@ -134,6 +147,24 @@ static struct block_device_operations md_fops; static int start_readonly; /* + * We have a system wide 'event count' that is incremented + * on any 'interesting' event, and readers of /proc/mdstat + * can use 'poll' or 'select' to find out when the event + * count increases. + * + * Events are: + * start array, stop array, error, add device, remove device, + * start build, activate spare + */ +static DECLARE_WAIT_QUEUE_HEAD(md_event_waiters); +static atomic_t md_event_count; +static void md_new_event(mddev_t *mddev) +{ + atomic_inc(&md_event_count); + wake_up(&md_event_waiters); +} + +/* * Enables to iterate over all existing md arrays * all_mddevs_lock protects this list. */ @@ -209,12 +240,10 @@ static mddev_t * mddev_find(dev_t unit) } spin_unlock(&all_mddevs_lock); - new = (mddev_t *) kmalloc(sizeof(*new), GFP_KERNEL); + new = kzalloc(sizeof(*new), GFP_KERNEL); if (!new) return NULL; - memset(new, 0, sizeof(*new)); - new->unit = unit; if (MAJOR(unit) == MD_MAJOR) new->md_minor = MINOR(unit); @@ -262,7 +291,7 @@ static inline void mddev_unlock(mddev_t * mddev) md_wakeup_thread(mddev->thread); } -mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr) +static mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr) { mdk_rdev_t * rdev; struct list_head *tmp; @@ -286,6 +315,18 @@ static mdk_rdev_t * find_rdev(mddev_t * mddev, dev_t dev) return NULL; } +static struct mdk_personality *find_pers(int level, char *clevel) +{ + struct mdk_personality *pers; + list_for_each_entry(pers, &pers_list, list) { + if (level != LEVEL_NONE && pers->level == level) + return pers; + if (strcmp(pers->name, clevel)==0) + return pers; + } + return NULL; +} + static inline sector_t calc_dev_sboffset(struct block_device *bdev) { sector_t size = bdev->bd_inode->i_size >> BLOCK_SIZE_BITS; @@ -320,7 +361,7 @@ static int alloc_disk_sb(mdk_rdev_t * rdev) static void free_disk_sb(mdk_rdev_t * rdev) { if (rdev->sb_page) { - page_cache_release(rdev->sb_page); + put_page(rdev->sb_page); rdev->sb_loaded = 0; rdev->sb_page = NULL; rdev->sb_offset = 0; @@ -461,6 +502,7 @@ int sync_page_io(struct block_device *bdev, sector_t sector, int size, bio_put(bio); return ret; } +EXPORT_SYMBOL_GPL(sync_page_io); static int read_disk_sb(mdk_rdev_t * rdev, int size) { @@ -665,6 +707,10 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version } rdev->size = calc_dev_size(rdev, sb->chunk_size); + if (rdev->size < sb->size && sb->level > 1) + /* "this cannot possibly happen" ... */ + ret = -EINVAL; + abort: return ret; } @@ -688,6 +734,7 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) mddev->ctime = sb->ctime; mddev->utime = sb->utime; mddev->level = sb->level; + mddev->clevel[0] = 0; mddev->layout = sb->layout; mddev->raid_disks = sb->raid_disks; mddev->size = sb->size; @@ -714,9 +761,10 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) if (sb->state & (1<<MD_SB_BITMAP_PRESENT) && mddev->bitmap_file == NULL) { - if (mddev->level != 1 && mddev->level != 5 && mddev->level != 6) { + if (mddev->level != 1 && mddev->level != 5 && mddev->level != 6 + && mddev->level != 10) { /* FIXME use a better test */ - printk(KERN_WARNING "md: bitmaps only support for raid1\n"); + printk(KERN_WARNING "md: bitmaps not supported for this level.\n"); return -EINVAL; } mddev->bitmap_offset = mddev->default_bitmap_offset; @@ -968,6 +1016,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) } rdev->preferred_minor = 0xffff; rdev->data_offset = le64_to_cpu(sb->data_offset); + atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read)); rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256; bmask = queue_hardsect_size(rdev->bdev->bd_disk->queue)-1; @@ -1006,6 +1055,9 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) rdev->size = le64_to_cpu(sb->data_size)/2; if (le32_to_cpu(sb->chunksize)) rdev->size &= ~((sector_t)le32_to_cpu(sb->chunksize)/2 - 1); + + if (le32_to_cpu(sb->size) > rdev->size*2) + return -EINVAL; return 0; } @@ -1023,6 +1075,7 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) mddev->ctime = le64_to_cpu(sb->ctime) & ((1ULL << 32)-1); mddev->utime = le64_to_cpu(sb->utime) & ((1ULL << 32)-1); mddev->level = le32_to_cpu(sb->level); + mddev->clevel[0] = 0; mddev->layout = le32_to_cpu(sb->layout); mddev->raid_disks = le32_to_cpu(sb->raid_disks); mddev->size = le64_to_cpu(sb->size)/2; @@ -1037,8 +1090,9 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) && mddev->bitmap_file == NULL ) { - if (mddev->level != 1) { - printk(KERN_WARNING "md: bitmaps only supported for raid1\n"); + if (mddev->level != 1 && mddev->level != 5 && mddev->level != 6 + && mddev->level != 10) { + printk(KERN_WARNING "md: bitmaps not supported for this level.\n"); return -EINVAL; } mddev->bitmap_offset = (__s32)le32_to_cpu(sb->bitmap_offset); @@ -1105,6 +1159,8 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) else sb->resync_offset = cpu_to_le64(0); + sb->cnt_corrected_read = atomic_read(&rdev->corrected_errors); + if (mddev->bitmap && mddev->bitmap_file == NULL) { sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset); sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET); @@ -1187,6 +1243,14 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) MD_BUG(); return -EINVAL; } + /* make sure rdev->size exceeds mddev->size */ + if (rdev->size && (mddev->size == 0 || rdev->size < mddev->size)) { + if (mddev->pers) + /* Cannot change size, so fail */ + return -ENOSPC; + else + mddev->size = rdev->size; + } same_pdev = match_dev_unit(mddev, rdev); if (same_pdev) printk(KERN_WARNING @@ -1496,6 +1560,26 @@ repeat: } +/* words written to sysfs files may, or my not, be \n terminated. + * We want to accept with case. For this we use cmd_match. + */ +static int cmd_match(const char *cmd, const char *str) +{ + /* See if cmd, written into a sysfs file, matches + * str. They must either be the same, or cmd can + * have a trailing newline + */ + while (*cmd && *str && *cmd == *str) { + cmd++; + str++; + } + if (*cmd == '\n') + cmd++; + if (*str || *cmd) + return 0; + return 1; +} + struct rdev_sysfs_entry { struct attribute attr; ssize_t (*show)(mdk_rdev_t *, char *); @@ -1538,9 +1622,113 @@ super_show(mdk_rdev_t *rdev, char *page) } static struct rdev_sysfs_entry rdev_super = __ATTR_RO(super); +static ssize_t +errors_show(mdk_rdev_t *rdev, char *page) +{ + return sprintf(page, "%d\n", atomic_read(&rdev->corrected_errors)); +} + +static ssize_t +errors_store(mdk_rdev_t *rdev, const char *buf, size_t len) +{ + char *e; + unsigned long n = simple_strtoul(buf, &e, 10); + if (*buf && (*e == 0 || *e == '\n')) { + atomic_set(&rdev->corrected_errors, n); + return len; + } + return -EINVAL; +} +static struct rdev_sysfs_entry rdev_errors = +__ATTR(errors, 0644, errors_show, errors_store); + +static ssize_t +slot_show(mdk_rdev_t *rdev, char *page) +{ + if (rdev->raid_disk < 0) + return sprintf(page, "none\n"); + else + return sprintf(page, "%d\n", rdev->raid_disk); +} + +static ssize_t +slot_store(mdk_rdev_t *rdev, const char *buf, size_t len) +{ + char *e; + int slot = simple_strtoul(buf, &e, 10); + if (strncmp(buf, "none", 4)==0) + slot = -1; + else if (e==buf || (*e && *e!= '\n')) + return -EINVAL; + if (rdev->mddev->pers) + /* Cannot set slot in active array (yet) */ + return -EBUSY; + if (slot >= rdev->mddev->raid_disks) + return -ENOSPC; + rdev->raid_disk = slot; + /* assume it is working */ + rdev->flags = 0; + set_bit(In_sync, &rdev->flags); + return len; +} + + +static struct rdev_sysfs_entry rdev_slot = +__ATTR(slot, 0644, slot_show, slot_store); + +static ssize_t +offset_show(mdk_rdev_t *rdev, char *page) +{ + return sprintf(page, "%llu\n", (unsigned long long)rdev->data_offset); +} + +static ssize_t +offset_store(mdk_rdev_t *rdev, const char *buf, size_t len) +{ + char *e; + unsigned long long offset = simple_strtoull(buf, &e, 10); + if (e==buf || (*e && *e != '\n')) + return -EINVAL; + if (rdev->mddev->pers) + return -EBUSY; + rdev->data_offset = offset; + return len; +} + +static struct rdev_sysfs_entry rdev_offset = +__ATTR(offset, 0644, offset_show, offset_store); + +static ssize_t +rdev_size_show(mdk_rdev_t *rdev, char *page) +{ + return sprintf(page, "%llu\n", (unsigned long long)rdev->size); +} + +static ssize_t +rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len) +{ + char *e; + unsigned long long size = simple_strtoull(buf, &e, 10); + if (e==buf || (*e && *e != '\n')) + return -EINVAL; + if (rdev->mddev->pers) + return -EBUSY; + rdev->size = size; + if (size < rdev->mddev->size || rdev->mddev->size == 0) + rdev->mddev->size = size; + return len; +} + +static struct rdev_sysfs_entry rdev_size = +__ATTR(size, 0644, rdev_size_show, rdev_size_store); + static struct attribute *rdev_default_attrs[] = { &rdev_state.attr, &rdev_super.attr, + &rdev_errors.attr, + &rdev_slot.attr, + &rdev_offset.attr, + &rdev_size.attr, NULL, }; static ssize_t @@ -1598,12 +1786,11 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi mdk_rdev_t *rdev; sector_t size; - rdev = (mdk_rdev_t *) kmalloc(sizeof(*rdev), GFP_KERNEL); + rdev = kzalloc(sizeof(*rdev), GFP_KERNEL); if (!rdev) { printk(KERN_ERR "md: could not alloc mem for new device!\n"); return ERR_PTR(-ENOMEM); } - memset(rdev, 0, sizeof(*rdev)); if ((err = alloc_disk_sb(rdev))) goto abort_free; @@ -1621,6 +1808,7 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi rdev->data_offset = 0; atomic_set(&rdev->nr_pending, 0); atomic_set(&rdev->read_errors, 0); + atomic_set(&rdev->corrected_errors, 0); size = rdev->bdev->bd_inode->i_size >> BLOCK_SIZE_BITS; if (!size) { @@ -1725,16 +1913,37 @@ static void analyze_sbs(mddev_t * mddev) static ssize_t level_show(mddev_t *mddev, char *page) { - mdk_personality_t *p = mddev->pers; - if (p == NULL && mddev->raid_disks == 0) - return 0; - if (mddev->level >= 0) - return sprintf(page, "raid%d\n", mddev->level); - else + struct mdk_personality *p = mddev->pers; + if (p) return sprintf(page, "%s\n", p->name); + else if (mddev->clevel[0]) + return sprintf(page, "%s\n", mddev->clevel); + else if (mddev->level != LEVEL_NONE) + return sprintf(page, "%d\n", mddev->level); + else + return 0; +} + +static ssize_t +level_store(mddev_t *mddev, const char *buf, size_t len) +{ + int rv = len; + if (mddev->pers) + return -EBUSY; + if (len == 0) + return 0; + if (len >= sizeof(mddev->clevel)) + return -ENOSPC; + strncpy(mddev->clevel, buf, len); + if (mddev->clevel[len-1] == '\n') + len--; + mddev->clevel[len] = 0; + mddev->level = LEVEL_NONE; + return rv; } -static struct md_sysfs_entry md_level = __ATTR_RO(level); +static struct md_sysfs_entry md_level = +__ATTR(level, 0644, level_show, level_store); static ssize_t raid_disks_show(mddev_t *mddev, char *page) @@ -1744,7 +1953,197 @@ raid_disks_show(mddev_t *mddev, char *page) return sprintf(page, "%d\n", mddev->raid_disks); } -static struct md_sysfs_entry md_raid_disks = __ATTR_RO(raid_disks); +static int update_raid_disks(mddev_t *mddev, int raid_disks); + +static ssize_t +raid_disks_store(mddev_t *mddev, const char *buf, size_t len) +{ + /* can only set raid_disks if array is not yet active */ + char *e; + int rv = 0; + unsigned long n = simple_strtoul(buf, &e, 10); + + if (!*buf || (*e && *e != '\n')) + return -EINVAL; + + if (mddev->pers) + rv = update_raid_disks(mddev, n); + else + mddev->raid_disks = n; + return rv ? rv : len; +} +static struct md_sysfs_entry md_raid_disks = +__ATTR(raid_disks, 0644, raid_disks_show, raid_disks_store); + +static ssize_t +chunk_size_show(mddev_t *mddev, char *page) +{ + return sprintf(page, "%d\n", mddev->chunk_size); +} + +static ssize_t +chunk_size_store(mddev_t *mddev, const char *buf, size_t len) +{ + /* can only set chunk_size if array is not yet active */ + char *e; + unsigned long n = simple_strtoul(buf, &e, 10); + + if (mddev->pers) + return -EBUSY; + if (!*buf || (*e && *e != '\n')) + return -EINVAL; + + mddev->chunk_size = n; + return len; +} +static struct md_sysfs_entry md_chunk_size = +__ATTR(chunk_size, 0644, chunk_size_show, chunk_size_store); + +static ssize_t +null_show(mddev_t *mddev, char *page) +{ + return -EINVAL; +} + +static ssize_t +new_dev_store(mddev_t *mddev, const char *buf, size_t len) +{ + /* buf must be %d:%d\n? giving major and minor numbers */ + /* The new device is added to the array. + * If the array has a persistent superblock, we read the + * superblock to initialise info and check validity. + * Otherwise, only checking done is that in bind_rdev_to_array, + * which mainly checks size. + */ + char *e; + int major = simple_strtoul(buf, &e, 10); + int minor; + dev_t dev; + mdk_rdev_t *rdev; + int err; + + if (!*buf || *e != ':' || !e[1] || e[1] == '\n') + return -EINVAL; + minor = simple_strtoul(e+1, &e, 10); + if (*e && *e != '\n') + return -EINVAL; + dev = MKDEV(major, minor); + if (major != MAJOR(dev) || + minor != MINOR(dev)) + return -EOVERFLOW; + + + if (mddev->persistent) { + rdev = md_import_device(dev, mddev->major_version, + mddev->minor_version); + if (!IS_ERR(rdev) && !list_empty(&mddev->disks)) { + mdk_rdev_t *rdev0 = list_entry(mddev->disks.next, + mdk_rdev_t, same_set); + err = super_types[mddev->major_version] + .load_super(rdev, rdev0, mddev->minor_version); + if (err < 0) + goto out; + } + } else + rdev = md_import_device(dev, -1, -1); + + if (IS_ERR(rdev)) + return PTR_ERR(rdev); + err = bind_rdev_to_array(rdev, mddev); + out: + if (err) + export_rdev(rdev); + return err ? err : len; +} + +static struct md_sysfs_entry md_new_device = +__ATTR(new_dev, 0200, null_show, new_dev_store); + +static ssize_t +size_show(mddev_t *mddev, char *page) +{ + return sprintf(page, "%llu\n", (unsigned long long)mddev->size); +} + +static int update_size(mddev_t *mddev, unsigned long size); + +static ssize_t +size_store(mddev_t *mddev, const char *buf, size_t len) +{ + /* If array is inactive, we can reduce the component size, but + * not increase it (except from 0). + * If array is active, we can try an on-line resize + */ + char *e; + int err = 0; + unsigned long long size = simple_strtoull(buf, &e, 10); + if (!*buf || *buf == '\n' || + (*e && *e != '\n')) + return -EINVAL; + + if (mddev->pers) { + err = update_size(mddev, size); + md_update_sb(mddev); + } else { + if (mddev->size == 0 || + mddev->size > size) + mddev->size = size; + else + err = -ENOSPC; + } + return err ? err : len; +} + +static struct md_sysfs_entry md_size = +__ATTR(component_size, 0644, size_show, size_store); + + +/* Metdata version. + * This is either 'none' for arrays with externally managed metadata, + * or N.M for internally known formats + */ +static ssize_t +metadata_show(mddev_t *mddev, char *page) +{ + if (mddev->persistent) + return sprintf(page, "%d.%d\n", + mddev->major_version, mddev->minor_version); + else + return sprintf(page, "none\n"); +} + +static ssize_t +metadata_store(mddev_t *mddev, const char *buf, size_t len) +{ + int major, minor; + char *e; + if (!list_empty(&mddev->disks)) + return -EBUSY; + + if (cmd_match(buf, "none")) { + mddev->persistent = 0; + mddev->major_version = 0; + mddev->minor_version = 90; + return len; + } + major = simple_strtoul(buf, &e, 10); + if (e==buf || *e != '.') + return -EINVAL; + buf = e+1; + minor = simple_strtoul(buf, &e, 10); + if (e==buf || *e != '\n') + return -EINVAL; + if (major >= sizeof(super_types)/sizeof(super_types[0]) || + super_types[major].name == NULL) + return -ENOENT; + mddev->major_version = major; + mddev->minor_version = minor; + mddev->persistent = 1; + return len; +} + +static struct md_sysfs_entry md_metadata = +__ATTR(metadata_version, 0644, metadata_show, metadata_store); static ssize_t action_show(mddev_t *mddev, char *page) @@ -1771,31 +2170,27 @@ action_store(mddev_t *mddev, const char *page, size_t len) if (!mddev->pers || !mddev->pers->sync_request) return -EINVAL; - if (strcmp(page, "idle")==0 || strcmp(page, "idle\n")==0) { + if (cmd_match(page, "idle")) { if (mddev->sync_thread) { set_bit(MD_RECOVERY_INTR, &mddev->recovery); md_unregister_thread(mddev->sync_thread); mddev->sync_thread = NULL; mddev->recovery = 0; } - return len; - } - - if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || - test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) + } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || + test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) return -EBUSY; - if (strcmp(page, "resync")==0 || strcmp(page, "resync\n")==0 || - strcmp(page, "recover")==0 || strcmp(page, "recover\n")==0) + else if (cmd_match(page, "resync") || cmd_match(page, "recover")) set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); else { - if (strcmp(page, "check")==0 || strcmp(page, "check\n")==0) + if (cmd_match(page, "check")) set_bit(MD_RECOVERY_CHECK, &mddev->recovery); - else if (strcmp(page, "repair")!=0 && strcmp(page, "repair\n")!=0) + else if (cmd_match(page, "repair")) return -EINVAL; set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery); set_bit(MD_RECOVERY_SYNC, &mddev->recovery); - set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); } + set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); md_wakeup_thread(mddev->thread); return len; } @@ -1814,15 +2209,107 @@ md_scan_mode = __ATTR(sync_action, S_IRUGO|S_IWUSR, action_show, action_store); static struct md_sysfs_entry md_mismatches = __ATTR_RO(mismatch_cnt); +static ssize_t +sync_min_show(mddev_t *mddev, char *page) +{ + return sprintf(page, "%d (%s)\n", speed_min(mddev), + mddev->sync_speed_min ? "local": "system"); +} + +static ssize_t +sync_min_store(mddev_t *mddev, const char *buf, size_t len) +{ + int min; + char *e; + if (strncmp(buf, "system", 6)==0) { + mddev->sync_speed_min = 0; + return len; + } + min = simple_strtoul(buf, &e, 10); + if (buf == e || (*e && *e != '\n') || min <= 0) + return -EINVAL; + mddev->sync_speed_min = min; + return len; +} + +static struct md_sysfs_entry md_sync_min = +__ATTR(sync_speed_min, S_IRUGO|S_IWUSR, sync_min_show, sync_min_store); + +static ssize_t +sync_max_show(mddev_t *mddev, char *page) +{ + return sprintf(page, "%d (%s)\n", speed_max(mddev), + mddev->sync_speed_max ? "local": "system"); +} + +static ssize_t +sync_max_store(mddev_t *mddev, const char *buf, size_t len) +{ + int max; + char *e; + if (strncmp(buf, "system", 6)==0) { + mddev->sync_speed_max = 0; + return len; + } + max = simple_strtoul(buf, &e, 10); + if (buf == e || (*e && *e != '\n') || max <= 0) + return -EINVAL; + mddev->sync_speed_max = max; + return len; +} + +static struct md_sysfs_entry md_sync_max = +__ATTR(sync_speed_max, S_IRUGO|S_IWUSR, sync_max_show, sync_max_store); + + +static ssize_t +sync_speed_show(mddev_t *mddev, char *page) +{ + unsigned long resync, dt, db; + resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active)); + dt = ((jiffies - mddev->resync_mark) / HZ); + if (!dt) dt++; + db = resync - (mddev->resync_mark_cnt); + return sprintf(page, "%ld\n", db/dt/2); /* K/sec */ +} + +static struct md_sysfs_entry +md_sync_speed = __ATTR_RO(sync_speed); + +static ssize_t +sync_completed_show(mddev_t *mddev, char *page) +{ + unsigned long max_blocks, resync; + + if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) + max_blocks = mddev->resync_max_sectors; + else + max_blocks = mddev->size << 1; + + resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active)); + return sprintf(page, "%lu / %lu\n", resync, max_blocks); +} + +static struct md_sysfs_entry +md_sync_completed = __ATTR_RO(sync_completed); + static struct attribute *md_default_attrs[] = { &md_level.attr, &md_raid_disks.attr, + &md_chunk_size.attr, + &md_size.attr, + &md_metadata.attr, + &md_new_device.attr, NULL, }; static struct attribute *md_redundancy_attrs[] = { &md_scan_mode.attr, &md_mismatches.attr, + &md_sync_min.attr, + &md_sync_max.attr, + &md_sync_speed.attr, + &md_sync_completed.attr, NULL, }; static struct attribute_group md_redundancy_group = { @@ -1937,14 +2424,16 @@ static void md_safemode_timeout(unsigned long data) md_wakeup_thread(mddev->thread); } +static int start_dirty_degraded; static int do_md_run(mddev_t * mddev) { - int pnum, err; + int err; int chunk_size; struct list_head *tmp; mdk_rdev_t *rdev; struct gendisk *disk; + struct mdk_personality *pers; char b[BDEVNAME_SIZE]; if (list_empty(&mddev->disks)) @@ -1961,20 +2450,8 @@ static int do_md_run(mddev_t * mddev) analyze_sbs(mddev); chunk_size = mddev->chunk_size; - pnum = level_to_pers(mddev->level); - if ((pnum != MULTIPATH) && (pnum != RAID1)) { - if (!chunk_size) { - /* - * 'default chunksize' in the old md code used to - * be PAGE_SIZE, baaad. - * we abort here to be on the safe side. We don't - * want to continue the bad practice. - */ - printk(KERN_ERR - "no chunksize specified, see 'man raidtab'\n"); - return -EINVAL; - } + if (chunk_size) { if (chunk_size > MAX_CHUNK_SIZE) { printk(KERN_ERR "too big chunk_size: %d > %d\n", chunk_size, MAX_CHUNK_SIZE); @@ -2010,10 +2487,10 @@ static int do_md_run(mddev_t * mddev) } #ifdef CONFIG_KMOD - if (!pers[pnum]) - { - request_module("md-personality-%d", pnum); - } + if (mddev->level != LEVEL_NONE) + request_module("md-level-%d", mddev->level); + else if (mddev->clevel[0]) + request_module("md-%s", mddev->clevel); #endif /* @@ -2035,30 +2512,39 @@ static int do_md_run(mddev_t * mddev) return -ENOMEM; spin_lock(&pers_lock); - if (!pers[pnum] || !try_module_get(pers[pnum]->owner)) { + pers = find_pers(mddev->level, mddev->clevel); + if (!pers || !try_module_get(pers->owner)) { spin_unlock(&pers_lock); - printk(KERN_WARNING "md: personality %d is not loaded!\n", - pnum); + if (mddev->level != LEVEL_NONE) + printk(KERN_WARNING "md: personality for level %d is not loaded!\n", + mddev->level); + else + printk(KERN_WARNING "md: personality for level %s is not loaded!\n", + mddev->clevel); return -EINVAL; } - - mddev->pers = pers[pnum]; + mddev->pers = pers; spin_unlock(&pers_lock); + mddev->level = pers->level; + strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel)); mddev->recovery = 0; mddev->resync_max_sectors = mddev->size << 1; /* may be over-ridden by personality */ mddev->barriers_work = 1; + mddev->ok_start_degraded = start_dirty_degraded; if (start_readonly) mddev->ro = 2; /* read-only, but switch on first write */ - /* before we start the array running, initialise the bitmap */ - err = bitmap_create(mddev); - if (err) - printk(KERN_ERR "%s: failed to create bitmap (%d)\n", - mdname(mddev), err); - else - err = mddev->pers->run(mddev); + err = mddev->pers->run(mddev); + if (!err && mddev->pers->sync_request) { + err = bitmap_create(mddev); + if (err) { + printk(KERN_ERR "%s: failed to create bitmap (%d)\n", + mdname(mddev), err); + mddev->pers->stop(mddev); + } + } if (err) { printk(KERN_ERR "md: pers->run() failed ...\n"); module_put(mddev->pers->owner); @@ -2104,6 +2590,7 @@ static int do_md_run(mddev_t * mddev) mddev->queue->make_request_fn = mddev->pers->make_request; mddev->changed = 1; + md_new_event(mddev); return 0; } @@ -2231,6 +2718,7 @@ static int do_md_stop(mddev_t * mddev, int ro) printk(KERN_INFO "md: %s switched to read-only mode.\n", mdname(mddev)); err = 0; + md_new_event(mddev); out: return err; } @@ -2668,12 +3156,6 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info) if (info->state & (1<<MD_DISK_WRITEMOSTLY)) set_bit(WriteMostly, &rdev->flags); - err = bind_rdev_to_array(rdev, mddev); - if (err) { - export_rdev(rdev); - return err; - } - if (!mddev->persistent) { printk(KERN_INFO "md: nonpersistent superblock ...\n"); rdev->sb_offset = rdev->bdev->bd_inode->i_size >> BLOCK_SIZE_BITS; @@ -2681,8 +3163,11 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info) rdev->sb_offset = calc_dev_sboffset(rdev->bdev); rdev->size = calc_dev_size(rdev, mddev->chunk_size); - if (!mddev->size || (mddev->size > rdev->size)) - mddev->size = rdev->size; + err = bind_rdev_to_array(rdev, mddev); + if (err) { + export_rdev(rdev); + return err; + } } return 0; @@ -2705,6 +3190,7 @@ static int hot_remove_disk(mddev_t * mddev, dev_t dev) kick_rdev_from_array(rdev); md_update_sb(mddev); + md_new_event(mddev); return 0; busy: @@ -2753,15 +3239,6 @@ static int hot_add_disk(mddev_t * mddev, dev_t dev) size = calc_dev_size(rdev, mddev->chunk_size); rdev->size = size; - if (size < mddev->size) { - printk(KERN_WARNING - "%s: disk size %llu blocks < array size %llu\n", - mdname(mddev), (unsigned long long)size, - (unsigned long long)mddev->size); - err = -ENOSPC; - goto abort_export; - } - if (test_bit(Faulty, &rdev->flags)) { printk(KERN_WARNING "md: can not hot-add faulty %s disk to %s!\n", @@ -2771,7 +3248,9 @@ static int hot_add_disk(mddev_t * mddev, dev_t dev) } clear_bit(In_sync, &rdev->flags); rdev->desc_nr = -1; - bind_rdev_to_array(rdev, mddev); + err = bind_rdev_to_array(rdev, mddev); + if (err) + goto abort_export; /* * The rest should better be atomic, we can have disk failures @@ -2795,7 +3274,7 @@ static int hot_add_disk(mddev_t * mddev, dev_t dev) */ set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); md_wakeup_thread(mddev->thread); - + md_new_event(mddev); return 0; abort_unbind_export: @@ -2942,6 +3421,81 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info) return 0; } +static int update_size(mddev_t *mddev, unsigned long size) +{ + mdk_rdev_t * rdev; + int rv; + struct list_head *tmp; + + if (mddev->pers->resize == NULL) + return -EINVAL; + /* The "size" is the amount of each device that is used. + * This can only make sense for arrays with redundancy. + * linear and raid0 always use whatever space is available + * We can only consider changing the size if no resync + * or reconstruction is happening, and if the new size + * is acceptable. It must fit before the sb_offset or, + * if that is <data_offset, it must fit before the + * size of each device. + * If size is zero, we find the largest size that fits. + */ + if (mddev->sync_thread) + return -EBUSY; + ITERATE_RDEV(mddev,rdev,tmp) { + sector_t avail; + int fit = (size == 0); + if (rdev->sb_offset > rdev->data_offset) + avail = (rdev->sb_offset*2) - rdev->data_offset; + else + avail = get_capacity(rdev->bdev->bd_disk) + - rdev->data_offset; + if (fit && (size == 0 || size > avail/2)) + size = avail/2; + if (avail < ((sector_t)size << 1)) + return -ENOSPC; + } + rv = mddev->pers->resize(mddev, (sector_t)size *2); + if (!rv) { + struct block_device *bdev; + + bdev = bdget_disk(mddev->gendisk, 0); + if (bdev) { + down(&bdev->bd_inode->i_sem); + i_size_write(bdev->bd_inode, mddev->array_size << 10); + up(&bdev->bd_inode->i_sem); + bdput(bdev); + } + } + return rv; +} + +static int update_raid_disks(mddev_t *mddev, int raid_disks) +{ + int rv; + /* change the number of raid disks */ + if (mddev->pers->reshape == NULL) + return -EINVAL; + if (raid_disks <= 0 || + raid_disks >= mddev->max_disks) + return -EINVAL; + if (mddev->sync_thread) + return -EBUSY; + rv = mddev->pers->reshape(mddev, raid_disks); + if (!rv) { + struct block_device *bdev; + + bdev = bdget_disk(mddev->gendisk, 0); + if (bdev) { + down(&bdev->bd_inode->i_sem); + i_size_write(bdev->bd_inode, mddev->array_size << 10); + up(&bdev->bd_inode->i_sem); + bdput(bdev); + } + } + return rv; +} + + /* * update_array_info is used to change the configuration of an * on-line array. @@ -2990,71 +3544,12 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info) else return mddev->pers->reconfig(mddev, info->layout, -1); } - if (mddev->size != info->size) { - mdk_rdev_t * rdev; - struct list_head *tmp; - if (mddev->pers->resize == NULL) - return -EINVAL; - /* The "size" is the amount of each device that is used. - * This can only make sense for arrays with redundancy. - * linear and raid0 always use whatever space is available - * We can only consider changing the size if no resync - * or reconstruction is happening, and if the new size - * is acceptable. It must fit before the sb_offset or, - * if that is <data_offset, it must fit before the - * size of each device. - * If size is zero, we find the largest size that fits. - */ - if (mddev->sync_thread) - return -EBUSY; - ITERATE_RDEV(mddev,rdev,tmp) { - sector_t avail; - int fit = (info->size == 0); - if (rdev->sb_offset > rdev->data_offset) - avail = (rdev->sb_offset*2) - rdev->data_offset; - else - avail = get_capacity(rdev->bdev->bd_disk) - - rdev->data_offset; - if (fit && (info->size == 0 || info->size > avail/2)) - info->size = avail/2; - if (avail < ((sector_t)info->size << 1)) - return -ENOSPC; - } - rv = mddev->pers->resize(mddev, (sector_t)info->size *2); - if (!rv) { - struct block_device *bdev; - - bdev = bdget_disk(mddev->gendisk, 0); - if (bdev) { - down(&bdev->bd_inode->i_sem); - i_size_write(bdev->bd_inode, mddev->array_size << 10); - up(&bdev->bd_inode->i_sem); - bdput(bdev); - } - } - } - if (mddev->raid_disks != info->raid_disks) { - /* change the number of raid disks */ - if (mddev->pers->reshape == NULL) - return -EINVAL; - if (info->raid_disks <= 0 || - info->raid_disks >= mddev->max_disks) - return -EINVAL; - if (mddev->sync_thread) - return -EBUSY; - rv = mddev->pers->reshape(mddev, info->raid_disks); - if (!rv) { - struct block_device *bdev; - - bdev = bdget_disk(mddev->gendisk, 0); - if (bdev) { - down(&bdev->bd_inode->i_sem); - i_size_write(bdev->bd_inode, mddev->array_size << 10); - up(&bdev->bd_inode->i_sem); - bdput(bdev); - } - } - } + if (mddev->size != info->size) + rv = update_size(mddev, info->size); + + if (mddev->raid_disks != info->raid_disks) + rv = update_raid_disks(mddev, info->raid_disks); + if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) { if (mddev->pers->quiesce == NULL) return -EINVAL; @@ -3476,11 +3971,10 @@ mdk_thread_t *md_register_thread(void (*run) (mddev_t *), mddev_t *mddev, { mdk_thread_t *thread; - thread = kmalloc(sizeof(mdk_thread_t), GFP_KERNEL); + thread = kzalloc(sizeof(mdk_thread_t), GFP_KERNEL); if (!thread) return NULL; - memset(thread, 0, sizeof(mdk_thread_t)); init_waitqueue_head(&thread->wqueue); thread->run = run; @@ -3524,6 +4018,7 @@ void md_error(mddev_t *mddev, mdk_rdev_t *rdev) set_bit(MD_RECOVERY_INTR, &mddev->recovery); set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); md_wakeup_thread(mddev->thread); + md_new_event(mddev); } /* seq_file implementation /proc/mdstat */ @@ -3664,24 +4159,29 @@ static void md_seq_stop(struct seq_file *seq, void *v) mddev_put(mddev); } +struct mdstat_info { + int event; +}; + static int md_seq_show(struct seq_file *seq, void *v) { mddev_t *mddev = v; sector_t size; struct list_head *tmp2; mdk_rdev_t *rdev; - int i; + struct mdstat_info *mi = seq->private; struct bitmap *bitmap; if (v == (void*)1) { + struct mdk_personality *pers; seq_printf(seq, "Personalities : "); spin_lock(&pers_lock); - for (i = 0; i < MAX_PERSONALITY; i++) - if (pers[i]) - seq_printf(seq, "[%s] ", pers[i]->name); + list_for_each_entry(pers, &pers_list, list) + seq_printf(seq, "[%s] ", pers->name); spin_unlock(&pers_lock); seq_printf(seq, "\n"); + mi->event = atomic_read(&md_event_count); return 0; } if (v == (void*)2) { @@ -3790,47 +4290,68 @@ static struct seq_operations md_seq_ops = { static int md_seq_open(struct inode *inode, struct file *file) { int error; + struct mdstat_info *mi = kmalloc(sizeof(*mi), GFP_KERNEL); + if (mi == NULL) + return -ENOMEM; error = seq_open(file, &md_seq_ops); + if (error) + kfree(mi); + else { + struct seq_file *p = file->private_data; + p->private = mi; + mi->event = atomic_read(&md_event_count); + } return error; } +static int md_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *m = file->private_data; + struct mdstat_info *mi = m->private; + m->private = NULL; + kfree(mi); + return seq_release(inode, file); +} + +static unsigned int mdstat_poll(struct file *filp, poll_table *wait) +{ + struct seq_file *m = filp->private_data; + struct mdstat_info *mi = m->private; + int mask; + + poll_wait(filp, &md_event_waiters, wait); + + /* always allow read */ + mask = POLLIN | POLLRDNORM; + + if (mi->event != atomic_read(&md_event_count)) + mask |= POLLERR | POLLPRI; + return mask; +} + static struct file_operations md_seq_fops = { .open = md_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = md_seq_release, + .poll = mdstat_poll, }; -int register_md_personality(int pnum, mdk_personality_t *p) +int register_md_personality(struct mdk_personality *p) { - if (pnum >= MAX_PERSONALITY) { - printk(KERN_ERR - "md: tried to install personality %s as nr %d, but max is %lu\n", - p->name, pnum, MAX_PERSONALITY-1); - return -EINVAL; - } - spin_lock(&pers_lock); - if (pers[pnum]) { - spin_unlock(&pers_lock); - return -EBUSY; - } - - pers[pnum] = p; - printk(KERN_INFO "md: %s personality registered as nr %d\n", p->name, pnum); + list_add_tail(&p->list, &pers_list); + printk(KERN_INFO "md: %s personality registered for level %d\n", p->name, p->level); spin_unlock(&pers_lock); return 0; } -int unregister_md_personality(int pnum) +int unregister_md_personality(struct mdk_personality *p) { - if (pnum >= MAX_PERSONALITY) - return -EINVAL; - - printk(KERN_INFO "md: %s personality unregistered\n", pers[pnum]->name); + printk(KERN_INFO "md: %s personality unregistered\n", p->name); spin_lock(&pers_lock); - pers[pnum] = NULL; + list_del_init(&p->list); spin_unlock(&pers_lock); return 0; } @@ -4012,10 +4533,10 @@ static void md_do_sync(mddev_t *mddev) printk(KERN_INFO "md: syncing RAID array %s\n", mdname(mddev)); printk(KERN_INFO "md: minimum _guaranteed_ reconstruction speed:" - " %d KB/sec/disc.\n", sysctl_speed_limit_min); + " %d KB/sec/disc.\n", speed_min(mddev)); printk(KERN_INFO "md: using maximum available idle IO bandwidth " "(but not more than %d KB/sec) for reconstruction.\n", - sysctl_speed_limit_max); + speed_max(mddev)); is_mddev_idle(mddev); /* this also initializes IO event counters */ /* we don't use the checkpoint if there's a bitmap */ @@ -4056,7 +4577,7 @@ static void md_do_sync(mddev_t *mddev) skipped = 0; sectors = mddev->pers->sync_request(mddev, j, &skipped, - currspeed < sysctl_speed_limit_min); + currspeed < speed_min(mddev)); if (sectors == 0) { set_bit(MD_RECOVERY_ERR, &mddev->recovery); goto out; @@ -4069,7 +4590,11 @@ static void md_do_sync(mddev_t *mddev) j += sectors; if (j>1) mddev->curr_resync = j; - + if (last_check == 0) + /* this is the earliers that rebuilt will be + * visible in /proc/mdstat + */ + md_new_event(mddev); if (last_check + window > io_sectors || j == max_sectors) continue; @@ -4117,8 +4642,8 @@ static void md_do_sync(mddev_t *mddev) currspeed = ((unsigned long)(io_sectors-mddev->resync_mark_cnt))/2 /((jiffies-mddev->resync_mark)/HZ +1) +1; - if (currspeed > sysctl_speed_limit_min) { - if ((currspeed > sysctl_speed_limit_max) || + if (currspeed > speed_min(mddev)) { + if ((currspeed > speed_max(mddev)) || !is_mddev_idle(mddev)) { msleep(500); goto repeat; @@ -4255,6 +4780,7 @@ void md_check_recovery(mddev_t *mddev) mddev->recovery = 0; /* flag recovery needed just to double check */ set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); + md_new_event(mddev); goto unlock; } /* Clear some bits that don't mean anything, but @@ -4292,6 +4818,7 @@ void md_check_recovery(mddev_t *mddev) sprintf(nm, "rd%d", rdev->raid_disk); sysfs_create_link(&mddev->kobj, &rdev->kobj, nm); spares++; + md_new_event(mddev); } else break; } @@ -4324,9 +4851,9 @@ void md_check_recovery(mddev_t *mddev) mdname(mddev)); /* leave the spares where they are, it shouldn't hurt */ mddev->recovery = 0; - } else { + } else md_wakeup_thread(mddev->sync_thread); - } + md_new_event(mddev); } unlock: mddev_unlock(mddev); @@ -4503,12 +5030,14 @@ static int set_ro(const char *val, struct kernel_param *kp) int num = simple_strtoul(val, &e, 10); if (*val && (*e == '\0' || *e == '\n')) { start_readonly = num; - return 0;; + return 0; } return -EINVAL; } module_param_call(start_ro, set_ro, get_ro, NULL, 0600); +module_param(start_dirty_degraded, int, 0644); + EXPORT_SYMBOL(register_md_personality); EXPORT_SYMBOL(unregister_md_personality); diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index 145cdc5ad00..e6aa309a66d 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -35,15 +35,10 @@ #define NR_RESERVED_BUFS 32 -static mdk_personality_t multipath_personality; - - static void *mp_pool_alloc(gfp_t gfp_flags, void *data) { struct multipath_bh *mpb; - mpb = kmalloc(sizeof(*mpb), gfp_flags); - if (mpb) - memset(mpb, 0, sizeof(*mpb)); + mpb = kzalloc(sizeof(*mpb), gfp_flags); return mpb; } @@ -444,7 +439,7 @@ static int multipath_run (mddev_t *mddev) * should be freed in multipath_stop()] */ - conf = kmalloc(sizeof(multipath_conf_t), GFP_KERNEL); + conf = kzalloc(sizeof(multipath_conf_t), GFP_KERNEL); mddev->private = conf; if (!conf) { printk(KERN_ERR @@ -452,9 +447,8 @@ static int multipath_run (mddev_t *mddev) mdname(mddev)); goto out; } - memset(conf, 0, sizeof(*conf)); - conf->multipaths = kmalloc(sizeof(struct multipath_info)*mddev->raid_disks, + conf->multipaths = kzalloc(sizeof(struct multipath_info)*mddev->raid_disks, GFP_KERNEL); if (!conf->multipaths) { printk(KERN_ERR @@ -462,7 +456,6 @@ static int multipath_run (mddev_t *mddev) mdname(mddev)); goto out_free_conf; } - memset(conf->multipaths, 0, sizeof(struct multipath_info)*mddev->raid_disks); conf->working_disks = 0; ITERATE_RDEV(mddev,rdev,tmp) { @@ -557,9 +550,10 @@ static int multipath_stop (mddev_t *mddev) return 0; } -static mdk_personality_t multipath_personality= +static struct mdk_personality multipath_personality = { .name = "multipath", + .level = LEVEL_MULTIPATH, .owner = THIS_MODULE, .make_request = multipath_make_request, .run = multipath_run, @@ -572,15 +566,17 @@ static mdk_personality_t multipath_personality= static int __init multipath_init (void) { - return register_md_personality (MULTIPATH, &multipath_personality); + return register_md_personality (&multipath_personality); } static void __exit multipath_exit (void) { - unregister_md_personality (MULTIPATH); + unregister_md_personality (&multipath_personality); } module_init(multipath_init); module_exit(multipath_exit); MODULE_LICENSE("GPL"); MODULE_ALIAS("md-personality-7"); /* MULTIPATH */ +MODULE_ALIAS("md-multipath"); +MODULE_ALIAS("md-level--4"); diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index fece3277c2a..abbca150202 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -113,21 +113,16 @@ static int create_strip_zones (mddev_t *mddev) } printk("raid0: FINAL %d zones\n", conf->nr_strip_zones); - conf->strip_zone = kmalloc(sizeof(struct strip_zone)* + conf->strip_zone = kzalloc(sizeof(struct strip_zone)* conf->nr_strip_zones, GFP_KERNEL); if (!conf->strip_zone) return 1; - conf->devlist = kmalloc(sizeof(mdk_rdev_t*)* + conf->devlist = kzalloc(sizeof(mdk_rdev_t*)* conf->nr_strip_zones*mddev->raid_disks, GFP_KERNEL); if (!conf->devlist) return 1; - memset(conf->strip_zone, 0,sizeof(struct strip_zone)* - conf->nr_strip_zones); - memset(conf->devlist, 0, - sizeof(mdk_rdev_t*) * conf->nr_strip_zones * mddev->raid_disks); - /* The first zone must contain all devices, so here we check that * there is a proper alignment of slots to devices and find them all */ @@ -280,7 +275,11 @@ static int raid0_run (mddev_t *mddev) mdk_rdev_t *rdev; struct list_head *tmp; - printk("%s: setting max_sectors to %d, segment boundary to %d\n", + if (mddev->chunk_size == 0) { + printk(KERN_ERR "md/raid0: non-zero chunk size required.\n"); + return -EINVAL; + } + printk(KERN_INFO "%s: setting max_sectors to %d, segment boundary to %d\n", mdname(mddev), mddev->chunk_size >> 9, (mddev->chunk_size>>1)-1); @@ -361,7 +360,7 @@ static int raid0_run (mddev_t *mddev) * chunksize should be used in that case. */ { - int stripe = mddev->raid_disks * mddev->chunk_size / PAGE_CACHE_SIZE; + int stripe = mddev->raid_disks * mddev->chunk_size / PAGE_SIZE; if (mddev->queue->backing_dev_info.ra_pages < 2* stripe) mddev->queue->backing_dev_info.ra_pages = 2* stripe; } @@ -512,9 +511,10 @@ static void raid0_status (struct seq_file *seq, mddev_t *mddev) return; } -static mdk_personality_t raid0_personality= +static struct mdk_personality raid0_personality= { .name = "raid0", + .level = 0, .owner = THIS_MODULE, .make_request = raid0_make_request, .run = raid0_run, @@ -524,15 +524,17 @@ static mdk_personality_t raid0_personality= static int __init raid0_init (void) { - return register_md_personality (RAID0, &raid0_personality); + return register_md_personality (&raid0_personality); } static void raid0_exit (void) { - unregister_md_personality (RAID0); + unregister_md_personality (&raid0_personality); } module_init(raid0_init); module_exit(raid0_exit); MODULE_LICENSE("GPL"); MODULE_ALIAS("md-personality-2"); /* RAID0 */ +MODULE_ALIAS("md-raid0"); +MODULE_ALIAS("md-level-0"); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 229d7b20429..a06ff91f27e 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -47,10 +47,11 @@ */ #define NR_RAID1_BIOS 256 -static mdk_personality_t raid1_personality; static void unplug_slaves(mddev_t *mddev); +static void allow_barrier(conf_t *conf); +static void lower_barrier(conf_t *conf); static void * r1bio_pool_alloc(gfp_t gfp_flags, void *data) { @@ -59,10 +60,8 @@ static void * r1bio_pool_alloc(gfp_t gfp_flags, void *data) int size = offsetof(r1bio_t, bios[pi->raid_disks]); /* allocate a r1bio with room for raid_disks entries in the bios array */ - r1_bio = kmalloc(size, gfp_flags); - if (r1_bio) - memset(r1_bio, 0, size); - else + r1_bio = kzalloc(size, gfp_flags); + if (!r1_bio) unplug_slaves(pi->mddev); return r1_bio; @@ -104,15 +103,30 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data) } /* * Allocate RESYNC_PAGES data pages and attach them to - * the first bio; + * the first bio. + * If this is a user-requested check/repair, allocate + * RESYNC_PAGES for each bio. */ - bio = r1_bio->bios[0]; - for (i = 0; i < RESYNC_PAGES; i++) { - page = alloc_page(gfp_flags); - if (unlikely(!page)) - goto out_free_pages; - - bio->bi_io_vec[i].bv_page = page; + if (test_bit(MD_RECOVERY_REQUESTED, &pi->mddev->recovery)) + j = pi->raid_disks; + else + j = 1; + while(j--) { + bio = r1_bio->bios[j]; + for (i = 0; i < RESYNC_PAGES; i++) { + page = alloc_page(gfp_flags); + if (unlikely(!page)) + goto out_free_pages; + + bio->bi_io_vec[i].bv_page = page; + } + } + /* If not user-requests, copy the page pointers to all bios */ + if (!test_bit(MD_RECOVERY_REQUESTED, &pi->mddev->recovery)) { + for (i=0; i<RESYNC_PAGES ; i++) + for (j=1; j<pi->raid_disks; j++) + r1_bio->bios[j]->bi_io_vec[i].bv_page = + r1_bio->bios[0]->bi_io_vec[i].bv_page; } r1_bio->master_bio = NULL; @@ -120,8 +134,10 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data) return r1_bio; out_free_pages: - for ( ; i > 0 ; i--) - __free_page(bio->bi_io_vec[i-1].bv_page); + for (i=0; i < RESYNC_PAGES ; i++) + for (j=0 ; j < pi->raid_disks; j++) + safe_put_page(r1_bio->bios[j]->bi_io_vec[i].bv_page); + j = -1; out_free_bio: while ( ++j < pi->raid_disks ) bio_put(r1_bio->bios[j]); @@ -132,14 +148,16 @@ out_free_bio: static void r1buf_pool_free(void *__r1_bio, void *data) { struct pool_info *pi = data; - int i; + int i,j; r1bio_t *r1bio = __r1_bio; - struct bio *bio = r1bio->bios[0]; - for (i = 0; i < RESYNC_PAGES; i++) { - __free_page(bio->bi_io_vec[i].bv_page); - bio->bi_io_vec[i].bv_page = NULL; - } + for (i = 0; i < RESYNC_PAGES; i++) + for (j = pi->raid_disks; j-- ;) { + if (j == 0 || + r1bio->bios[j]->bi_io_vec[i].bv_page != + r1bio->bios[0]->bi_io_vec[i].bv_page) + safe_put_page(r1bio->bios[j]->bi_io_vec[i].bv_page); + } for (i=0 ; i < pi->raid_disks; i++) bio_put(r1bio->bios[i]); @@ -152,7 +170,7 @@ static void put_all_bios(conf_t *conf, r1bio_t *r1_bio) for (i = 0; i < conf->raid_disks; i++) { struct bio **bio = r1_bio->bios + i; - if (*bio) + if (*bio && *bio != IO_BLOCKED) bio_put(*bio); *bio = NULL; } @@ -160,20 +178,13 @@ static void put_all_bios(conf_t *conf, r1bio_t *r1_bio) static inline void free_r1bio(r1bio_t *r1_bio) { - unsigned long flags; - conf_t *conf = mddev_to_conf(r1_bio->mddev); /* * Wake up any possible resync thread that waits for the device * to go idle. */ - spin_lock_irqsave(&conf->resync_lock, flags); - if (!--conf->nr_pending) { - wake_up(&conf->wait_idle); - wake_up(&conf->wait_resume); - } - spin_unlock_irqrestore(&conf->resync_lock, flags); + allow_barrier(conf); put_all_bios(conf, r1_bio); mempool_free(r1_bio, conf->r1bio_pool); @@ -182,22 +193,17 @@ static inline void free_r1bio(r1bio_t *r1_bio) static inline void put_buf(r1bio_t *r1_bio) { conf_t *conf = mddev_to_conf(r1_bio->mddev); - unsigned long flags; + int i; - mempool_free(r1_bio, conf->r1buf_pool); + for (i=0; i<conf->raid_disks; i++) { + struct bio *bio = r1_bio->bios[i]; + if (bio->bi_end_io) + rdev_dec_pending(conf->mirrors[i].rdev, r1_bio->mddev); + } - spin_lock_irqsave(&conf->resync_lock, flags); - if (!conf->barrier) - BUG(); - --conf->barrier; - wake_up(&conf->wait_resume); - wake_up(&conf->wait_idle); + mempool_free(r1_bio, conf->r1buf_pool); - if (!--conf->nr_pending) { - wake_up(&conf->wait_idle); - wake_up(&conf->wait_resume); - } - spin_unlock_irqrestore(&conf->resync_lock, flags); + lower_barrier(conf); } static void reschedule_retry(r1bio_t *r1_bio) @@ -208,8 +214,10 @@ static void reschedule_retry(r1bio_t *r1_bio) spin_lock_irqsave(&conf->device_lock, flags); list_add(&r1_bio->retry_list, &conf->retry_list); + conf->nr_queued ++; spin_unlock_irqrestore(&conf->device_lock, flags); + wake_up(&conf->wait_barrier); md_wakeup_thread(mddev->thread); } @@ -261,9 +269,9 @@ static int raid1_end_read_request(struct bio *bio, unsigned int bytes_done, int /* * this branch is our 'one mirror IO has finished' event handler: */ - if (!uptodate) - md_error(r1_bio->mddev, conf->mirrors[mirror].rdev); - else + update_head_pos(mirror, r1_bio); + + if (uptodate || conf->working_disks <= 1) { /* * Set R1BIO_Uptodate in our master bio, so that * we will return a good error code for to the higher @@ -273,16 +281,11 @@ static int raid1_end_read_request(struct bio *bio, unsigned int bytes_done, int * user-side. So if something waits for IO, then it will * wait for the 'master' bio. */ - set_bit(R1BIO_Uptodate, &r1_bio->state); - - update_head_pos(mirror, r1_bio); + if (uptodate) + set_bit(R1BIO_Uptodate, &r1_bio->state); - /* - * we have only one bio on the read side - */ - if (uptodate) raid_end_bio_io(r1_bio); - else { + } else { /* * oops, read error: */ @@ -378,7 +381,7 @@ static int raid1_end_write_request(struct bio *bio, unsigned int bytes_done, int /* free extra copy of the data pages */ int i = bio->bi_vcnt; while (i--) - __free_page(bio->bi_io_vec[i].bv_page); + safe_put_page(bio->bi_io_vec[i].bv_page); } /* clear the bitmap if all writes complete successfully */ bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector, @@ -433,11 +436,13 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio) new_disk = 0; for (rdev = rcu_dereference(conf->mirrors[new_disk].rdev); + r1_bio->bios[new_disk] == IO_BLOCKED || !rdev || !test_bit(In_sync, &rdev->flags) || test_bit(WriteMostly, &rdev->flags); rdev = rcu_dereference(conf->mirrors[++new_disk].rdev)) { - if (rdev && test_bit(In_sync, &rdev->flags)) + if (rdev && test_bit(In_sync, &rdev->flags) && + r1_bio->bios[new_disk] != IO_BLOCKED) wonly_disk = new_disk; if (new_disk == conf->raid_disks - 1) { @@ -451,11 +456,13 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio) /* make sure the disk is operational */ for (rdev = rcu_dereference(conf->mirrors[new_disk].rdev); + r1_bio->bios[new_disk] == IO_BLOCKED || !rdev || !test_bit(In_sync, &rdev->flags) || test_bit(WriteMostly, &rdev->flags); rdev = rcu_dereference(conf->mirrors[new_disk].rdev)) { - if (rdev && test_bit(In_sync, &rdev->flags)) + if (rdev && test_bit(In_sync, &rdev->flags) && + r1_bio->bios[new_disk] != IO_BLOCKED) wonly_disk = new_disk; if (new_disk <= 0) @@ -492,7 +499,7 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio) rdev = rcu_dereference(conf->mirrors[disk].rdev); - if (!rdev || + if (!rdev || r1_bio->bios[disk] == IO_BLOCKED || !test_bit(In_sync, &rdev->flags) || test_bit(WriteMostly, &rdev->flags)) continue; @@ -520,7 +527,7 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio) /* cannot risk returning a device that failed * before we inc'ed nr_pending */ - atomic_dec(&rdev->nr_pending); + rdev_dec_pending(rdev, conf->mddev); goto retry; } conf->next_seq_sect = this_sector + sectors; @@ -593,42 +600,119 @@ static int raid1_issue_flush(request_queue_t *q, struct gendisk *disk, return ret; } -/* - * Throttle resync depth, so that we can both get proper overlapping of - * requests, but are still able to handle normal requests quickly. +/* Barriers.... + * Sometimes we need to suspend IO while we do something else, + * either some resync/recovery, or reconfigure the array. + * To do this we raise a 'barrier'. + * The 'barrier' is a counter that can be raised multiple times + * to count how many activities are happening which preclude + * normal IO. + * We can only raise the barrier if there is no pending IO. + * i.e. if nr_pending == 0. + * We choose only to raise the barrier if no-one is waiting for the + * barrier to go down. This means that as soon as an IO request + * is ready, no other operations which require a barrier will start + * until the IO request has had a chance. + * + * So: regular IO calls 'wait_barrier'. When that returns there + * is no backgroup IO happening, It must arrange to call + * allow_barrier when it has finished its IO. + * backgroup IO calls must call raise_barrier. Once that returns + * there is no normal IO happeing. It must arrange to call + * lower_barrier when the particular background IO completes. */ #define RESYNC_DEPTH 32 -static void device_barrier(conf_t *conf, sector_t sect) +static void raise_barrier(conf_t *conf) { spin_lock_irq(&conf->resync_lock); - wait_event_lock_irq(conf->wait_idle, !waitqueue_active(&conf->wait_resume), - conf->resync_lock, raid1_unplug(conf->mddev->queue)); - - if (!conf->barrier++) { - wait_event_lock_irq(conf->wait_idle, !conf->nr_pending, - conf->resync_lock, raid1_unplug(conf->mddev->queue)); - if (conf->nr_pending) - BUG(); + + /* Wait until no block IO is waiting */ + wait_event_lock_irq(conf->wait_barrier, !conf->nr_waiting, + conf->resync_lock, + raid1_unplug(conf->mddev->queue)); + + /* block any new IO from starting */ + conf->barrier++; + + /* No wait for all pending IO to complete */ + wait_event_lock_irq(conf->wait_barrier, + !conf->nr_pending && conf->barrier < RESYNC_DEPTH, + conf->resync_lock, + raid1_unplug(conf->mddev->queue)); + + spin_unlock_irq(&conf->resync_lock); +} + +static void lower_barrier(conf_t *conf) +{ + unsigned long flags; + spin_lock_irqsave(&conf->resync_lock, flags); + conf->barrier--; + spin_unlock_irqrestore(&conf->resync_lock, flags); + wake_up(&conf->wait_barrier); +} + +static void wait_barrier(conf_t *conf) +{ + spin_lock_irq(&conf->resync_lock); + if (conf->barrier) { + conf->nr_waiting++; + wait_event_lock_irq(conf->wait_barrier, !conf->barrier, + conf->resync_lock, + raid1_unplug(conf->mddev->queue)); + conf->nr_waiting--; } - wait_event_lock_irq(conf->wait_resume, conf->barrier < RESYNC_DEPTH, - conf->resync_lock, raid1_unplug(conf->mddev->queue)); - conf->next_resync = sect; + conf->nr_pending++; + spin_unlock_irq(&conf->resync_lock); +} + +static void allow_barrier(conf_t *conf) +{ + unsigned long flags; + spin_lock_irqsave(&conf->resync_lock, flags); + conf->nr_pending--; + spin_unlock_irqrestore(&conf->resync_lock, flags); + wake_up(&conf->wait_barrier); +} + +static void freeze_array(conf_t *conf) +{ + /* stop syncio and normal IO and wait for everything to + * go quite. + * We increment barrier and nr_waiting, and then + * wait until barrier+nr_pending match nr_queued+2 + */ + spin_lock_irq(&conf->resync_lock); + conf->barrier++; + conf->nr_waiting++; + wait_event_lock_irq(conf->wait_barrier, + conf->barrier+conf->nr_pending == conf->nr_queued+2, + conf->resync_lock, + raid1_unplug(conf->mddev->queue)); + spin_unlock_irq(&conf->resync_lock); +} +static void unfreeze_array(conf_t *conf) +{ + /* reverse the effect of the freeze */ + spin_lock_irq(&conf->resync_lock); + conf->barrier--; + conf->nr_waiting--; + wake_up(&conf->wait_barrier); spin_unlock_irq(&conf->resync_lock); } + /* duplicate the data pages for behind I/O */ static struct page **alloc_behind_pages(struct bio *bio) { int i; struct bio_vec *bvec; - struct page **pages = kmalloc(bio->bi_vcnt * sizeof(struct page *), + struct page **pages = kzalloc(bio->bi_vcnt * sizeof(struct page *), GFP_NOIO); if (unlikely(!pages)) goto do_sync_io; - memset(pages, 0, bio->bi_vcnt * sizeof(struct page *)); - bio_for_each_segment(bvec, bio, i) { pages[i] = alloc_page(GFP_NOIO); if (unlikely(!pages[i])) @@ -644,7 +728,7 @@ static struct page **alloc_behind_pages(struct bio *bio) do_sync_io: if (pages) for (i = 0; i < bio->bi_vcnt && pages[i]; i++) - __free_page(pages[i]); + put_page(pages[i]); kfree(pages); PRINTK("%dB behind alloc failed, doing sync I/O\n", bio->bi_size); return NULL; @@ -678,10 +762,7 @@ static int make_request(request_queue_t *q, struct bio * bio) */ md_write_start(mddev, bio); /* wait on superblock update early */ - spin_lock_irq(&conf->resync_lock); - wait_event_lock_irq(conf->wait_resume, !conf->barrier, conf->resync_lock, ); - conf->nr_pending++; - spin_unlock_irq(&conf->resync_lock); + wait_barrier(conf); disk_stat_inc(mddev->gendisk, ios[rw]); disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio)); @@ -749,7 +830,7 @@ static int make_request(request_queue_t *q, struct bio * bio) !test_bit(Faulty, &rdev->flags)) { atomic_inc(&rdev->nr_pending); if (test_bit(Faulty, &rdev->flags)) { - atomic_dec(&rdev->nr_pending); + rdev_dec_pending(rdev, mddev); r1_bio->bios[i] = NULL; } else r1_bio->bios[i] = bio; @@ -909,13 +990,8 @@ static void print_conf(conf_t *conf) static void close_sync(conf_t *conf) { - spin_lock_irq(&conf->resync_lock); - wait_event_lock_irq(conf->wait_resume, !conf->barrier, - conf->resync_lock, raid1_unplug(conf->mddev->queue)); - spin_unlock_irq(&conf->resync_lock); - - if (conf->barrier) BUG(); - if (waitqueue_active(&conf->wait_idle)) BUG(); + wait_barrier(conf); + allow_barrier(conf); mempool_destroy(conf->r1buf_pool); conf->r1buf_pool = NULL; @@ -1015,28 +1091,27 @@ abort: static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error) { - int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private); - conf_t *conf = mddev_to_conf(r1_bio->mddev); + int i; if (bio->bi_size) return 1; - if (r1_bio->bios[r1_bio->read_disk] != bio) - BUG(); - update_head_pos(r1_bio->read_disk, r1_bio); + for (i=r1_bio->mddev->raid_disks; i--; ) + if (r1_bio->bios[i] == bio) + break; + BUG_ON(i < 0); + update_head_pos(i, r1_bio); /* * we have read a block, now it needs to be re-written, * or re-read if the read failed. * We don't do much here, just schedule handling by raid1d */ - if (!uptodate) { - md_error(r1_bio->mddev, - conf->mirrors[r1_bio->read_disk].rdev); - } else + if (test_bit(BIO_UPTODATE, &bio->bi_flags)) set_bit(R1BIO_Uptodate, &r1_bio->state); - rdev_dec_pending(conf->mirrors[r1_bio->read_disk].rdev, conf->mddev); - reschedule_retry(r1_bio); + + if (atomic_dec_and_test(&r1_bio->remaining)) + reschedule_retry(r1_bio); return 0; } @@ -1066,7 +1141,6 @@ static int end_sync_write(struct bio *bio, unsigned int bytes_done, int error) md_done_sync(mddev, r1_bio->sectors, uptodate); put_buf(r1_bio); } - rdev_dec_pending(conf->mirrors[mirror].rdev, mddev); return 0; } @@ -1079,34 +1153,173 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio) bio = r1_bio->bios[r1_bio->read_disk]; -/* - if (r1_bio->sector == 0) printk("First sync write startss\n"); -*/ - /* - * schedule writes - */ + + if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) { + /* We have read all readable devices. If we haven't + * got the block, then there is no hope left. + * If we have, then we want to do a comparison + * and skip the write if everything is the same. + * If any blocks failed to read, then we need to + * attempt an over-write + */ + int primary; + if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) { + for (i=0; i<mddev->raid_disks; i++) + if (r1_bio->bios[i]->bi_end_io == end_sync_read) + md_error(mddev, conf->mirrors[i].rdev); + + md_done_sync(mddev, r1_bio->sectors, 1); + put_buf(r1_bio); + return; + } + for (primary=0; primary<mddev->raid_disks; primary++) + if (r1_bio->bios[primary]->bi_end_io == end_sync_read && + test_bit(BIO_UPTODATE, &r1_bio->bios[primary]->bi_flags)) { + r1_bio->bios[primary]->bi_end_io = NULL; + rdev_dec_pending(conf->mirrors[primary].rdev, mddev); + break; + } + r1_bio->read_disk = primary; + for (i=0; i<mddev->raid_disks; i++) + if (r1_bio->bios[i]->bi_end_io == end_sync_read && + test_bit(BIO_UPTODATE, &r1_bio->bios[i]->bi_flags)) { + int j; + int vcnt = r1_bio->sectors >> (PAGE_SHIFT- 9); + struct bio *pbio = r1_bio->bios[primary]; + struct bio *sbio = r1_bio->bios[i]; + for (j = vcnt; j-- ; ) + if (memcmp(page_address(pbio->bi_io_vec[j].bv_page), + page_address(sbio->bi_io_vec[j].bv_page), + PAGE_SIZE)) + break; + if (j >= 0) + mddev->resync_mismatches += r1_bio->sectors; + if (j < 0 || test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) { + sbio->bi_end_io = NULL; + rdev_dec_pending(conf->mirrors[i].rdev, mddev); + } else { + /* fixup the bio for reuse */ + sbio->bi_vcnt = vcnt; + sbio->bi_size = r1_bio->sectors << 9; + sbio->bi_idx = 0; + sbio->bi_phys_segments = 0; + sbio->bi_hw_segments = 0; + sbio->bi_hw_front_size = 0; + sbio->bi_hw_back_size = 0; + sbio->bi_flags &= ~(BIO_POOL_MASK - 1); + sbio->bi_flags |= 1 << BIO_UPTODATE; + sbio->bi_next = NULL; + sbio->bi_sector = r1_bio->sector + + conf->mirrors[i].rdev->data_offset; + sbio->bi_bdev = conf->mirrors[i].rdev->bdev; + } + } + } if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) { - /* - * There is no point trying a read-for-reconstruct as - * reconstruct is about to be aborted + /* ouch - failed to read all of that. + * Try some synchronous reads of other devices to get + * good data, much like with normal read errors. Only + * read into the pages we already have so they we don't + * need to re-issue the read request. + * We don't need to freeze the array, because being in an + * active sync request, there is no normal IO, and + * no overlapping syncs. */ - char b[BDEVNAME_SIZE]; - printk(KERN_ALERT "raid1: %s: unrecoverable I/O read error" - " for block %llu\n", - bdevname(bio->bi_bdev,b), - (unsigned long long)r1_bio->sector); - md_done_sync(mddev, r1_bio->sectors, 0); - put_buf(r1_bio); - return; + sector_t sect = r1_bio->sector; + int sectors = r1_bio->sectors; + int idx = 0; + + while(sectors) { + int s = sectors; + int d = r1_bio->read_disk; + int success = 0; + mdk_rdev_t *rdev; + + if (s > (PAGE_SIZE>>9)) + s = PAGE_SIZE >> 9; + do { + if (r1_bio->bios[d]->bi_end_io == end_sync_read) { + rdev = conf->mirrors[d].rdev; + if (sync_page_io(rdev->bdev, + sect + rdev->data_offset, + s<<9, + bio->bi_io_vec[idx].bv_page, + READ)) { + success = 1; + break; + } + } + d++; + if (d == conf->raid_disks) + d = 0; + } while (!success && d != r1_bio->read_disk); + + if (success) { + int start = d; + /* write it back and re-read */ + set_bit(R1BIO_Uptodate, &r1_bio->state); + while (d != r1_bio->read_disk) { + if (d == 0) + d = conf->raid_disks; + d--; + if (r1_bio->bios[d]->bi_end_io != end_sync_read) + continue; + rdev = conf->mirrors[d].rdev; + atomic_add(s, &rdev->corrected_errors); + if (sync_page_io(rdev->bdev, + sect + rdev->data_offset, + s<<9, + bio->bi_io_vec[idx].bv_page, + WRITE) == 0) + md_error(mddev, rdev); + } + d = start; + while (d != r1_bio->read_disk) { + if (d == 0) + d = conf->raid_disks; + d--; + if (r1_bio->bios[d]->bi_end_io != end_sync_read) + continue; + rdev = conf->mirrors[d].rdev; + if (sync_page_io(rdev->bdev, + sect + rdev->data_offset, + s<<9, + bio->bi_io_vec[idx].bv_page, + READ) == 0) + md_error(mddev, rdev); + } + } else { + char b[BDEVNAME_SIZE]; + /* Cannot read from anywhere, array is toast */ + md_error(mddev, conf->mirrors[r1_bio->read_disk].rdev); + printk(KERN_ALERT "raid1: %s: unrecoverable I/O read error" + " for block %llu\n", + bdevname(bio->bi_bdev,b), + (unsigned long long)r1_bio->sector); + md_done_sync(mddev, r1_bio->sectors, 0); + put_buf(r1_bio); + return; + } + sectors -= s; + sect += s; + idx ++; + } } + /* + * schedule writes + */ atomic_set(&r1_bio->remaining, 1); for (i = 0; i < disks ; i++) { wbio = r1_bio->bios[i]; - if (wbio->bi_end_io != end_sync_write) + if (wbio->bi_end_io == NULL || + (wbio->bi_end_io == end_sync_read && + (i == r1_bio->read_disk || + !test_bit(MD_RECOVERY_SYNC, &mddev->recovery)))) continue; - atomic_inc(&conf->mirrors[i].rdev->nr_pending); + wbio->bi_rw = WRITE; + wbio->bi_end_io = end_sync_write; atomic_inc(&r1_bio->remaining); md_sync_acct(conf->mirrors[i].rdev->bdev, wbio->bi_size >> 9); @@ -1167,6 +1380,7 @@ static void raid1d(mddev_t *mddev) break; r1_bio = list_entry(head->prev, r1bio_t, retry_list); list_del(head->prev); + conf->nr_queued--; spin_unlock_irqrestore(&conf->device_lock, flags); mddev = r1_bio->mddev; @@ -1206,6 +1420,86 @@ static void raid1d(mddev_t *mddev) } } else { int disk; + + /* we got a read error. Maybe the drive is bad. Maybe just + * the block and we can fix it. + * We freeze all other IO, and try reading the block from + * other devices. When we find one, we re-write + * and check it that fixes the read error. + * This is all done synchronously while the array is + * frozen + */ + sector_t sect = r1_bio->sector; + int sectors = r1_bio->sectors; + freeze_array(conf); + if (mddev->ro == 0) while(sectors) { + int s = sectors; + int d = r1_bio->read_disk; + int success = 0; + + if (s > (PAGE_SIZE>>9)) + s = PAGE_SIZE >> 9; + + do { + rdev = conf->mirrors[d].rdev; + if (rdev && + test_bit(In_sync, &rdev->flags) && + sync_page_io(rdev->bdev, + sect + rdev->data_offset, + s<<9, + conf->tmppage, READ)) + success = 1; + else { + d++; + if (d == conf->raid_disks) + d = 0; + } + } while (!success && d != r1_bio->read_disk); + + if (success) { + /* write it back and re-read */ + int start = d; + while (d != r1_bio->read_disk) { + if (d==0) + d = conf->raid_disks; + d--; + rdev = conf->mirrors[d].rdev; + atomic_add(s, &rdev->corrected_errors); + if (rdev && + test_bit(In_sync, &rdev->flags)) { + if (sync_page_io(rdev->bdev, + sect + rdev->data_offset, + s<<9, conf->tmppage, WRITE) == 0) + /* Well, this device is dead */ + md_error(mddev, rdev); + } + } + d = start; + while (d != r1_bio->read_disk) { + if (d==0) + d = conf->raid_disks; + d--; + rdev = conf->mirrors[d].rdev; + if (rdev && + test_bit(In_sync, &rdev->flags)) { + if (sync_page_io(rdev->bdev, + sect + rdev->data_offset, + s<<9, conf->tmppage, READ) == 0) + /* Well, this device is dead */ + md_error(mddev, rdev); + } + } + } else { + /* Cannot read from anywhere -- bye bye array */ + md_error(mddev, conf->mirrors[r1_bio->read_disk].rdev); + break; + } + sectors -= s; + sect += s; + } + + unfreeze_array(conf); + bio = r1_bio->bios[r1_bio->read_disk]; if ((disk=read_balance(conf, r1_bio)) == -1) { printk(KERN_ALERT "raid1: %s: unrecoverable I/O" @@ -1214,7 +1508,8 @@ static void raid1d(mddev_t *mddev) (unsigned long long)r1_bio->sector); raid_end_bio_io(r1_bio); } else { - r1_bio->bios[r1_bio->read_disk] = NULL; + r1_bio->bios[r1_bio->read_disk] = + mddev->ro ? IO_BLOCKED : NULL; r1_bio->read_disk = disk; bio_put(bio); bio = bio_clone(r1_bio->master_bio, GFP_NOIO); @@ -1269,14 +1564,13 @@ static int init_resync(conf_t *conf) static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster) { conf_t *conf = mddev_to_conf(mddev); - mirror_info_t *mirror; r1bio_t *r1_bio; struct bio *bio; sector_t max_sector, nr_sectors; - int disk; + int disk = -1; int i; - int wonly; - int write_targets = 0; + int wonly = -1; + int write_targets = 0, read_targets = 0; int sync_blocks; int still_degraded = 0; @@ -1317,55 +1611,35 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i return sync_blocks; } /* - * If there is non-resync activity waiting for us then - * put in a delay to throttle resync. + * If there is non-resync activity waiting for a turn, + * and resync is going fast enough, + * then let it though before starting on this new sync request. */ - if (!go_faster && waitqueue_active(&conf->wait_resume)) + if (!go_faster && conf->nr_waiting) msleep_interruptible(1000); - device_barrier(conf, sector_nr + RESYNC_SECTORS); - - /* - * If reconstructing, and >1 working disc, - * could dedicate one to rebuild and others to - * service read requests .. - */ - disk = conf->last_used; - /* make sure disk is operational */ - wonly = disk; - while (conf->mirrors[disk].rdev == NULL || - !test_bit(In_sync, &conf->mirrors[disk].rdev->flags) || - test_bit(WriteMostly, &conf->mirrors[disk].rdev->flags) - ) { - if (conf->mirrors[disk].rdev && - test_bit(In_sync, &conf->mirrors[disk].rdev->flags)) - wonly = disk; - if (disk <= 0) - disk = conf->raid_disks; - disk--; - if (disk == conf->last_used) { - disk = wonly; - break; - } - } - conf->last_used = disk; - atomic_inc(&conf->mirrors[disk].rdev->nr_pending); + raise_barrier(conf); - mirror = conf->mirrors + disk; + conf->next_resync = sector_nr; r1_bio = mempool_alloc(conf->r1buf_pool, GFP_NOIO); - - spin_lock_irq(&conf->resync_lock); - conf->nr_pending++; - spin_unlock_irq(&conf->resync_lock); + rcu_read_lock(); + /* + * If we get a correctably read error during resync or recovery, + * we might want to read from a different device. So we + * flag all drives that could conceivably be read from for READ, + * and any others (which will be non-In_sync devices) for WRITE. + * If a read fails, we try reading from something else for which READ + * is OK. + */ r1_bio->mddev = mddev; r1_bio->sector = sector_nr; r1_bio->state = 0; set_bit(R1BIO_IsSync, &r1_bio->state); - r1_bio->read_disk = disk; for (i=0; i < conf->raid_disks; i++) { + mdk_rdev_t *rdev; bio = r1_bio->bios[i]; /* take from bio_init */ @@ -1380,35 +1654,49 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i bio->bi_end_io = NULL; bio->bi_private = NULL; - if (i == disk) { - bio->bi_rw = READ; - bio->bi_end_io = end_sync_read; - } else if (conf->mirrors[i].rdev == NULL || - test_bit(Faulty, &conf->mirrors[i].rdev->flags)) { + rdev = rcu_dereference(conf->mirrors[i].rdev); + if (rdev == NULL || + test_bit(Faulty, &rdev->flags)) { still_degraded = 1; continue; - } else if (!test_bit(In_sync, &conf->mirrors[i].rdev->flags) || - sector_nr + RESYNC_SECTORS > mddev->recovery_cp || - test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) { + } else if (!test_bit(In_sync, &rdev->flags)) { bio->bi_rw = WRITE; bio->bi_end_io = end_sync_write; write_targets ++; - } else - /* no need to read or write here */ - continue; - bio->bi_sector = sector_nr + conf->mirrors[i].rdev->data_offset; - bio->bi_bdev = conf->mirrors[i].rdev->bdev; + } else { + /* may need to read from here */ + bio->bi_rw = READ; + bio->bi_end_io = end_sync_read; + if (test_bit(WriteMostly, &rdev->flags)) { + if (wonly < 0) + wonly = i; + } else { + if (disk < 0) + disk = i; + } + read_targets++; + } + atomic_inc(&rdev->nr_pending); + bio->bi_sector = sector_nr + rdev->data_offset; + bio->bi_bdev = rdev->bdev; bio->bi_private = r1_bio; } + rcu_read_unlock(); + if (disk < 0) + disk = wonly; + r1_bio->read_disk = disk; + + if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && read_targets > 0) + /* extra read targets are also write targets */ + write_targets += read_targets-1; - if (write_targets == 0) { + if (write_targets == 0 || read_targets == 0) { /* There is nowhere to write, so all non-sync * drives must be failed - so we are finished */ sector_t rv = max_sector - sector_nr; *skipped = 1; put_buf(r1_bio); - rdev_dec_pending(conf->mirrors[disk].rdev, mddev); return rv; } @@ -1436,10 +1724,10 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i for (i=0 ; i < conf->raid_disks; i++) { bio = r1_bio->bios[i]; if (bio->bi_end_io) { - page = r1_bio->bios[0]->bi_io_vec[bio->bi_vcnt].bv_page; + page = bio->bi_io_vec[bio->bi_vcnt].bv_page; if (bio_add_page(bio, page, len, 0) == 0) { /* stop here */ - r1_bio->bios[0]->bi_io_vec[bio->bi_vcnt].bv_page = page; + bio->bi_io_vec[bio->bi_vcnt].bv_page = page; while (i > 0) { i--; bio = r1_bio->bios[i]; @@ -1459,12 +1747,28 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i sync_blocks -= (len>>9); } while (r1_bio->bios[disk]->bi_vcnt < RESYNC_PAGES); bio_full: - bio = r1_bio->bios[disk]; r1_bio->sectors = nr_sectors; - md_sync_acct(mirror->rdev->bdev, nr_sectors); + /* For a user-requested sync, we read all readable devices and do a + * compare + */ + if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) { + atomic_set(&r1_bio->remaining, read_targets); + for (i=0; i<conf->raid_disks; i++) { + bio = r1_bio->bios[i]; + if (bio->bi_end_io == end_sync_read) { + md_sync_acct(conf->mirrors[i].rdev->bdev, nr_sectors); + generic_make_request(bio); + } + } + } else { + atomic_set(&r1_bio->remaining, 1); + bio = r1_bio->bios[r1_bio->read_disk]; + md_sync_acct(conf->mirrors[r1_bio->read_disk].rdev->bdev, + nr_sectors); + generic_make_request(bio); - generic_make_request(bio); + } return nr_sectors; } @@ -1487,18 +1791,19 @@ static int run(mddev_t *mddev) * bookkeeping area. [whatever we allocate in run(), * should be freed in stop()] */ - conf = kmalloc(sizeof(conf_t), GFP_KERNEL); + conf = kzalloc(sizeof(conf_t), GFP_KERNEL); mddev->private = conf; if (!conf) goto out_no_mem; - memset(conf, 0, sizeof(*conf)); - conf->mirrors = kmalloc(sizeof(struct mirror_info)*mddev->raid_disks, + conf->mirrors = kzalloc(sizeof(struct mirror_info)*mddev->raid_disks, GFP_KERNEL); if (!conf->mirrors) goto out_no_mem; - memset(conf->mirrors, 0, sizeof(struct mirror_info)*mddev->raid_disks); + conf->tmppage = alloc_page(GFP_KERNEL); + if (!conf->tmppage) + goto out_no_mem; conf->poolinfo = kmalloc(sizeof(*conf->poolinfo), GFP_KERNEL); if (!conf->poolinfo) @@ -1542,8 +1847,7 @@ static int run(mddev_t *mddev) mddev->recovery_cp = MaxSector; spin_lock_init(&conf->resync_lock); - init_waitqueue_head(&conf->wait_idle); - init_waitqueue_head(&conf->wait_resume); + init_waitqueue_head(&conf->wait_barrier); bio_list_init(&conf->pending_bio_list); bio_list_init(&conf->flushing_bio_list); @@ -1583,7 +1887,6 @@ static int run(mddev_t *mddev) mdname(mddev)); goto out_free_conf; } - if (mddev->bitmap) mddev->thread->timeout = mddev->bitmap->daemon_sleep * HZ; printk(KERN_INFO "raid1: raid set %s active with %d out of %d mirrors\n", @@ -1608,6 +1911,7 @@ out_free_conf: if (conf->r1bio_pool) mempool_destroy(conf->r1bio_pool); kfree(conf->mirrors); + safe_put_page(conf->tmppage); kfree(conf->poolinfo); kfree(conf); mddev->private = NULL; @@ -1706,19 +2010,14 @@ static int raid1_reshape(mddev_t *mddev, int raid_disks) kfree(newpoolinfo); return -ENOMEM; } - newmirrors = kmalloc(sizeof(struct mirror_info) * raid_disks, GFP_KERNEL); + newmirrors = kzalloc(sizeof(struct mirror_info) * raid_disks, GFP_KERNEL); if (!newmirrors) { kfree(newpoolinfo); mempool_destroy(newpool); return -ENOMEM; } - memset(newmirrors, 0, sizeof(struct mirror_info)*raid_disks); - spin_lock_irq(&conf->resync_lock); - conf->barrier++; - wait_event_lock_irq(conf->wait_idle, !conf->nr_pending, - conf->resync_lock, raid1_unplug(mddev->queue)); - spin_unlock_irq(&conf->resync_lock); + raise_barrier(conf); /* ok, everything is stopped */ oldpool = conf->r1bio_pool; @@ -1738,12 +2037,7 @@ static int raid1_reshape(mddev_t *mddev, int raid_disks) conf->raid_disks = mddev->raid_disks = raid_disks; conf->last_used = 0; /* just make sure it is in-range */ - spin_lock_irq(&conf->resync_lock); - conf->barrier--; - spin_unlock_irq(&conf->resync_lock); - wake_up(&conf->wait_resume); - wake_up(&conf->wait_idle); - + lower_barrier(conf); set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); md_wakeup_thread(mddev->thread); @@ -1758,33 +2052,19 @@ static void raid1_quiesce(mddev_t *mddev, int state) switch(state) { case 1: - spin_lock_irq(&conf->resync_lock); - conf->barrier++; - wait_event_lock_irq(conf->wait_idle, !conf->nr_pending, - conf->resync_lock, raid1_unplug(mddev->queue)); - spin_unlock_irq(&conf->resync_lock); + raise_barrier(conf); break; case 0: - spin_lock_irq(&conf->resync_lock); - conf->barrier--; - spin_unlock_irq(&conf->resync_lock); - wake_up(&conf->wait_resume); - wake_up(&conf->wait_idle); + lower_barrier(conf); break; } - if (mddev->thread) { - if (mddev->bitmap) - mddev->thread->timeout = mddev->bitmap->daemon_sleep * HZ; - else - mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT; - md_wakeup_thread(mddev->thread); - } } -static mdk_personality_t raid1_personality = +static struct mdk_personality raid1_personality = { .name = "raid1", + .level = 1, .owner = THIS_MODULE, .make_request = make_request, .run = run, @@ -1802,15 +2082,17 @@ static mdk_personality_t raid1_personality = static int __init raid_init(void) { - return register_md_personality(RAID1, &raid1_personality); + return register_md_personality(&raid1_personality); } static void raid_exit(void) { - unregister_md_personality(RAID1); + unregister_md_personality(&raid1_personality); } module_init(raid_init); module_exit(raid_exit); MODULE_LICENSE("GPL"); MODULE_ALIAS("md-personality-3"); /* RAID1 */ +MODULE_ALIAS("md-raid1"); +MODULE_ALIAS("md-level-1"); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 713dc9c2c73..9e658e519a2 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -18,7 +18,9 @@ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +#include "dm-bio-list.h" #include <linux/raid/raid10.h> +#include <linux/raid/bitmap.h> /* * RAID10 provides a combination of RAID0 and RAID1 functionality. @@ -47,6 +49,9 @@ static void unplug_slaves(mddev_t *mddev); +static void allow_barrier(conf_t *conf); +static void lower_barrier(conf_t *conf); + static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data) { conf_t *conf = data; @@ -54,10 +59,8 @@ static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data) int size = offsetof(struct r10bio_s, devs[conf->copies]); /* allocate a r10bio with room for raid_disks entries in the bios array */ - r10_bio = kmalloc(size, gfp_flags); - if (r10_bio) - memset(r10_bio, 0, size); - else + r10_bio = kzalloc(size, gfp_flags); + if (!r10_bio) unplug_slaves(conf->mddev); return r10_bio; @@ -129,10 +132,10 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data) out_free_pages: for ( ; i > 0 ; i--) - __free_page(bio->bi_io_vec[i-1].bv_page); + safe_put_page(bio->bi_io_vec[i-1].bv_page); while (j--) for (i = 0; i < RESYNC_PAGES ; i++) - __free_page(r10_bio->devs[j].bio->bi_io_vec[i].bv_page); + safe_put_page(r10_bio->devs[j].bio->bi_io_vec[i].bv_page); j = -1; out_free_bio: while ( ++j < nalloc ) @@ -152,7 +155,7 @@ static void r10buf_pool_free(void *__r10_bio, void *data) struct bio *bio = r10bio->devs[j].bio; if (bio) { for (i = 0; i < RESYNC_PAGES; i++) { - __free_page(bio->bi_io_vec[i].bv_page); + safe_put_page(bio->bi_io_vec[i].bv_page); bio->bi_io_vec[i].bv_page = NULL; } bio_put(bio); @@ -167,7 +170,7 @@ static void put_all_bios(conf_t *conf, r10bio_t *r10_bio) for (i = 0; i < conf->copies; i++) { struct bio **bio = & r10_bio->devs[i].bio; - if (*bio) + if (*bio && *bio != IO_BLOCKED) bio_put(*bio); *bio = NULL; } @@ -175,20 +178,13 @@ static void put_all_bios(conf_t *conf, r10bio_t *r10_bio) static inline void free_r10bio(r10bio_t *r10_bio) { - unsigned long flags; - conf_t *conf = mddev_to_conf(r10_bio->mddev); /* * Wake up any possible resync thread that waits for the device * to go idle. */ - spin_lock_irqsave(&conf->resync_lock, flags); - if (!--conf->nr_pending) { - wake_up(&conf->wait_idle); - wake_up(&conf->wait_resume); - } - spin_unlock_irqrestore(&conf->resync_lock, flags); + allow_barrier(conf); put_all_bios(conf, r10_bio); mempool_free(r10_bio, conf->r10bio_pool); @@ -197,22 +193,10 @@ static inline void free_r10bio(r10bio_t *r10_bio) static inline void put_buf(r10bio_t *r10_bio) { conf_t *conf = mddev_to_conf(r10_bio->mddev); - unsigned long flags; mempool_free(r10_bio, conf->r10buf_pool); - spin_lock_irqsave(&conf->resync_lock, flags); - if (!conf->barrier) - BUG(); - --conf->barrier; - wake_up(&conf->wait_resume); - wake_up(&conf->wait_idle); - - if (!--conf->nr_pending) { - wake_up(&conf->wait_idle); - wake_up(&conf->wait_resume); - } - spin_unlock_irqrestore(&conf->resync_lock, flags); + lower_barrier(conf); } static void reschedule_retry(r10bio_t *r10_bio) @@ -223,6 +207,7 @@ static void reschedule_retry(r10bio_t *r10_bio) spin_lock_irqsave(&conf->device_lock, flags); list_add(&r10_bio->retry_list, &conf->retry_list); + conf->nr_queued ++; spin_unlock_irqrestore(&conf->device_lock, flags); md_wakeup_thread(mddev->thread); @@ -268,9 +253,9 @@ static int raid10_end_read_request(struct bio *bio, unsigned int bytes_done, int /* * this branch is our 'one mirror IO has finished' event handler: */ - if (!uptodate) - md_error(r10_bio->mddev, conf->mirrors[dev].rdev); - else + update_head_pos(slot, r10_bio); + + if (uptodate) { /* * Set R10BIO_Uptodate in our master bio, so that * we will return a good error code to the higher @@ -281,15 +266,8 @@ static int raid10_end_read_request(struct bio *bio, unsigned int bytes_done, int * wait for the 'master' bio. */ set_bit(R10BIO_Uptodate, &r10_bio->state); - - update_head_pos(slot, r10_bio); - - /* - * we have only one bio on the read side - */ - if (uptodate) raid_end_bio_io(r10_bio); - else { + } else { /* * oops, read error: */ @@ -322,9 +300,11 @@ static int raid10_end_write_request(struct bio *bio, unsigned int bytes_done, in /* * this branch is our 'one mirror IO has finished' event handler: */ - if (!uptodate) + if (!uptodate) { md_error(r10_bio->mddev, conf->mirrors[dev].rdev); - else + /* an I/O failed, we can't clear the bitmap */ + set_bit(R10BIO_Degraded, &r10_bio->state); + } else /* * Set R10BIO_Uptodate in our master bio, so that * we will return a good error code for to the higher @@ -344,6 +324,11 @@ static int raid10_end_write_request(struct bio *bio, unsigned int bytes_done, in * already. */ if (atomic_dec_and_test(&r10_bio->remaining)) { + /* clear the bitmap if all writes complete successfully */ + bitmap_endwrite(r10_bio->mddev->bitmap, r10_bio->sector, + r10_bio->sectors, + !test_bit(R10BIO_Degraded, &r10_bio->state), + 0); md_write_end(r10_bio->mddev); raid_end_bio_io(r10_bio); } @@ -502,8 +487,9 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio) rcu_read_lock(); /* * Check if we can balance. We can balance on the whole - * device if no resync is going on, or below the resync window. - * We take the first readable disk when above the resync window. + * device if no resync is going on (recovery is ok), or below + * the resync window. We take the first readable disk when + * above the resync window. */ if (conf->mddev->recovery_cp < MaxSector && (this_sector + sectors >= conf->next_resync)) { @@ -512,6 +498,7 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio) disk = r10_bio->devs[slot].devnum; while ((rdev = rcu_dereference(conf->mirrors[disk].rdev)) == NULL || + r10_bio->devs[slot].bio == IO_BLOCKED || !test_bit(In_sync, &rdev->flags)) { slot++; if (slot == conf->copies) { @@ -529,6 +516,7 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio) slot = 0; disk = r10_bio->devs[slot].devnum; while ((rdev=rcu_dereference(conf->mirrors[disk].rdev)) == NULL || + r10_bio->devs[slot].bio == IO_BLOCKED || !test_bit(In_sync, &rdev->flags)) { slot ++; if (slot == conf->copies) { @@ -549,6 +537,7 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio) if ((rdev=rcu_dereference(conf->mirrors[ndisk].rdev)) == NULL || + r10_bio->devs[nslot].bio == IO_BLOCKED || !test_bit(In_sync, &rdev->flags)) continue; @@ -607,7 +596,10 @@ static void unplug_slaves(mddev_t *mddev) static void raid10_unplug(request_queue_t *q) { + mddev_t *mddev = q->queuedata; + unplug_slaves(q->queuedata); + md_wakeup_thread(mddev->thread); } static int raid10_issue_flush(request_queue_t *q, struct gendisk *disk, @@ -640,27 +632,107 @@ static int raid10_issue_flush(request_queue_t *q, struct gendisk *disk, return ret; } -/* - * Throttle resync depth, so that we can both get proper overlapping of - * requests, but are still able to handle normal requests quickly. +/* Barriers.... + * Sometimes we need to suspend IO while we do something else, + * either some resync/recovery, or reconfigure the array. + * To do this we raise a 'barrier'. + * The 'barrier' is a counter that can be raised multiple times + * to count how many activities are happening which preclude + * normal IO. + * We can only raise the barrier if there is no pending IO. + * i.e. if nr_pending == 0. + * We choose only to raise the barrier if no-one is waiting for the + * barrier to go down. This means that as soon as an IO request + * is ready, no other operations which require a barrier will start + * until the IO request has had a chance. + * + * So: regular IO calls 'wait_barrier'. When that returns there + * is no backgroup IO happening, It must arrange to call + * allow_barrier when it has finished its IO. + * backgroup IO calls must call raise_barrier. Once that returns + * there is no normal IO happeing. It must arrange to call + * lower_barrier when the particular background IO completes. */ #define RESYNC_DEPTH 32 -static void device_barrier(conf_t *conf, sector_t sect) +static void raise_barrier(conf_t *conf, int force) +{ + BUG_ON(force && !conf->barrier); + spin_lock_irq(&conf->resync_lock); + + /* Wait until no block IO is waiting (unless 'force') */ + wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting, + conf->resync_lock, + raid10_unplug(conf->mddev->queue)); + + /* block any new IO from starting */ + conf->barrier++; + + /* No wait for all pending IO to complete */ + wait_event_lock_irq(conf->wait_barrier, + !conf->nr_pending && conf->barrier < RESYNC_DEPTH, + conf->resync_lock, + raid10_unplug(conf->mddev->queue)); + + spin_unlock_irq(&conf->resync_lock); +} + +static void lower_barrier(conf_t *conf) +{ + unsigned long flags; + spin_lock_irqsave(&conf->resync_lock, flags); + conf->barrier--; + spin_unlock_irqrestore(&conf->resync_lock, flags); + wake_up(&conf->wait_barrier); +} + +static void wait_barrier(conf_t *conf) { spin_lock_irq(&conf->resync_lock); - wait_event_lock_irq(conf->wait_idle, !waitqueue_active(&conf->wait_resume), - conf->resync_lock, unplug_slaves(conf->mddev)); - - if (!conf->barrier++) { - wait_event_lock_irq(conf->wait_idle, !conf->nr_pending, - conf->resync_lock, unplug_slaves(conf->mddev)); - if (conf->nr_pending) - BUG(); + if (conf->barrier) { + conf->nr_waiting++; + wait_event_lock_irq(conf->wait_barrier, !conf->barrier, + conf->resync_lock, + raid10_unplug(conf->mddev->queue)); + conf->nr_waiting--; } - wait_event_lock_irq(conf->wait_resume, conf->barrier < RESYNC_DEPTH, - conf->resync_lock, unplug_slaves(conf->mddev)); - conf->next_resync = sect; + conf->nr_pending++; + spin_unlock_irq(&conf->resync_lock); +} + +static void allow_barrier(conf_t *conf) +{ + unsigned long flags; + spin_lock_irqsave(&conf->resync_lock, flags); + conf->nr_pending--; + spin_unlock_irqrestore(&conf->resync_lock, flags); + wake_up(&conf->wait_barrier); +} + +static void freeze_array(conf_t *conf) +{ + /* stop syncio and normal IO and wait for everything to + * go quiet. + * We increment barrier and nr_waiting, and then + * wait until barrier+nr_pending match nr_queued+2 + */ + spin_lock_irq(&conf->resync_lock); + conf->barrier++; + conf->nr_waiting++; + wait_event_lock_irq(conf->wait_barrier, + conf->barrier+conf->nr_pending == conf->nr_queued+2, + conf->resync_lock, + raid10_unplug(conf->mddev->queue)); + spin_unlock_irq(&conf->resync_lock); +} + +static void unfreeze_array(conf_t *conf) +{ + /* reverse the effect of the freeze */ + spin_lock_irq(&conf->resync_lock); + conf->barrier--; + conf->nr_waiting--; + wake_up(&conf->wait_barrier); spin_unlock_irq(&conf->resync_lock); } @@ -674,6 +746,8 @@ static int make_request(request_queue_t *q, struct bio * bio) int i; int chunk_sects = conf->chunk_mask + 1; const int rw = bio_data_dir(bio); + struct bio_list bl; + unsigned long flags; if (unlikely(bio_barrier(bio))) { bio_endio(bio, bio->bi_size, -EOPNOTSUPP); @@ -719,10 +793,7 @@ static int make_request(request_queue_t *q, struct bio * bio) * thread has put up a bar for new requests. * Continue immediately if no resync is active currently. */ - spin_lock_irq(&conf->resync_lock); - wait_event_lock_irq(conf->wait_resume, !conf->barrier, conf->resync_lock, ); - conf->nr_pending++; - spin_unlock_irq(&conf->resync_lock); + wait_barrier(conf); disk_stat_inc(mddev->gendisk, ios[rw]); disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio)); @@ -734,6 +805,7 @@ static int make_request(request_queue_t *q, struct bio * bio) r10_bio->mddev = mddev; r10_bio->sector = bio->bi_sector; + r10_bio->state = 0; if (rw == READ) { /* @@ -778,13 +850,16 @@ static int make_request(request_queue_t *q, struct bio * bio) !test_bit(Faulty, &rdev->flags)) { atomic_inc(&rdev->nr_pending); r10_bio->devs[i].bio = bio; - } else + } else { r10_bio->devs[i].bio = NULL; + set_bit(R10BIO_Degraded, &r10_bio->state); + } } rcu_read_unlock(); - atomic_set(&r10_bio->remaining, 1); + atomic_set(&r10_bio->remaining, 0); + bio_list_init(&bl); for (i = 0; i < conf->copies; i++) { struct bio *mbio; int d = r10_bio->devs[i].devnum; @@ -802,13 +877,14 @@ static int make_request(request_queue_t *q, struct bio * bio) mbio->bi_private = r10_bio; atomic_inc(&r10_bio->remaining); - generic_make_request(mbio); + bio_list_add(&bl, mbio); } - if (atomic_dec_and_test(&r10_bio->remaining)) { - md_write_end(mddev); - raid_end_bio_io(r10_bio); - } + bitmap_startwrite(mddev->bitmap, bio->bi_sector, r10_bio->sectors, 0); + spin_lock_irqsave(&conf->device_lock, flags); + bio_list_merge(&conf->pending_bio_list, &bl); + blk_plug_device(mddev->queue); + spin_unlock_irqrestore(&conf->device_lock, flags); return 0; } @@ -897,13 +973,8 @@ static void print_conf(conf_t *conf) static void close_sync(conf_t *conf) { - spin_lock_irq(&conf->resync_lock); - wait_event_lock_irq(conf->wait_resume, !conf->barrier, - conf->resync_lock, unplug_slaves(conf->mddev)); - spin_unlock_irq(&conf->resync_lock); - - if (conf->barrier) BUG(); - if (waitqueue_active(&conf->wait_idle)) BUG(); + wait_barrier(conf); + allow_barrier(conf); mempool_destroy(conf->r10buf_pool); conf->r10buf_pool = NULL; @@ -971,7 +1042,12 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) if (!enough(conf)) return 0; - for (mirror=0; mirror < mddev->raid_disks; mirror++) + if (rdev->saved_raid_disk >= 0 && + conf->mirrors[rdev->saved_raid_disk].rdev == NULL) + mirror = rdev->saved_raid_disk; + else + mirror = 0; + for ( ; mirror < mddev->raid_disks; mirror++) if ( !(p=conf->mirrors+mirror)->rdev) { blk_queue_stack_limits(mddev->queue, @@ -987,6 +1063,8 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) p->head_position = 0; rdev->raid_disk = mirror; found = 1; + if (rdev->saved_raid_disk != mirror) + conf->fullsync = 1; rcu_assign_pointer(p->rdev, rdev); break; } @@ -1027,7 +1105,6 @@ abort: static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error) { - int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); r10bio_t * r10_bio = (r10bio_t *)(bio->bi_private); conf_t *conf = mddev_to_conf(r10_bio->mddev); int i,d; @@ -1042,9 +1119,16 @@ static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error) BUG(); update_head_pos(i, r10_bio); d = r10_bio->devs[i].devnum; - if (!uptodate) - md_error(r10_bio->mddev, - conf->mirrors[d].rdev); + + if (test_bit(BIO_UPTODATE, &bio->bi_flags)) + set_bit(R10BIO_Uptodate, &r10_bio->state); + else { + atomic_add(r10_bio->sectors, + &conf->mirrors[d].rdev->corrected_errors); + if (!test_bit(MD_RECOVERY_SYNC, &conf->mddev->recovery)) + md_error(r10_bio->mddev, + conf->mirrors[d].rdev); + } /* for reconstruct, we always reschedule after a read. * for resync, only after all reads @@ -1132,23 +1216,32 @@ static void sync_request_write(mddev_t *mddev, r10bio_t *r10_bio) fbio = r10_bio->devs[i].bio; /* now find blocks with errors */ - for (i=first+1 ; i < conf->copies ; i++) { - int vcnt, j, d; + for (i=0 ; i < conf->copies ; i++) { + int j, d; + int vcnt = r10_bio->sectors >> (PAGE_SHIFT-9); - if (!test_bit(BIO_UPTODATE, &r10_bio->devs[i].bio->bi_flags)) - continue; - /* We know that the bi_io_vec layout is the same for - * both 'first' and 'i', so we just compare them. - * All vec entries are PAGE_SIZE; - */ tbio = r10_bio->devs[i].bio; - vcnt = r10_bio->sectors >> (PAGE_SHIFT-9); - for (j = 0; j < vcnt; j++) - if (memcmp(page_address(fbio->bi_io_vec[j].bv_page), - page_address(tbio->bi_io_vec[j].bv_page), - PAGE_SIZE)) - break; - if (j == vcnt) + + if (tbio->bi_end_io != end_sync_read) + continue; + if (i == first) + continue; + if (test_bit(BIO_UPTODATE, &r10_bio->devs[i].bio->bi_flags)) { + /* We know that the bi_io_vec layout is the same for + * both 'first' and 'i', so we just compare them. + * All vec entries are PAGE_SIZE; + */ + for (j = 0; j < vcnt; j++) + if (memcmp(page_address(fbio->bi_io_vec[j].bv_page), + page_address(tbio->bi_io_vec[j].bv_page), + PAGE_SIZE)) + break; + if (j == vcnt) + continue; + mddev->resync_mismatches += r10_bio->sectors; + } + if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) + /* Don't fix anything. */ continue; /* Ok, we need to write this bio * First we need to fixup bv_offset, bv_len and @@ -1227,7 +1320,10 @@ static void recovery_request_write(mddev_t *mddev, r10bio_t *r10_bio) atomic_inc(&conf->mirrors[d].rdev->nr_pending); md_sync_acct(conf->mirrors[d].rdev->bdev, wbio->bi_size >> 9); - generic_make_request(wbio); + if (test_bit(R10BIO_Uptodate, &r10_bio->state)) + generic_make_request(wbio); + else + bio_endio(wbio, wbio->bi_size, -EIO); } @@ -1254,10 +1350,31 @@ static void raid10d(mddev_t *mddev) for (;;) { char b[BDEVNAME_SIZE]; spin_lock_irqsave(&conf->device_lock, flags); + + if (conf->pending_bio_list.head) { + bio = bio_list_get(&conf->pending_bio_list); + blk_remove_plug(mddev->queue); + spin_unlock_irqrestore(&conf->device_lock, flags); + /* flush any pending bitmap writes to disk before proceeding w/ I/O */ + if (bitmap_unplug(mddev->bitmap) != 0) + printk("%s: bitmap file write failed!\n", mdname(mddev)); + + while (bio) { /* submit pending writes */ + struct bio *next = bio->bi_next; + bio->bi_next = NULL; + generic_make_request(bio); + bio = next; + } + unplug = 1; + + continue; + } + if (list_empty(head)) break; r10_bio = list_entry(head->prev, r10bio_t, retry_list); list_del(head->prev); + conf->nr_queued--; spin_unlock_irqrestore(&conf->device_lock, flags); mddev = r10_bio->mddev; @@ -1270,8 +1387,96 @@ static void raid10d(mddev_t *mddev) unplug = 1; } else { int mirror; + /* we got a read error. Maybe the drive is bad. Maybe just + * the block and we can fix it. + * We freeze all other IO, and try reading the block from + * other devices. When we find one, we re-write + * and check it that fixes the read error. + * This is all done synchronously while the array is + * frozen. + */ + int sect = 0; /* Offset from r10_bio->sector */ + int sectors = r10_bio->sectors; + freeze_array(conf); + if (mddev->ro == 0) while(sectors) { + int s = sectors; + int sl = r10_bio->read_slot; + int success = 0; + + if (s > (PAGE_SIZE>>9)) + s = PAGE_SIZE >> 9; + + do { + int d = r10_bio->devs[sl].devnum; + rdev = conf->mirrors[d].rdev; + if (rdev && + test_bit(In_sync, &rdev->flags) && + sync_page_io(rdev->bdev, + r10_bio->devs[sl].addr + + sect + rdev->data_offset, + s<<9, + conf->tmppage, READ)) + success = 1; + else { + sl++; + if (sl == conf->copies) + sl = 0; + } + } while (!success && sl != r10_bio->read_slot); + + if (success) { + int start = sl; + /* write it back and re-read */ + while (sl != r10_bio->read_slot) { + int d; + if (sl==0) + sl = conf->copies; + sl--; + d = r10_bio->devs[sl].devnum; + rdev = conf->mirrors[d].rdev; + atomic_add(s, &rdev->corrected_errors); + if (rdev && + test_bit(In_sync, &rdev->flags)) { + if (sync_page_io(rdev->bdev, + r10_bio->devs[sl].addr + + sect + rdev->data_offset, + s<<9, conf->tmppage, WRITE) == 0) + /* Well, this device is dead */ + md_error(mddev, rdev); + } + } + sl = start; + while (sl != r10_bio->read_slot) { + int d; + if (sl==0) + sl = conf->copies; + sl--; + d = r10_bio->devs[sl].devnum; + rdev = conf->mirrors[d].rdev; + if (rdev && + test_bit(In_sync, &rdev->flags)) { + if (sync_page_io(rdev->bdev, + r10_bio->devs[sl].addr + + sect + rdev->data_offset, + s<<9, conf->tmppage, READ) == 0) + /* Well, this device is dead */ + md_error(mddev, rdev); + } + } + } else { + /* Cannot read from anywhere -- bye bye array */ + md_error(mddev, conf->mirrors[r10_bio->devs[r10_bio->read_slot].devnum].rdev); + break; + } + sectors -= s; + sect += s; + } + + unfreeze_array(conf); + bio = r10_bio->devs[r10_bio->read_slot].bio; - r10_bio->devs[r10_bio->read_slot].bio = NULL; + r10_bio->devs[r10_bio->read_slot].bio = + mddev->ro ? IO_BLOCKED : NULL; bio_put(bio); mirror = read_balance(conf, r10_bio); if (mirror == -1) { @@ -1360,6 +1565,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i sector_t max_sector, nr_sectors; int disk; int i; + int max_sync; + int sync_blocks; sector_t sectors_skipped = 0; int chunks_skipped = 0; @@ -1373,6 +1580,29 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) max_sector = mddev->resync_max_sectors; if (sector_nr >= max_sector) { + /* If we aborted, we need to abort the + * sync on the 'current' bitmap chucks (there can + * be several when recovering multiple devices). + * as we may have started syncing it but not finished. + * We can find the current address in + * mddev->curr_resync, but for recovery, + * we need to convert that to several + * virtual addresses. + */ + if (mddev->curr_resync < max_sector) { /* aborted */ + if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) + bitmap_end_sync(mddev->bitmap, mddev->curr_resync, + &sync_blocks, 1); + else for (i=0; i<conf->raid_disks; i++) { + sector_t sect = + raid10_find_virt(conf, mddev->curr_resync, i); + bitmap_end_sync(mddev->bitmap, sect, + &sync_blocks, 1); + } + } else /* completed sync */ + conf->fullsync = 0; + + bitmap_close_sync(mddev->bitmap); close_sync(conf); *skipped = 1; return sectors_skipped; @@ -1395,9 +1625,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i * If there is non-resync activity waiting for us then * put in a delay to throttle resync. */ - if (!go_faster && waitqueue_active(&conf->wait_resume)) + if (!go_faster && conf->nr_waiting) msleep_interruptible(1000); - device_barrier(conf, sector_nr + RESYNC_SECTORS); /* Again, very different code for resync and recovery. * Both must result in an r10bio with a list of bios that @@ -1414,6 +1643,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i * end_sync_write if we will want to write. */ + max_sync = RESYNC_PAGES << (PAGE_SHIFT-9); if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { /* recovery... the complicated one */ int i, j, k; @@ -1422,14 +1652,29 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i for (i=0 ; i<conf->raid_disks; i++) if (conf->mirrors[i].rdev && !test_bit(In_sync, &conf->mirrors[i].rdev->flags)) { + int still_degraded = 0; /* want to reconstruct this device */ r10bio_t *rb2 = r10_bio; + sector_t sect = raid10_find_virt(conf, sector_nr, i); + int must_sync; + /* Unless we are doing a full sync, we only need + * to recover the block if it is set in the bitmap + */ + must_sync = bitmap_start_sync(mddev->bitmap, sect, + &sync_blocks, 1); + if (sync_blocks < max_sync) + max_sync = sync_blocks; + if (!must_sync && + !conf->fullsync) { + /* yep, skip the sync_blocks here, but don't assume + * that there will never be anything to do here + */ + chunks_skipped = -1; + continue; + } r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO); - spin_lock_irq(&conf->resync_lock); - conf->nr_pending++; - if (rb2) conf->barrier++; - spin_unlock_irq(&conf->resync_lock); + raise_barrier(conf, rb2 != NULL); atomic_set(&r10_bio->remaining, 0); r10_bio->master_bio = (struct bio*)rb2; @@ -1437,8 +1682,23 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i atomic_inc(&rb2->remaining); r10_bio->mddev = mddev; set_bit(R10BIO_IsRecover, &r10_bio->state); - r10_bio->sector = raid10_find_virt(conf, sector_nr, i); + r10_bio->sector = sect; + raid10_find_phys(conf, r10_bio); + /* Need to check if this section will still be + * degraded + */ + for (j=0; j<conf->copies;j++) { + int d = r10_bio->devs[j].devnum; + if (conf->mirrors[d].rdev == NULL || + test_bit(Faulty, &conf->mirrors[d].rdev->flags)) { + still_degraded = 1; + break; + } + } + must_sync = bitmap_start_sync(mddev->bitmap, sect, + &sync_blocks, still_degraded); + for (j=0; j<conf->copies;j++) { int d = r10_bio->devs[j].devnum; if (conf->mirrors[d].rdev && @@ -1498,14 +1758,22 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i } else { /* resync. Schedule a read for every block at this virt offset */ int count = 0; - r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO); - spin_lock_irq(&conf->resync_lock); - conf->nr_pending++; - spin_unlock_irq(&conf->resync_lock); + if (!bitmap_start_sync(mddev->bitmap, sector_nr, + &sync_blocks, mddev->degraded) && + !conf->fullsync && !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) { + /* We can skip this block */ + *skipped = 1; + return sync_blocks + sectors_skipped; + } + if (sync_blocks < max_sync) + max_sync = sync_blocks; + r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO); r10_bio->mddev = mddev; atomic_set(&r10_bio->remaining, 0); + raise_barrier(conf, 0); + conf->next_resync = sector_nr; r10_bio->master_bio = NULL; r10_bio->sector = sector_nr; @@ -1558,6 +1826,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i } nr_sectors = 0; + if (sector_nr + max_sync < max_sector) + max_sector = sector_nr + max_sync; do { struct page *page; int len = PAGE_SIZE; @@ -1632,11 +1902,11 @@ static int run(mddev_t *mddev) int nc, fc; sector_t stride, size; - if (mddev->level != 10) { - printk(KERN_ERR "raid10: %s: raid level not set correctly... (%d)\n", - mdname(mddev), mddev->level); - goto out; + if (mddev->chunk_size == 0) { + printk(KERN_ERR "md/raid10: non-zero chunk size required.\n"); + return -EINVAL; } + nc = mddev->layout & 255; fc = (mddev->layout >> 8) & 255; if ((nc*fc) <2 || (nc*fc) > mddev->raid_disks || @@ -1650,22 +1920,24 @@ static int run(mddev_t *mddev) * bookkeeping area. [whatever we allocate in run(), * should be freed in stop()] */ - conf = kmalloc(sizeof(conf_t), GFP_KERNEL); + conf = kzalloc(sizeof(conf_t), GFP_KERNEL); mddev->private = conf; if (!conf) { printk(KERN_ERR "raid10: couldn't allocate memory for %s\n", mdname(mddev)); goto out; } - memset(conf, 0, sizeof(*conf)); - conf->mirrors = kmalloc(sizeof(struct mirror_info)*mddev->raid_disks, + conf->mirrors = kzalloc(sizeof(struct mirror_info)*mddev->raid_disks, GFP_KERNEL); if (!conf->mirrors) { printk(KERN_ERR "raid10: couldn't allocate memory for %s\n", mdname(mddev)); goto out_free_conf; } - memset(conf->mirrors, 0, sizeof(struct mirror_info)*mddev->raid_disks); + + conf->tmppage = alloc_page(GFP_KERNEL); + if (!conf->tmppage) + goto out_free_conf; conf->near_copies = nc; conf->far_copies = fc; @@ -1713,8 +1985,7 @@ static int run(mddev_t *mddev) INIT_LIST_HEAD(&conf->retry_list); spin_lock_init(&conf->resync_lock); - init_waitqueue_head(&conf->wait_idle); - init_waitqueue_head(&conf->wait_resume); + init_waitqueue_head(&conf->wait_barrier); /* need to check that every block has at least one working mirror */ if (!enough(conf)) { @@ -1763,7 +2034,7 @@ static int run(mddev_t *mddev) * maybe... */ { - int stripe = conf->raid_disks * mddev->chunk_size / PAGE_CACHE_SIZE; + int stripe = conf->raid_disks * mddev->chunk_size / PAGE_SIZE; stripe /= conf->near_copies; if (mddev->queue->backing_dev_info.ra_pages < 2* stripe) mddev->queue->backing_dev_info.ra_pages = 2* stripe; @@ -1776,6 +2047,7 @@ static int run(mddev_t *mddev) out_free_conf: if (conf->r10bio_pool) mempool_destroy(conf->r10bio_pool); + safe_put_page(conf->tmppage); kfree(conf->mirrors); kfree(conf); mddev->private = NULL; @@ -1798,10 +2070,31 @@ static int stop(mddev_t *mddev) return 0; } +static void raid10_quiesce(mddev_t *mddev, int state) +{ + conf_t *conf = mddev_to_conf(mddev); + + switch(state) { + case 1: + raise_barrier(conf, 0); + break; + case 0: + lower_barrier(conf); + break; + } + if (mddev->thread) { + if (mddev->bitmap) + mddev->thread->timeout = mddev->bitmap->daemon_sleep * HZ; + else + mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT; + md_wakeup_thread(mddev->thread); + } +} -static mdk_personality_t raid10_personality = +static struct mdk_personality raid10_personality = { .name = "raid10", + .level = 10, .owner = THIS_MODULE, .make_request = make_request, .run = run, @@ -1812,19 +2105,22 @@ static mdk_personality_t raid10_personality = .hot_remove_disk= raid10_remove_disk, .spare_active = raid10_spare_active, .sync_request = sync_request, + .quiesce = raid10_quiesce, }; static int __init raid_init(void) { - return register_md_personality(RAID10, &raid10_personality); + return register_md_personality(&raid10_personality); } static void raid_exit(void) { - unregister_md_personality(RAID10); + unregister_md_personality(&raid10_personality); } module_init(raid_init); module_exit(raid_exit); MODULE_LICENSE("GPL"); MODULE_ALIAS("md-personality-9"); /* RAID10 */ +MODULE_ALIAS("md-raid10"); +MODULE_ALIAS("md-level-10"); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index fafc4bc045f..54f4a9847e3 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -35,12 +35,10 @@ #define STRIPE_SHIFT (PAGE_SHIFT - 9) #define STRIPE_SECTORS (STRIPE_SIZE>>9) #define IO_THRESHOLD 1 -#define HASH_PAGES 1 -#define HASH_PAGES_ORDER 0 -#define NR_HASH (HASH_PAGES * PAGE_SIZE / sizeof(struct stripe_head *)) +#define NR_HASH (PAGE_SIZE / sizeof(struct hlist_head)) #define HASH_MASK (NR_HASH - 1) -#define stripe_hash(conf, sect) ((conf)->stripe_hashtbl[((sect) >> STRIPE_SHIFT) & HASH_MASK]) +#define stripe_hash(conf, sect) (&((conf)->stripe_hashtbl[((sect) >> STRIPE_SHIFT) & HASH_MASK])) /* bio's attached to a stripe+device for I/O are linked together in bi_sector * order without overlap. There may be several bio's per stripe+device, and @@ -113,29 +111,21 @@ static void release_stripe(struct stripe_head *sh) spin_unlock_irqrestore(&conf->device_lock, flags); } -static void remove_hash(struct stripe_head *sh) +static inline void remove_hash(struct stripe_head *sh) { PRINTK("remove_hash(), stripe %llu\n", (unsigned long long)sh->sector); - if (sh->hash_pprev) { - if (sh->hash_next) - sh->hash_next->hash_pprev = sh->hash_pprev; - *sh->hash_pprev = sh->hash_next; - sh->hash_pprev = NULL; - } + hlist_del_init(&sh->hash); } -static __inline__ void insert_hash(raid5_conf_t *conf, struct stripe_head *sh) +static inline void insert_hash(raid5_conf_t *conf, struct stripe_head *sh) { - struct stripe_head **shp = &stripe_hash(conf, sh->sector); + struct hlist_head *hp = stripe_hash(conf, sh->sector); PRINTK("insert_hash(), stripe %llu\n", (unsigned long long)sh->sector); CHECK_DEVLOCK(); - if ((sh->hash_next = *shp) != NULL) - (*shp)->hash_pprev = &sh->hash_next; - *shp = sh; - sh->hash_pprev = shp; + hlist_add_head(&sh->hash, hp); } @@ -167,7 +157,7 @@ static void shrink_buffers(struct stripe_head *sh, int num) if (!p) continue; sh->dev[i].page = NULL; - page_cache_release(p); + put_page(p); } } @@ -228,10 +218,11 @@ static inline void init_stripe(struct stripe_head *sh, sector_t sector, int pd_i static struct stripe_head *__find_stripe(raid5_conf_t *conf, sector_t sector) { struct stripe_head *sh; + struct hlist_node *hn; CHECK_DEVLOCK(); PRINTK("__find_stripe, sector %llu\n", (unsigned long long)sector); - for (sh = stripe_hash(conf, sector); sh; sh = sh->hash_next) + hlist_for_each_entry(sh, hn, stripe_hash(conf, sector), hash) if (sh->sector == sector) return sh; PRINTK("__stripe %llu not in cache\n", (unsigned long long)sector); @@ -417,7 +408,7 @@ static int raid5_end_read_request(struct bio * bi, unsigned int bytes_done, set_bit(R5_UPTODATE, &sh->dev[i].flags); #endif if (test_bit(R5_ReadError, &sh->dev[i].flags)) { - printk("R5: read error corrected!!\n"); + printk(KERN_INFO "raid5: read error corrected!!\n"); clear_bit(R5_ReadError, &sh->dev[i].flags); clear_bit(R5_ReWrite, &sh->dev[i].flags); } @@ -428,13 +419,14 @@ static int raid5_end_read_request(struct bio * bi, unsigned int bytes_done, clear_bit(R5_UPTODATE, &sh->dev[i].flags); atomic_inc(&conf->disks[i].rdev->read_errors); if (conf->mddev->degraded) - printk("R5: read error not correctable.\n"); + printk(KERN_WARNING "raid5: read error not correctable.\n"); else if (test_bit(R5_ReWrite, &sh->dev[i].flags)) /* Oh, no!!! */ - printk("R5: read error NOT corrected!!\n"); + printk(KERN_WARNING "raid5: read error NOT corrected!!\n"); else if (atomic_read(&conf->disks[i].rdev->read_errors) > conf->max_nr_stripes) - printk("raid5: Too many read errors, failing device.\n"); + printk(KERN_WARNING + "raid5: Too many read errors, failing device.\n"); else retry = 1; if (retry) @@ -604,7 +596,7 @@ static sector_t raid5_compute_sector(sector_t r_sector, unsigned int raid_disks, *dd_idx = (*pd_idx + 1 + *dd_idx) % raid_disks; break; default: - printk("raid5: unsupported algorithm %d\n", + printk(KERN_ERR "raid5: unsupported algorithm %d\n", conf->algorithm); } @@ -645,7 +637,7 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i) i -= (sh->pd_idx + 1); break; default: - printk("raid5: unsupported algorithm %d\n", + printk(KERN_ERR "raid5: unsupported algorithm %d\n", conf->algorithm); } @@ -654,7 +646,7 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i) check = raid5_compute_sector (r_sector, raid_disks, data_disks, &dummy1, &dummy2, conf); if (check != sh->sector || dummy1 != dd_idx || dummy2 != sh->pd_idx) { - printk("compute_blocknr: map not correct\n"); + printk(KERN_ERR "compute_blocknr: map not correct\n"); return 0; } return r_sector; @@ -737,7 +729,7 @@ static void compute_block(struct stripe_head *sh, int dd_idx) if (test_bit(R5_UPTODATE, &sh->dev[i].flags)) ptr[count++] = p; else - printk("compute_block() %d, stripe %llu, %d" + printk(KERN_ERR "compute_block() %d, stripe %llu, %d" " not present\n", dd_idx, (unsigned long long)sh->sector, i); @@ -960,11 +952,11 @@ static void handle_stripe(struct stripe_head *sh) syncing = test_bit(STRIPE_SYNCING, &sh->state); /* Now to look around and see what can be done */ + rcu_read_lock(); for (i=disks; i--; ) { mdk_rdev_t *rdev; dev = &sh->dev[i]; clear_bit(R5_Insync, &dev->flags); - clear_bit(R5_Syncio, &dev->flags); PRINTK("check %d: state 0x%lx read %p write %p written %p\n", i, dev->flags, dev->toread, dev->towrite, dev->written); @@ -1003,9 +995,9 @@ static void handle_stripe(struct stripe_head *sh) non_overwrite++; } if (dev->written) written++; - rdev = conf->disks[i].rdev; /* FIXME, should I be looking rdev */ + rdev = rcu_dereference(conf->disks[i].rdev); if (!rdev || !test_bit(In_sync, &rdev->flags)) { - /* The ReadError flag wil just be confusing now */ + /* The ReadError flag will just be confusing now */ clear_bit(R5_ReadError, &dev->flags); clear_bit(R5_ReWrite, &dev->flags); } @@ -1016,6 +1008,7 @@ static void handle_stripe(struct stripe_head *sh) } else set_bit(R5_Insync, &dev->flags); } + rcu_read_unlock(); PRINTK("locked=%d uptodate=%d to_read=%d" " to_write=%d failed=%d failed_num=%d\n", locked, uptodate, to_read, to_write, failed, failed_num); @@ -1027,10 +1020,13 @@ static void handle_stripe(struct stripe_head *sh) int bitmap_end = 0; if (test_bit(R5_ReadError, &sh->dev[i].flags)) { - mdk_rdev_t *rdev = conf->disks[i].rdev; + mdk_rdev_t *rdev; + rcu_read_lock(); + rdev = rcu_dereference(conf->disks[i].rdev); if (rdev && test_bit(In_sync, &rdev->flags)) /* multiple read failures in one stripe */ md_error(conf->mddev, rdev); + rcu_read_unlock(); } spin_lock_irq(&conf->device_lock); @@ -1179,9 +1175,6 @@ static void handle_stripe(struct stripe_head *sh) locked++; PRINTK("Reading block %d (sync=%d)\n", i, syncing); - if (syncing) - md_sync_acct(conf->disks[i].rdev->bdev, - STRIPE_SECTORS); } } } @@ -1288,7 +1281,7 @@ static void handle_stripe(struct stripe_head *sh) * is available */ if (syncing && locked == 0 && - !test_bit(STRIPE_INSYNC, &sh->state) && failed <= 1) { + !test_bit(STRIPE_INSYNC, &sh->state)) { set_bit(STRIPE_HANDLE, &sh->state); if (failed == 0) { char *pagea; @@ -1306,27 +1299,25 @@ static void handle_stripe(struct stripe_head *sh) if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) /* don't try to repair!! */ set_bit(STRIPE_INSYNC, &sh->state); + else { + compute_block(sh, sh->pd_idx); + uptodate++; + } } } if (!test_bit(STRIPE_INSYNC, &sh->state)) { + /* either failed parity check, or recovery is happening */ if (failed==0) failed_num = sh->pd_idx; - /* should be able to compute the missing block and write it to spare */ - if (!test_bit(R5_UPTODATE, &sh->dev[failed_num].flags)) { - if (uptodate+1 != disks) - BUG(); - compute_block(sh, failed_num); - uptodate++; - } - if (uptodate != disks) - BUG(); dev = &sh->dev[failed_num]; + BUG_ON(!test_bit(R5_UPTODATE, &dev->flags)); + BUG_ON(uptodate != disks); + set_bit(R5_LOCKED, &dev->flags); set_bit(R5_Wantwrite, &dev->flags); clear_bit(STRIPE_DEGRADED, &sh->state); locked++; set_bit(STRIPE_INSYNC, &sh->state); - set_bit(R5_Syncio, &dev->flags); } } if (syncing && locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) { @@ -1392,7 +1383,7 @@ static void handle_stripe(struct stripe_head *sh) rcu_read_unlock(); if (rdev) { - if (test_bit(R5_Syncio, &sh->dev[i].flags)) + if (syncing) md_sync_acct(rdev->bdev, STRIPE_SECTORS); bi->bi_bdev = rdev->bdev; @@ -1409,6 +1400,9 @@ static void handle_stripe(struct stripe_head *sh) bi->bi_io_vec[0].bv_offset = 0; bi->bi_size = STRIPE_SIZE; bi->bi_next = NULL; + if (rw == WRITE && + test_bit(R5_ReWrite, &sh->dev[i].flags)) + atomic_add(STRIPE_SECTORS, &rdev->corrected_errors); generic_make_request(bi); } else { if (rw == 1) @@ -1822,21 +1816,21 @@ static int run(mddev_t *mddev) struct list_head *tmp; if (mddev->level != 5 && mddev->level != 4) { - printk("raid5: %s: raid level not set to 4/5 (%d)\n", mdname(mddev), mddev->level); + printk(KERN_ERR "raid5: %s: raid level not set to 4/5 (%d)\n", + mdname(mddev), mddev->level); return -EIO; } - mddev->private = kmalloc (sizeof (raid5_conf_t) - + mddev->raid_disks * sizeof(struct disk_info), - GFP_KERNEL); + mddev->private = kzalloc(sizeof (raid5_conf_t) + + mddev->raid_disks * sizeof(struct disk_info), + GFP_KERNEL); if ((conf = mddev->private) == NULL) goto abort; - memset (conf, 0, sizeof (*conf) + mddev->raid_disks * sizeof(struct disk_info) ); + conf->mddev = mddev; - if ((conf->stripe_hashtbl = (struct stripe_head **) __get_free_pages(GFP_ATOMIC, HASH_PAGES_ORDER)) == NULL) + if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL) goto abort; - memset(conf->stripe_hashtbl, 0, HASH_PAGES * PAGE_SIZE); spin_lock_init(&conf->device_lock); init_waitqueue_head(&conf->wait_for_stripe); @@ -1903,10 +1897,17 @@ static int run(mddev_t *mddev) if (mddev->degraded == 1 && mddev->recovery_cp != MaxSector) { - printk(KERN_ERR - "raid5: cannot start dirty degraded array for %s\n", - mdname(mddev)); - goto abort; + if (mddev->ok_start_degraded) + printk(KERN_WARNING + "raid5: starting dirty degraded array: %s" + "- data corruption possible.\n", + mdname(mddev)); + else { + printk(KERN_ERR + "raid5: cannot start dirty degraded array for %s\n", + mdname(mddev)); + goto abort; + } } { @@ -1948,7 +1949,7 @@ static int run(mddev_t *mddev) */ { int stripe = (mddev->raid_disks-1) * mddev->chunk_size - / PAGE_CACHE_SIZE; + / PAGE_SIZE; if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe) mddev->queue->backing_dev_info.ra_pages = 2 * stripe; } @@ -1956,9 +1957,6 @@ static int run(mddev_t *mddev) /* Ok, everything is just fine now */ sysfs_create_group(&mddev->kobj, &raid5_attrs_group); - if (mddev->bitmap) - mddev->thread->timeout = mddev->bitmap->daemon_sleep * HZ; - mddev->queue->unplug_fn = raid5_unplug_device; mddev->queue->issue_flush_fn = raid5_issue_flush; @@ -1967,9 +1965,7 @@ static int run(mddev_t *mddev) abort: if (conf) { print_raid5_conf(conf); - if (conf->stripe_hashtbl) - free_pages((unsigned long) conf->stripe_hashtbl, - HASH_PAGES_ORDER); + kfree(conf->stripe_hashtbl); kfree(conf); } mddev->private = NULL; @@ -1986,7 +1982,7 @@ static int stop(mddev_t *mddev) md_unregister_thread(mddev->thread); mddev->thread = NULL; shrink_stripes(conf); - free_pages((unsigned long) conf->stripe_hashtbl, HASH_PAGES_ORDER); + kfree(conf->stripe_hashtbl); blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ sysfs_remove_group(&mddev->kobj, &raid5_attrs_group); kfree(conf); @@ -2014,12 +2010,12 @@ static void print_sh (struct stripe_head *sh) static void printall (raid5_conf_t *conf) { struct stripe_head *sh; + struct hlist_node *hn; int i; spin_lock_irq(&conf->device_lock); for (i = 0; i < NR_HASH; i++) { - sh = conf->stripe_hashtbl[i]; - for (; sh; sh = sh->hash_next) { + hlist_for_each_entry(sh, hn, &conf->stripe_hashtbl[i], hash) { if (sh->raid_conf != conf) continue; print_sh(sh); @@ -2192,17 +2188,12 @@ static void raid5_quiesce(mddev_t *mddev, int state) spin_unlock_irq(&conf->device_lock); break; } - if (mddev->thread) { - if (mddev->bitmap) - mddev->thread->timeout = mddev->bitmap->daemon_sleep * HZ; - else - mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT; - md_wakeup_thread(mddev->thread); - } } -static mdk_personality_t raid5_personality= + +static struct mdk_personality raid5_personality = { .name = "raid5", + .level = 5, .owner = THIS_MODULE, .make_request = make_request, .run = run, @@ -2217,17 +2208,42 @@ static mdk_personality_t raid5_personality= .quiesce = raid5_quiesce, }; -static int __init raid5_init (void) +static struct mdk_personality raid4_personality = { - return register_md_personality (RAID5, &raid5_personality); + .name = "raid4", + .level = 4, + .owner = THIS_MODULE, + .make_request = make_request, + .run = run, + .stop = stop, + .status = status, + .error_handler = error, + .hot_add_disk = raid5_add_disk, + .hot_remove_disk= raid5_remove_disk, + .spare_active = raid5_spare_active, + .sync_request = sync_request, + .resize = raid5_resize, + .quiesce = raid5_quiesce, +}; + +static int __init raid5_init(void) +{ + register_md_personality(&raid5_personality); + register_md_personality(&raid4_personality); + return 0; } -static void raid5_exit (void) +static void raid5_exit(void) { - unregister_md_personality (RAID5); + unregister_md_personality(&raid5_personality); + unregister_md_personality(&raid4_personality); } module_init(raid5_init); module_exit(raid5_exit); MODULE_LICENSE("GPL"); MODULE_ALIAS("md-personality-4"); /* RAID5 */ +MODULE_ALIAS("md-raid5"); +MODULE_ALIAS("md-raid4"); +MODULE_ALIAS("md-level-5"); +MODULE_ALIAS("md-level-4"); diff --git a/drivers/md/raid6main.c b/drivers/md/raid6main.c index 0000d162d19..8c823d686a6 100644 --- a/drivers/md/raid6main.c +++ b/drivers/md/raid6main.c @@ -40,12 +40,10 @@ #define STRIPE_SHIFT (PAGE_SHIFT - 9) #define STRIPE_SECTORS (STRIPE_SIZE>>9) #define IO_THRESHOLD 1 -#define HASH_PAGES 1 -#define HASH_PAGES_ORDER 0 -#define NR_HASH (HASH_PAGES * PAGE_SIZE / sizeof(struct stripe_head *)) +#define NR_HASH (PAGE_SIZE / sizeof(struct hlist_head)) #define HASH_MASK (NR_HASH - 1) -#define stripe_hash(conf, sect) ((conf)->stripe_hashtbl[((sect) >> STRIPE_SHIFT) & HASH_MASK]) +#define stripe_hash(conf, sect) (&((conf)->stripe_hashtbl[((sect) >> STRIPE_SHIFT) & HASH_MASK])) /* bio's attached to a stripe+device for I/O are linked together in bi_sector * order without overlap. There may be several bio's per stripe+device, and @@ -132,29 +130,21 @@ static void release_stripe(struct stripe_head *sh) spin_unlock_irqrestore(&conf->device_lock, flags); } -static void remove_hash(struct stripe_head *sh) +static inline void remove_hash(struct stripe_head *sh) { PRINTK("remove_hash(), stripe %llu\n", (unsigned long long)sh->sector); - if (sh->hash_pprev) { - if (sh->hash_next) - sh->hash_next->hash_pprev = sh->hash_pprev; - *sh->hash_pprev = sh->hash_next; - sh->hash_pprev = NULL; - } + hlist_del_init(&sh->hash); } -static __inline__ void insert_hash(raid6_conf_t *conf, struct stripe_head *sh) +static inline void insert_hash(raid6_conf_t *conf, struct stripe_head *sh) { - struct stripe_head **shp = &stripe_hash(conf, sh->sector); + struct hlist_head *hp = stripe_hash(conf, sh->sector); PRINTK("insert_hash(), stripe %llu\n", (unsigned long long)sh->sector); CHECK_DEVLOCK(); - if ((sh->hash_next = *shp) != NULL) - (*shp)->hash_pprev = &sh->hash_next; - *shp = sh; - sh->hash_pprev = shp; + hlist_add_head(&sh->hash, hp); } @@ -186,7 +176,7 @@ static void shrink_buffers(struct stripe_head *sh, int num) if (!p) continue; sh->dev[i].page = NULL; - page_cache_release(p); + put_page(p); } } @@ -247,10 +237,11 @@ static inline void init_stripe(struct stripe_head *sh, sector_t sector, int pd_i static struct stripe_head *__find_stripe(raid6_conf_t *conf, sector_t sector) { struct stripe_head *sh; + struct hlist_node *hn; CHECK_DEVLOCK(); PRINTK("__find_stripe, sector %llu\n", (unsigned long long)sector); - for (sh = stripe_hash(conf, sector); sh; sh = sh->hash_next) + hlist_for_each_entry (sh, hn, stripe_hash(conf, sector), hash) if (sh->sector == sector) return sh; PRINTK("__stripe %llu not in cache\n", (unsigned long long)sector); @@ -367,8 +358,8 @@ static void shrink_stripes(raid6_conf_t *conf) conf->slab_cache = NULL; } -static int raid6_end_read_request (struct bio * bi, unsigned int bytes_done, - int error) +static int raid6_end_read_request(struct bio * bi, unsigned int bytes_done, + int error) { struct stripe_head *sh = bi->bi_private; raid6_conf_t *conf = sh->raid_conf; @@ -420,9 +411,35 @@ static int raid6_end_read_request (struct bio * bi, unsigned int bytes_done, #else set_bit(R5_UPTODATE, &sh->dev[i].flags); #endif + if (test_bit(R5_ReadError, &sh->dev[i].flags)) { + printk(KERN_INFO "raid6: read error corrected!!\n"); + clear_bit(R5_ReadError, &sh->dev[i].flags); + clear_bit(R5_ReWrite, &sh->dev[i].flags); + } + if (atomic_read(&conf->disks[i].rdev->read_errors)) + atomic_set(&conf->disks[i].rdev->read_errors, 0); } else { - md_error(conf->mddev, conf->disks[i].rdev); + int retry = 0; clear_bit(R5_UPTODATE, &sh->dev[i].flags); + atomic_inc(&conf->disks[i].rdev->read_errors); + if (conf->mddev->degraded) + printk(KERN_WARNING "raid6: read error not correctable.\n"); + else if (test_bit(R5_ReWrite, &sh->dev[i].flags)) + /* Oh, no!!! */ + printk(KERN_WARNING "raid6: read error NOT corrected!!\n"); + else if (atomic_read(&conf->disks[i].rdev->read_errors) + > conf->max_nr_stripes) + printk(KERN_WARNING + "raid6: Too many read errors, failing device.\n"); + else + retry = 1; + if (retry) + set_bit(R5_ReadError, &sh->dev[i].flags); + else { + clear_bit(R5_ReadError, &sh->dev[i].flags); + clear_bit(R5_ReWrite, &sh->dev[i].flags); + md_error(conf->mddev, conf->disks[i].rdev); + } } rdev_dec_pending(conf->disks[i].rdev, conf->mddev); #if 0 @@ -805,7 +822,7 @@ static void compute_parity(struct stripe_head *sh, int method) } /* Compute one missing block */ -static void compute_block_1(struct stripe_head *sh, int dd_idx) +static void compute_block_1(struct stripe_head *sh, int dd_idx, int nozero) { raid6_conf_t *conf = sh->raid_conf; int i, count, disks = conf->raid_disks; @@ -821,7 +838,7 @@ static void compute_block_1(struct stripe_head *sh, int dd_idx) compute_parity(sh, UPDATE_PARITY); } else { ptr[0] = page_address(sh->dev[dd_idx].page); - memset(ptr[0], 0, STRIPE_SIZE); + if (!nozero) memset(ptr[0], 0, STRIPE_SIZE); count = 1; for (i = disks ; i--; ) { if (i == dd_idx || i == qd_idx) @@ -838,7 +855,8 @@ static void compute_block_1(struct stripe_head *sh, int dd_idx) } if (count != 1) xor_block(count, STRIPE_SIZE, ptr); - set_bit(R5_UPTODATE, &sh->dev[dd_idx].flags); + if (!nozero) set_bit(R5_UPTODATE, &sh->dev[dd_idx].flags); + else clear_bit(R5_UPTODATE, &sh->dev[dd_idx].flags); } } @@ -871,7 +889,7 @@ static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2) return; } else { /* We're missing D+Q; recompute D from P */ - compute_block_1(sh, (dd_idx1 == qd_idx) ? dd_idx2 : dd_idx1); + compute_block_1(sh, (dd_idx1 == qd_idx) ? dd_idx2 : dd_idx1, 0); compute_parity(sh, UPDATE_PARITY); /* Is this necessary? */ return; } @@ -982,6 +1000,12 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in } +static int page_is_zero(struct page *p) +{ + char *a = page_address(p); + return ((*(u32*)a) == 0 && + memcmp(a, a+4, STRIPE_SIZE-4)==0); +} /* * handle_stripe - do things to a stripe. * @@ -1000,7 +1024,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in * */ -static void handle_stripe(struct stripe_head *sh) +static void handle_stripe(struct stripe_head *sh, struct page *tmp_page) { raid6_conf_t *conf = sh->raid_conf; int disks = conf->raid_disks; @@ -1027,11 +1051,11 @@ static void handle_stripe(struct stripe_head *sh) syncing = test_bit(STRIPE_SYNCING, &sh->state); /* Now to look around and see what can be done */ + rcu_read_lock(); for (i=disks; i--; ) { mdk_rdev_t *rdev; dev = &sh->dev[i]; clear_bit(R5_Insync, &dev->flags); - clear_bit(R5_Syncio, &dev->flags); PRINTK("check %d: state 0x%lx read %p write %p written %p\n", i, dev->flags, dev->toread, dev->towrite, dev->written); @@ -1070,14 +1094,21 @@ static void handle_stripe(struct stripe_head *sh) non_overwrite++; } if (dev->written) written++; - rdev = conf->disks[i].rdev; /* FIXME, should I be looking rdev */ + rdev = rcu_dereference(conf->disks[i].rdev); if (!rdev || !test_bit(In_sync, &rdev->flags)) { + /* The ReadError flag will just be confusing now */ + clear_bit(R5_ReadError, &dev->flags); + clear_bit(R5_ReWrite, &dev->flags); + } + if (!rdev || !test_bit(In_sync, &rdev->flags) + || test_bit(R5_ReadError, &dev->flags)) { if ( failed < 2 ) failed_num[failed] = i; failed++; } else set_bit(R5_Insync, &dev->flags); } + rcu_read_unlock(); PRINTK("locked=%d uptodate=%d to_read=%d" " to_write=%d failed=%d failed_num=%d,%d\n", locked, uptodate, to_read, to_write, failed, @@ -1088,6 +1119,17 @@ static void handle_stripe(struct stripe_head *sh) if (failed > 2 && to_read+to_write+written) { for (i=disks; i--; ) { int bitmap_end = 0; + + if (test_bit(R5_ReadError, &sh->dev[i].flags)) { + mdk_rdev_t *rdev; + rcu_read_lock(); + rdev = rcu_dereference(conf->disks[i].rdev); + if (rdev && test_bit(In_sync, &rdev->flags)) + /* multiple read failures in one stripe */ + md_error(conf->mddev, rdev); + rcu_read_unlock(); + } + spin_lock_irq(&conf->device_lock); /* fail all writes first */ bi = sh->dev[i].towrite; @@ -1123,7 +1165,8 @@ static void handle_stripe(struct stripe_head *sh) } /* fail any reads if this device is non-operational */ - if (!test_bit(R5_Insync, &sh->dev[i].flags)) { + if (!test_bit(R5_Insync, &sh->dev[i].flags) || + test_bit(R5_ReadError, &sh->dev[i].flags)) { bi = sh->dev[i].toread; sh->dev[i].toread = NULL; if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) @@ -1228,7 +1271,7 @@ static void handle_stripe(struct stripe_head *sh) if (uptodate == disks-1) { PRINTK("Computing stripe %llu block %d\n", (unsigned long long)sh->sector, i); - compute_block_1(sh, i); + compute_block_1(sh, i, 0); uptodate++; } else if ( uptodate == disks-2 && failed >= 2 ) { /* Computing 2-failure is *very* expensive; only do it if failed >= 2 */ @@ -1259,9 +1302,6 @@ static void handle_stripe(struct stripe_head *sh) locked++; PRINTK("Reading block %d (sync=%d)\n", i, syncing); - if (syncing) - md_sync_acct(conf->disks[i].rdev->bdev, - STRIPE_SECTORS); } } } @@ -1323,7 +1363,7 @@ static void handle_stripe(struct stripe_head *sh) /* We have failed blocks and need to compute them */ switch ( failed ) { case 0: BUG(); - case 1: compute_block_1(sh, failed_num[0]); break; + case 1: compute_block_1(sh, failed_num[0], 0); break; case 2: compute_block_2(sh, failed_num[0], failed_num[1]); break; default: BUG(); /* This request should have been failed? */ } @@ -1338,12 +1378,10 @@ static void handle_stripe(struct stripe_head *sh) (unsigned long long)sh->sector, i); locked++; set_bit(R5_Wantwrite, &sh->dev[i].flags); -#if 0 /**** FIX: I don't understand the logic here... ****/ - if (!test_bit(R5_Insync, &sh->dev[i].flags) - || ((i==pd_idx || i==qd_idx) && failed == 0)) /* FIX? */ - set_bit(STRIPE_INSYNC, &sh->state); -#endif } + /* after a RECONSTRUCT_WRITE, the stripe MUST be in-sync */ + set_bit(STRIPE_INSYNC, &sh->state); + if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { atomic_dec(&conf->preread_active_stripes); if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) @@ -1356,84 +1394,119 @@ static void handle_stripe(struct stripe_head *sh) * Any reads will already have been scheduled, so we just see if enough data * is available */ - if (syncing && locked == 0 && - !test_bit(STRIPE_INSYNC, &sh->state) && failed <= 2) { - set_bit(STRIPE_HANDLE, &sh->state); -#if 0 /* RAID-6: Don't support CHECK PARITY yet */ - if (failed == 0) { - char *pagea; - if (uptodate != disks) - BUG(); - compute_parity(sh, CHECK_PARITY); - uptodate--; - pagea = page_address(sh->dev[pd_idx].page); - if ((*(u32*)pagea) == 0 && - !memcmp(pagea, pagea+4, STRIPE_SIZE-4)) { - /* parity is correct (on disc, not in buffer any more) */ - set_bit(STRIPE_INSYNC, &sh->state); - } - } -#endif - if (!test_bit(STRIPE_INSYNC, &sh->state)) { - int failed_needupdate[2]; - struct r5dev *adev, *bdev; - - if ( failed < 1 ) - failed_num[0] = pd_idx; - if ( failed < 2 ) - failed_num[1] = (failed_num[0] == qd_idx) ? pd_idx : qd_idx; + if (syncing && locked == 0 && !test_bit(STRIPE_INSYNC, &sh->state)) { + int update_p = 0, update_q = 0; + struct r5dev *dev; - failed_needupdate[0] = !test_bit(R5_UPTODATE, &sh->dev[failed_num[0]].flags); - failed_needupdate[1] = !test_bit(R5_UPTODATE, &sh->dev[failed_num[1]].flags); + set_bit(STRIPE_HANDLE, &sh->state); - PRINTK("sync: failed=%d num=%d,%d fnu=%u%u\n", - failed, failed_num[0], failed_num[1], failed_needupdate[0], failed_needupdate[1]); + BUG_ON(failed>2); + BUG_ON(uptodate < disks); + /* Want to check and possibly repair P and Q. + * However there could be one 'failed' device, in which + * case we can only check one of them, possibly using the + * other to generate missing data + */ -#if 0 /* RAID-6: This code seems to require that CHECK_PARITY destroys the uptodateness of the parity */ - /* should be able to compute the missing block(s) and write to spare */ - if ( failed_needupdate[0] ^ failed_needupdate[1] ) { - if (uptodate+1 != disks) - BUG(); - compute_block_1(sh, failed_needupdate[0] ? failed_num[0] : failed_num[1]); - uptodate++; - } else if ( failed_needupdate[0] & failed_needupdate[1] ) { - if (uptodate+2 != disks) - BUG(); - compute_block_2(sh, failed_num[0], failed_num[1]); - uptodate += 2; + /* If !tmp_page, we cannot do the calculations, + * but as we have set STRIPE_HANDLE, we will soon be called + * by stripe_handle with a tmp_page - just wait until then. + */ + if (tmp_page) { + if (failed == q_failed) { + /* The only possible failed device holds 'Q', so it makes + * sense to check P (If anything else were failed, we would + * have used P to recreate it). + */ + compute_block_1(sh, pd_idx, 1); + if (!page_is_zero(sh->dev[pd_idx].page)) { + compute_block_1(sh,pd_idx,0); + update_p = 1; + } + } + if (!q_failed && failed < 2) { + /* q is not failed, and we didn't use it to generate + * anything, so it makes sense to check it + */ + memcpy(page_address(tmp_page), + page_address(sh->dev[qd_idx].page), + STRIPE_SIZE); + compute_parity(sh, UPDATE_PARITY); + if (memcmp(page_address(tmp_page), + page_address(sh->dev[qd_idx].page), + STRIPE_SIZE)!= 0) { + clear_bit(STRIPE_INSYNC, &sh->state); + update_q = 1; + } + } + if (update_p || update_q) { + conf->mddev->resync_mismatches += STRIPE_SECTORS; + if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) + /* don't try to repair!! */ + update_p = update_q = 0; } -#else - compute_block_2(sh, failed_num[0], failed_num[1]); - uptodate += failed_needupdate[0] + failed_needupdate[1]; -#endif - if (uptodate != disks) - BUG(); + /* now write out any block on a failed drive, + * or P or Q if they need it + */ - PRINTK("Marking for sync stripe %llu blocks %d,%d\n", - (unsigned long long)sh->sector, failed_num[0], failed_num[1]); + if (failed == 2) { + dev = &sh->dev[failed_num[1]]; + locked++; + set_bit(R5_LOCKED, &dev->flags); + set_bit(R5_Wantwrite, &dev->flags); + } + if (failed >= 1) { + dev = &sh->dev[failed_num[0]]; + locked++; + set_bit(R5_LOCKED, &dev->flags); + set_bit(R5_Wantwrite, &dev->flags); + } - /**** FIX: Should we really do both of these unconditionally? ****/ - adev = &sh->dev[failed_num[0]]; - locked += !test_bit(R5_LOCKED, &adev->flags); - set_bit(R5_LOCKED, &adev->flags); - set_bit(R5_Wantwrite, &adev->flags); - bdev = &sh->dev[failed_num[1]]; - locked += !test_bit(R5_LOCKED, &bdev->flags); - set_bit(R5_LOCKED, &bdev->flags); + if (update_p) { + dev = &sh->dev[pd_idx]; + locked ++; + set_bit(R5_LOCKED, &dev->flags); + set_bit(R5_Wantwrite, &dev->flags); + } + if (update_q) { + dev = &sh->dev[qd_idx]; + locked++; + set_bit(R5_LOCKED, &dev->flags); + set_bit(R5_Wantwrite, &dev->flags); + } clear_bit(STRIPE_DEGRADED, &sh->state); - set_bit(R5_Wantwrite, &bdev->flags); set_bit(STRIPE_INSYNC, &sh->state); - set_bit(R5_Syncio, &adev->flags); - set_bit(R5_Syncio, &bdev->flags); } } + if (syncing && locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) { md_done_sync(conf->mddev, STRIPE_SECTORS,1); clear_bit(STRIPE_SYNCING, &sh->state); } + /* If the failed drives are just a ReadError, then we might need + * to progress the repair/check process + */ + if (failed <= 2 && ! conf->mddev->ro) + for (i=0; i<failed;i++) { + dev = &sh->dev[failed_num[i]]; + if (test_bit(R5_ReadError, &dev->flags) + && !test_bit(R5_LOCKED, &dev->flags) + && test_bit(R5_UPTODATE, &dev->flags) + ) { + if (!test_bit(R5_ReWrite, &dev->flags)) { + set_bit(R5_Wantwrite, &dev->flags); + set_bit(R5_ReWrite, &dev->flags); + set_bit(R5_LOCKED, &dev->flags); + } else { + /* let's read it back */ + set_bit(R5_Wantread, &dev->flags); + set_bit(R5_LOCKED, &dev->flags); + } + } + } spin_unlock(&sh->lock); while ((bi=return_bi)) { @@ -1472,7 +1545,7 @@ static void handle_stripe(struct stripe_head *sh) rcu_read_unlock(); if (rdev) { - if (test_bit(R5_Syncio, &sh->dev[i].flags)) + if (syncing) md_sync_acct(rdev->bdev, STRIPE_SECTORS); bi->bi_bdev = rdev->bdev; @@ -1489,6 +1562,9 @@ static void handle_stripe(struct stripe_head *sh) bi->bi_io_vec[0].bv_offset = 0; bi->bi_size = STRIPE_SIZE; bi->bi_next = NULL; + if (rw == WRITE && + test_bit(R5_ReWrite, &sh->dev[i].flags)) + atomic_add(STRIPE_SECTORS, &rdev->corrected_errors); generic_make_request(bi); } else { if (rw == 1) @@ -1664,7 +1740,7 @@ static int make_request (request_queue_t *q, struct bio * bi) } finish_wait(&conf->wait_for_overlap, &w); raid6_plug_device(conf); - handle_stripe(sh); + handle_stripe(sh, NULL); release_stripe(sh); } else { /* cannot get stripe for read-ahead, just give-up */ @@ -1728,6 +1804,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i return rv; } if (!bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) && + !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) && !conf->fullsync && sync_blocks >= STRIPE_SECTORS) { /* we can skip this block, and probably more */ sync_blocks /= STRIPE_SECTORS; @@ -1765,7 +1842,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i clear_bit(STRIPE_INSYNC, &sh->state); spin_unlock(&sh->lock); - handle_stripe(sh); + handle_stripe(sh, NULL); release_stripe(sh); return STRIPE_SECTORS; @@ -1821,7 +1898,7 @@ static void raid6d (mddev_t *mddev) spin_unlock_irq(&conf->device_lock); handled++; - handle_stripe(sh); + handle_stripe(sh, conf->spare_page); release_stripe(sh); spin_lock_irq(&conf->device_lock); @@ -1848,17 +1925,19 @@ static int run(mddev_t *mddev) return -EIO; } - mddev->private = kmalloc (sizeof (raid6_conf_t) - + mddev->raid_disks * sizeof(struct disk_info), - GFP_KERNEL); + mddev->private = kzalloc(sizeof (raid6_conf_t) + + mddev->raid_disks * sizeof(struct disk_info), + GFP_KERNEL); if ((conf = mddev->private) == NULL) goto abort; - memset (conf, 0, sizeof (*conf) + mddev->raid_disks * sizeof(struct disk_info) ); conf->mddev = mddev; - if ((conf->stripe_hashtbl = (struct stripe_head **) __get_free_pages(GFP_ATOMIC, HASH_PAGES_ORDER)) == NULL) + if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL) + goto abort; + + conf->spare_page = alloc_page(GFP_KERNEL); + if (!conf->spare_page) goto abort; - memset(conf->stripe_hashtbl, 0, HASH_PAGES * PAGE_SIZE); spin_lock_init(&conf->device_lock); init_waitqueue_head(&conf->wait_for_stripe); @@ -1929,13 +2008,18 @@ static int run(mddev_t *mddev) goto abort; } -#if 0 /* FIX: For now */ if (mddev->degraded > 0 && mddev->recovery_cp != MaxSector) { - printk(KERN_ERR "raid6: cannot start dirty degraded array for %s\n", mdname(mddev)); - goto abort; + if (mddev->ok_start_degraded) + printk(KERN_WARNING "raid6: starting dirty degraded array:%s" + "- data corruption possible.\n", + mdname(mddev)); + else { + printk(KERN_ERR "raid6: cannot start dirty degraded array" + " for %s\n", mdname(mddev)); + goto abort; + } } -#endif { mddev->thread = md_register_thread(raid6d, mddev, "%s_raid6"); @@ -1977,7 +2061,7 @@ static int run(mddev_t *mddev) */ { int stripe = (mddev->raid_disks-2) * mddev->chunk_size - / PAGE_CACHE_SIZE; + / PAGE_SIZE; if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe) mddev->queue->backing_dev_info.ra_pages = 2 * stripe; } @@ -1985,18 +2069,14 @@ static int run(mddev_t *mddev) /* Ok, everything is just fine now */ mddev->array_size = mddev->size * (mddev->raid_disks - 2); - if (mddev->bitmap) - mddev->thread->timeout = mddev->bitmap->daemon_sleep * HZ; - mddev->queue->unplug_fn = raid6_unplug_device; mddev->queue->issue_flush_fn = raid6_issue_flush; return 0; abort: if (conf) { print_raid6_conf(conf); - if (conf->stripe_hashtbl) - free_pages((unsigned long) conf->stripe_hashtbl, - HASH_PAGES_ORDER); + safe_put_page(conf->spare_page); + kfree(conf->stripe_hashtbl); kfree(conf); } mddev->private = NULL; @@ -2013,7 +2093,7 @@ static int stop (mddev_t *mddev) md_unregister_thread(mddev->thread); mddev->thread = NULL; shrink_stripes(conf); - free_pages((unsigned long) conf->stripe_hashtbl, HASH_PAGES_ORDER); + kfree(conf->stripe_hashtbl); blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ kfree(conf); mddev->private = NULL; @@ -2040,12 +2120,13 @@ static void print_sh (struct seq_file *seq, struct stripe_head *sh) static void printall (struct seq_file *seq, raid6_conf_t *conf) { struct stripe_head *sh; + struct hlist_node *hn; int i; spin_lock_irq(&conf->device_lock); for (i = 0; i < NR_HASH; i++) { sh = conf->stripe_hashtbl[i]; - for (; sh; sh = sh->hash_next) { + hlist_for_each_entry(sh, hn, &conf->stripe_hashtbl[i], hash) { if (sh->raid_conf != conf) continue; print_sh(seq, sh); @@ -2223,17 +2304,12 @@ static void raid6_quiesce(mddev_t *mddev, int state) spin_unlock_irq(&conf->device_lock); break; } - if (mddev->thread) { - if (mddev->bitmap) - mddev->thread->timeout = mddev->bitmap->daemon_sleep * HZ; - else - mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT; - md_wakeup_thread(mddev->thread); - } } -static mdk_personality_t raid6_personality= + +static struct mdk_personality raid6_personality = { .name = "raid6", + .level = 6, .owner = THIS_MODULE, .make_request = make_request, .run = run, @@ -2248,7 +2324,7 @@ static mdk_personality_t raid6_personality= .quiesce = raid6_quiesce, }; -static int __init raid6_init (void) +static int __init raid6_init(void) { int e; @@ -2256,15 +2332,17 @@ static int __init raid6_init (void) if ( e ) return e; - return register_md_personality (RAID6, &raid6_personality); + return register_md_personality(&raid6_personality); } static void raid6_exit (void) { - unregister_md_personality (RAID6); + unregister_md_personality(&raid6_personality); } module_init(raid6_init); module_exit(raid6_exit); MODULE_LICENSE("GPL"); MODULE_ALIAS("md-personality-8"); /* RAID6 */ +MODULE_ALIAS("md-raid6"); +MODULE_ALIAS("md-level-6"); diff --git a/drivers/media/video/cpia_pp.c b/drivers/media/video/cpia_pp.c index ddf184f95d8..6861d408f1b 100644 --- a/drivers/media/video/cpia_pp.c +++ b/drivers/media/video/cpia_pp.c @@ -170,16 +170,9 @@ static size_t cpia_read_nibble (struct parport *port, /* Does the error line indicate end of data? */ if (((i /*& 1*/) == 0) && (parport_read_status(port) & PARPORT_STATUS_ERROR)) { - port->physport->ieee1284.phase = IEEE1284_PH_HBUSY_DNA; - DBG("%s: No more nibble data (%d bytes)\n", - port->name, i/2); - - /* Go to reverse idle phase. */ - parport_frob_control (port, - PARPORT_CONTROL_AUTOFD, - PARPORT_CONTROL_AUTOFD); - port->physport->ieee1284.phase = IEEE1284_PH_REV_IDLE; - break; + DBG("%s: No more nibble data (%d bytes)\n", + port->name, i/2); + goto end_of_data; } /* Event 7: Set nAutoFd low. */ @@ -227,18 +220,21 @@ static size_t cpia_read_nibble (struct parport *port, byte = nibble; } - i /= 2; /* i is now in bytes */ - if (i == len) { /* Read the last nibble without checking data avail. */ - port = port->physport; - if (parport_read_status (port) & PARPORT_STATUS_ERROR) - port->ieee1284.phase = IEEE1284_PH_HBUSY_DNA; + if (parport_read_status (port) & PARPORT_STATUS_ERROR) { + end_of_data: + /* Go to reverse idle phase. */ + parport_frob_control (port, + PARPORT_CONTROL_AUTOFD, + PARPORT_CONTROL_AUTOFD); + port->physport->ieee1284.phase = IEEE1284_PH_REV_IDLE; + } else - port->ieee1284.phase = IEEE1284_PH_HBUSY_DAVAIL; + port->physport->ieee1284.phase = IEEE1284_PH_HBUSY_DAVAIL; } - return i; + return i/2; } /* CPiA nonstandard "Nibble Stream" mode (2 nibbles per cycle, instead of 1) diff --git a/drivers/message/i2o/Kconfig b/drivers/message/i2o/Kconfig index 43a942a29c2..fef67710388 100644 --- a/drivers/message/i2o/Kconfig +++ b/drivers/message/i2o/Kconfig @@ -24,6 +24,18 @@ config I2O If unsure, say N. +config I2O_LCT_NOTIFY_ON_CHANGES + bool "Enable LCT notification" + depends on I2O + default y + ---help--- + Only say N here if you have a I2O controller from SUN. The SUN + firmware doesn't support LCT notification on changes. If this option + is enabled on such a controller the driver will hang up in a endless + loop. On all other controllers say Y. + + If unsure, say Y. + config I2O_EXT_ADAPTEC bool "Enable Adaptec extensions" depends on I2O diff --git a/drivers/message/i2o/bus-osm.c b/drivers/message/i2o/bus-osm.c index 151b228e1cb..ac06f10c54e 100644 --- a/drivers/message/i2o/bus-osm.c +++ b/drivers/message/i2o/bus-osm.c @@ -17,7 +17,7 @@ #include <linux/i2o.h> #define OSM_NAME "bus-osm" -#define OSM_VERSION "$Rev$" +#define OSM_VERSION "1.317" #define OSM_DESCRIPTION "I2O Bus Adapter OSM" static struct i2o_driver i2o_bus_driver; @@ -39,18 +39,18 @@ static struct i2o_class_id i2o_bus_class_id[] = { */ static int i2o_bus_scan(struct i2o_device *dev) { - struct i2o_message __iomem *msg; - u32 m; + struct i2o_message *msg; - m = i2o_msg_get_wait(dev->iop, &msg, I2O_TIMEOUT_MESSAGE_GET); - if (m == I2O_QUEUE_EMPTY) + msg = i2o_msg_get_wait(dev->iop, I2O_TIMEOUT_MESSAGE_GET); + if (IS_ERR(msg)) return -ETIMEDOUT; - writel(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]); - writel(I2O_CMD_BUS_SCAN << 24 | HOST_TID << 12 | dev->lct_data.tid, - &msg->u.head[1]); + msg->u.head[0] = cpu_to_le32(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0); + msg->u.head[1] = + cpu_to_le32(I2O_CMD_BUS_SCAN << 24 | HOST_TID << 12 | dev->lct_data. + tid); - return i2o_msg_post_wait(dev->iop, m, 60); + return i2o_msg_post_wait(dev->iop, msg, 60); }; /** @@ -59,8 +59,9 @@ static int i2o_bus_scan(struct i2o_device *dev) * * Returns count. */ -static ssize_t i2o_bus_store_scan(struct device *d, struct device_attribute *attr, const char *buf, - size_t count) +static ssize_t i2o_bus_store_scan(struct device *d, + struct device_attribute *attr, + const char *buf, size_t count) { struct i2o_device *i2o_dev = to_i2o_device(d); int rc; diff --git a/drivers/message/i2o/config-osm.c b/drivers/message/i2o/config-osm.c index 10432f66520..3bba7aa82e5 100644 --- a/drivers/message/i2o/config-osm.c +++ b/drivers/message/i2o/config-osm.c @@ -22,7 +22,7 @@ #include <asm/uaccess.h> #define OSM_NAME "config-osm" -#define OSM_VERSION "1.248" +#define OSM_VERSION "1.323" #define OSM_DESCRIPTION "I2O Configuration OSM" /* access mode user rw */ diff --git a/drivers/message/i2o/core.h b/drivers/message/i2o/core.h index 9eefedb1621..90628562851 100644 --- a/drivers/message/i2o/core.h +++ b/drivers/message/i2o/core.h @@ -14,8 +14,6 @@ */ /* Exec-OSM */ -extern struct bus_type i2o_bus_type; - extern struct i2o_driver i2o_exec_driver; extern int i2o_exec_lct_get(struct i2o_controller *); @@ -23,6 +21,8 @@ extern int __init i2o_exec_init(void); extern void __exit i2o_exec_exit(void); /* driver */ +extern struct bus_type i2o_bus_type; + extern int i2o_driver_dispatch(struct i2o_controller *, u32); extern int __init i2o_driver_init(void); @@ -33,19 +33,27 @@ extern int __init i2o_pci_init(void); extern void __exit i2o_pci_exit(void); /* device */ +extern struct device_attribute i2o_device_attrs[]; + extern void i2o_device_remove(struct i2o_device *); extern int i2o_device_parse_lct(struct i2o_controller *); /* IOP */ extern struct i2o_controller *i2o_iop_alloc(void); -extern void i2o_iop_free(struct i2o_controller *); + +/** + * i2o_iop_free - Free the i2o_controller struct + * @c: I2O controller to free + */ +static inline void i2o_iop_free(struct i2o_controller *c) +{ + i2o_pool_free(&c->in_msg); + kfree(c); +} extern int i2o_iop_add(struct i2o_controller *); extern void i2o_iop_remove(struct i2o_controller *); -/* config */ -extern int i2o_parm_issue(struct i2o_device *, int, void *, int, void *, int); - /* control registers relative to c->base */ #define I2O_IRQ_STATUS 0x30 #define I2O_IRQ_MASK 0x34 diff --git a/drivers/message/i2o/device.c b/drivers/message/i2o/device.c index 8eb50cdb8ae..ee183053fa2 100644 --- a/drivers/message/i2o/device.c +++ b/drivers/message/i2o/device.c @@ -35,18 +35,18 @@ static inline int i2o_device_issue_claim(struct i2o_device *dev, u32 cmd, u32 type) { - struct i2o_message __iomem *msg; - u32 m; + struct i2o_message *msg; - m = i2o_msg_get_wait(dev->iop, &msg, I2O_TIMEOUT_MESSAGE_GET); - if (m == I2O_QUEUE_EMPTY) - return -ETIMEDOUT; + msg = i2o_msg_get_wait(dev->iop, I2O_TIMEOUT_MESSAGE_GET); + if (IS_ERR(msg)) + return PTR_ERR(msg); - writel(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]); - writel(cmd << 24 | HOST_TID << 12 | dev->lct_data.tid, &msg->u.head[1]); - writel(type, &msg->body[0]); + msg->u.head[0] = cpu_to_le32(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0); + msg->u.head[1] = + cpu_to_le32(cmd << 24 | HOST_TID << 12 | dev->lct_data.tid); + msg->body[0] = cpu_to_le32(type); - return i2o_msg_post_wait(dev->iop, m, 60); + return i2o_msg_post_wait(dev->iop, msg, 60); } /** @@ -123,7 +123,6 @@ int i2o_device_claim_release(struct i2o_device *dev) return rc; } - /** * i2o_device_release - release the memory for a I2O device * @dev: I2O device which should be released @@ -140,10 +139,10 @@ static void i2o_device_release(struct device *dev) kfree(i2o_dev); } - /** - * i2o_device_class_show_class_id - Displays class id of I2O device - * @cd: class device of which the class id should be displayed + * i2o_device_show_class_id - Displays class id of I2O device + * @dev: device of which the class id should be displayed + * @attr: pointer to device attribute * @buf: buffer into which the class id should be printed * * Returns the number of bytes which are printed into the buffer. @@ -159,15 +158,15 @@ static ssize_t i2o_device_show_class_id(struct device *dev, } /** - * i2o_device_class_show_tid - Displays TID of I2O device - * @cd: class device of which the TID should be displayed - * @buf: buffer into which the class id should be printed + * i2o_device_show_tid - Displays TID of I2O device + * @dev: device of which the TID should be displayed + * @attr: pointer to device attribute + * @buf: buffer into which the TID should be printed * * Returns the number of bytes which are printed into the buffer. */ static ssize_t i2o_device_show_tid(struct device *dev, - struct device_attribute *attr, - char *buf) + struct device_attribute *attr, char *buf) { struct i2o_device *i2o_dev = to_i2o_device(dev); @@ -175,6 +174,7 @@ static ssize_t i2o_device_show_tid(struct device *dev, return strlen(buf) + 1; } +/* I2O device attributes */ struct device_attribute i2o_device_attrs[] = { __ATTR(class_id, S_IRUGO, i2o_device_show_class_id, NULL), __ATTR(tid, S_IRUGO, i2o_device_show_tid, NULL), @@ -193,12 +193,10 @@ static struct i2o_device *i2o_device_alloc(void) { struct i2o_device *dev; - dev = kmalloc(sizeof(*dev), GFP_KERNEL); + dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) return ERR_PTR(-ENOMEM); - memset(dev, 0, sizeof(*dev)); - INIT_LIST_HEAD(&dev->list); init_MUTEX(&dev->lock); @@ -209,66 +207,6 @@ static struct i2o_device *i2o_device_alloc(void) } /** - * i2o_setup_sysfs_links - Adds attributes to the I2O device - * @cd: I2O class device which is added to the I2O device class - * - * This function get called when a I2O device is added to the class. It - * creates the attributes for each device and creates user/parent symlink - * if necessary. - * - * Returns 0 on success or negative error code on failure. - */ -static void i2o_setup_sysfs_links(struct i2o_device *i2o_dev) -{ - struct i2o_controller *c = i2o_dev->iop; - struct i2o_device *tmp; - - /* create user entries for this device */ - tmp = i2o_iop_find_device(i2o_dev->iop, i2o_dev->lct_data.user_tid); - if (tmp && tmp != i2o_dev) - sysfs_create_link(&i2o_dev->device.kobj, - &tmp->device.kobj, "user"); - - /* create user entries refering to this device */ - list_for_each_entry(tmp, &c->devices, list) - if (tmp->lct_data.user_tid == i2o_dev->lct_data.tid && - tmp != i2o_dev) - sysfs_create_link(&tmp->device.kobj, - &i2o_dev->device.kobj, "user"); - - /* create parent entries for this device */ - tmp = i2o_iop_find_device(i2o_dev->iop, i2o_dev->lct_data.parent_tid); - if (tmp && tmp != i2o_dev) - sysfs_create_link(&i2o_dev->device.kobj, - &tmp->device.kobj, "parent"); - - /* create parent entries refering to this device */ - list_for_each_entry(tmp, &c->devices, list) - if (tmp->lct_data.parent_tid == i2o_dev->lct_data.tid && - tmp != i2o_dev) - sysfs_create_link(&tmp->device.kobj, - &i2o_dev->device.kobj, "parent"); -} - -static void i2o_remove_sysfs_links(struct i2o_device *i2o_dev) -{ - struct i2o_controller *c = i2o_dev->iop; - struct i2o_device *tmp; - - sysfs_remove_link(&i2o_dev->device.kobj, "parent"); - sysfs_remove_link(&i2o_dev->device.kobj, "user"); - - list_for_each_entry(tmp, &c->devices, list) { - if (tmp->lct_data.parent_tid == i2o_dev->lct_data.tid) - sysfs_remove_link(&tmp->device.kobj, "parent"); - if (tmp->lct_data.user_tid == i2o_dev->lct_data.tid) - sysfs_remove_link(&tmp->device.kobj, "user"); - } -} - - - -/** * i2o_device_add - allocate a new I2O device and add it to the IOP * @iop: I2O controller where the device is on * @entry: LCT entry of the I2O device @@ -282,33 +220,57 @@ static void i2o_remove_sysfs_links(struct i2o_device *i2o_dev) static struct i2o_device *i2o_device_add(struct i2o_controller *c, i2o_lct_entry * entry) { - struct i2o_device *dev; + struct i2o_device *i2o_dev, *tmp; - dev = i2o_device_alloc(); - if (IS_ERR(dev)) { + i2o_dev = i2o_device_alloc(); + if (IS_ERR(i2o_dev)) { printk(KERN_ERR "i2o: unable to allocate i2o device\n"); - return dev; + return i2o_dev; } - dev->lct_data = *entry; - dev->iop = c; + i2o_dev->lct_data = *entry; - snprintf(dev->device.bus_id, BUS_ID_SIZE, "%d:%03x", c->unit, - dev->lct_data.tid); + snprintf(i2o_dev->device.bus_id, BUS_ID_SIZE, "%d:%03x", c->unit, + i2o_dev->lct_data.tid); - dev->device.parent = &c->device; + i2o_dev->iop = c; + i2o_dev->device.parent = &c->device; - device_register(&dev->device); + device_register(&i2o_dev->device); - list_add_tail(&dev->list, &c->devices); + list_add_tail(&i2o_dev->list, &c->devices); - i2o_setup_sysfs_links(dev); + /* create user entries for this device */ + tmp = i2o_iop_find_device(i2o_dev->iop, i2o_dev->lct_data.user_tid); + if (tmp && (tmp != i2o_dev)) + sysfs_create_link(&i2o_dev->device.kobj, &tmp->device.kobj, + "user"); - i2o_driver_notify_device_add_all(dev); + /* create user entries refering to this device */ + list_for_each_entry(tmp, &c->devices, list) + if ((tmp->lct_data.user_tid == i2o_dev->lct_data.tid) + && (tmp != i2o_dev)) + sysfs_create_link(&tmp->device.kobj, + &i2o_dev->device.kobj, "user"); - pr_debug("i2o: device %s added\n", dev->device.bus_id); + /* create parent entries for this device */ + tmp = i2o_iop_find_device(i2o_dev->iop, i2o_dev->lct_data.parent_tid); + if (tmp && (tmp != i2o_dev)) + sysfs_create_link(&i2o_dev->device.kobj, &tmp->device.kobj, + "parent"); - return dev; + /* create parent entries refering to this device */ + list_for_each_entry(tmp, &c->devices, list) + if ((tmp->lct_data.parent_tid == i2o_dev->lct_data.tid) + && (tmp != i2o_dev)) + sysfs_create_link(&tmp->device.kobj, + &i2o_dev->device.kobj, "parent"); + + i2o_driver_notify_device_add_all(i2o_dev); + + pr_debug("i2o: device %s added\n", i2o_dev->device.bus_id); + + return i2o_dev; } /** @@ -321,9 +283,22 @@ static struct i2o_device *i2o_device_add(struct i2o_controller *c, */ void i2o_device_remove(struct i2o_device *i2o_dev) { + struct i2o_device *tmp; + struct i2o_controller *c = i2o_dev->iop; + i2o_driver_notify_device_remove_all(i2o_dev); - i2o_remove_sysfs_links(i2o_dev); + + sysfs_remove_link(&i2o_dev->device.kobj, "parent"); + sysfs_remove_link(&i2o_dev->device.kobj, "user"); + + list_for_each_entry(tmp, &c->devices, list) { + if (tmp->lct_data.parent_tid == i2o_dev->lct_data.tid) + sysfs_remove_link(&tmp->device.kobj, "parent"); + if (tmp->lct_data.user_tid == i2o_dev->lct_data.tid) + sysfs_remove_link(&tmp->device.kobj, "user"); + } list_del(&i2o_dev->list); + device_unregister(&i2o_dev->device); } @@ -341,56 +316,83 @@ int i2o_device_parse_lct(struct i2o_controller *c) { struct i2o_device *dev, *tmp; i2o_lct *lct; - int i; - int max; + u32 *dlct = c->dlct.virt; + int max = 0, i = 0; + u16 table_size; + u32 buf; down(&c->lct_lock); kfree(c->lct); - lct = c->dlct.virt; + buf = le32_to_cpu(*dlct++); + table_size = buf & 0xffff; - c->lct = kmalloc(lct->table_size * 4, GFP_KERNEL); - if (!c->lct) { + lct = c->lct = kmalloc(table_size * 4, GFP_KERNEL); + if (!lct) { up(&c->lct_lock); return -ENOMEM; } - if (lct->table_size * 4 > c->dlct.len) { - memcpy(c->lct, c->dlct.virt, c->dlct.len); - up(&c->lct_lock); - return -EAGAIN; - } + lct->lct_ver = buf >> 28; + lct->boot_tid = buf >> 16 & 0xfff; + lct->table_size = table_size; + lct->change_ind = le32_to_cpu(*dlct++); + lct->iop_flags = le32_to_cpu(*dlct++); - memcpy(c->lct, c->dlct.virt, lct->table_size * 4); - - lct = c->lct; - - max = (lct->table_size - 3) / 9; + table_size -= 3; pr_debug("%s: LCT has %d entries (LCT size: %d)\n", c->name, max, lct->table_size); - /* remove devices, which are not in the LCT anymore */ - list_for_each_entry_safe(dev, tmp, &c->devices, list) { + while (table_size > 0) { + i2o_lct_entry *entry = &lct->lct_entry[max]; int found = 0; - for (i = 0; i < max; i++) { - if (lct->lct_entry[i].tid == dev->lct_data.tid) { + buf = le32_to_cpu(*dlct++); + entry->entry_size = buf & 0xffff; + entry->tid = buf >> 16 & 0xfff; + + entry->change_ind = le32_to_cpu(*dlct++); + entry->device_flags = le32_to_cpu(*dlct++); + + buf = le32_to_cpu(*dlct++); + entry->class_id = buf & 0xfff; + entry->version = buf >> 12 & 0xf; + entry->vendor_id = buf >> 16; + + entry->sub_class = le32_to_cpu(*dlct++); + + buf = le32_to_cpu(*dlct++); + entry->user_tid = buf & 0xfff; + entry->parent_tid = buf >> 12 & 0xfff; + entry->bios_info = buf >> 24; + + memcpy(&entry->identity_tag, dlct, 8); + dlct += 2; + + entry->event_capabilities = le32_to_cpu(*dlct++); + + /* add new devices, which are new in the LCT */ + list_for_each_entry_safe(dev, tmp, &c->devices, list) { + if (entry->tid == dev->lct_data.tid) { found = 1; break; } } if (!found) - i2o_device_remove(dev); + i2o_device_add(c, entry); + + table_size -= 9; + max++; } - /* add new devices, which are new in the LCT */ - for (i = 0; i < max; i++) { + /* remove devices, which are not in the LCT anymore */ + list_for_each_entry_safe(dev, tmp, &c->devices, list) { int found = 0; - list_for_each_entry_safe(dev, tmp, &c->devices, list) { + for (i = 0; i < max; i++) { if (lct->lct_entry[i].tid == dev->lct_data.tid) { found = 1; break; @@ -398,14 +400,14 @@ int i2o_device_parse_lct(struct i2o_controller *c) } if (!found) - i2o_device_add(c, &lct->lct_entry[i]); + i2o_device_remove(dev); } + up(&c->lct_lock); return 0; } - /* * Run time support routines */ @@ -419,13 +421,9 @@ int i2o_device_parse_lct(struct i2o_controller *c) * ResultCount, ErrorInfoSize, BlockStatus and BlockSize. */ int i2o_parm_issue(struct i2o_device *i2o_dev, int cmd, void *oplist, - int oplen, void *reslist, int reslen) + int oplen, void *reslist, int reslen) { - struct i2o_message __iomem *msg; - u32 m; - u32 *res32 = (u32 *) reslist; - u32 *restmp = (u32 *) reslist; - int len = 0; + struct i2o_message *msg; int i = 0; int rc; struct i2o_dma res; @@ -437,26 +435,27 @@ int i2o_parm_issue(struct i2o_device *i2o_dev, int cmd, void *oplist, if (i2o_dma_alloc(dev, &res, reslen, GFP_KERNEL)) return -ENOMEM; - m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET); - if (m == I2O_QUEUE_EMPTY) { + msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET); + if (IS_ERR(msg)) { i2o_dma_free(dev, &res); - return -ETIMEDOUT; + return PTR_ERR(msg); } i = 0; - writel(cmd << 24 | HOST_TID << 12 | i2o_dev->lct_data.tid, - &msg->u.head[1]); - writel(0, &msg->body[i++]); - writel(0x4C000000 | oplen, &msg->body[i++]); /* OperationList */ - memcpy_toio(&msg->body[i], oplist, oplen); + msg->u.head[1] = + cpu_to_le32(cmd << 24 | HOST_TID << 12 | i2o_dev->lct_data.tid); + msg->body[i++] = cpu_to_le32(0x00000000); + msg->body[i++] = cpu_to_le32(0x4C000000 | oplen); /* OperationList */ + memcpy(&msg->body[i], oplist, oplen); i += (oplen / 4 + (oplen % 4 ? 1 : 0)); - writel(0xD0000000 | res.len, &msg->body[i++]); /* ResultList */ - writel(res.phys, &msg->body[i++]); + msg->body[i++] = cpu_to_le32(0xD0000000 | res.len); /* ResultList */ + msg->body[i++] = cpu_to_le32(res.phys); - writel(I2O_MESSAGE_SIZE(i + sizeof(struct i2o_message) / 4) | - SGL_OFFSET_5, &msg->u.head[0]); + msg->u.head[0] = + cpu_to_le32(I2O_MESSAGE_SIZE(i + sizeof(struct i2o_message) / 4) | + SGL_OFFSET_5); - rc = i2o_msg_post_wait_mem(c, m, 10, &res); + rc = i2o_msg_post_wait_mem(c, msg, 10, &res); /* This only looks like a memory leak - don't "fix" it. */ if (rc == -ETIMEDOUT) @@ -465,36 +464,7 @@ int i2o_parm_issue(struct i2o_device *i2o_dev, int cmd, void *oplist, memcpy(reslist, res.virt, res.len); i2o_dma_free(dev, &res); - /* Query failed */ - if (rc) - return rc; - /* - * Calculate number of bytes of Result LIST - * We need to loop through each Result BLOCK and grab the length - */ - restmp = res32 + 1; - len = 1; - for (i = 0; i < (res32[0] & 0X0000FFFF); i++) { - if (restmp[0] & 0x00FF0000) { /* BlockStatus != SUCCESS */ - printk(KERN_WARNING - "%s - Error:\n ErrorInfoSize = 0x%02x, " - "BlockStatus = 0x%02x, BlockSize = 0x%04x\n", - (cmd == - I2O_CMD_UTIL_PARAMS_SET) ? "PARAMS_SET" : - "PARAMS_GET", res32[1] >> 24, - (res32[1] >> 16) & 0xFF, res32[1] & 0xFFFF); - - /* - * If this is the only request,than we return an error - */ - if ((res32[0] & 0x0000FFFF) == 1) { - return -((res32[1] >> 16) & 0xFF); /* -BlockStatus */ - } - } - len += restmp[0] & 0x0000FFFF; /* Length of res BLOCK */ - restmp += restmp[0] & 0x0000FFFF; /* Skip to next BLOCK */ - } - return (len << 2); /* bytes used by result list */ + return rc; } /* @@ -503,28 +473,25 @@ int i2o_parm_issue(struct i2o_device *i2o_dev, int cmd, void *oplist, int i2o_parm_field_get(struct i2o_device *i2o_dev, int group, int field, void *buf, int buflen) { - u16 opblk[] = { 1, 0, I2O_PARAMS_FIELD_GET, group, 1, field }; + u32 opblk[] = { cpu_to_le32(0x00000001), + cpu_to_le32((u16) group << 16 | I2O_PARAMS_FIELD_GET), + cpu_to_le32((s16) field << 16 | 0x00000001) + }; u8 *resblk; /* 8 bytes for header */ - int size; - - if (field == -1) /* whole group */ - opblk[4] = -1; + int rc; resblk = kmalloc(buflen + 8, GFP_KERNEL | GFP_ATOMIC); if (!resblk) return -ENOMEM; - size = i2o_parm_issue(i2o_dev, I2O_CMD_UTIL_PARAMS_GET, opblk, - sizeof(opblk), resblk, buflen + 8); + rc = i2o_parm_issue(i2o_dev, I2O_CMD_UTIL_PARAMS_GET, opblk, + sizeof(opblk), resblk, buflen + 8); memcpy(buf, resblk + 8, buflen); /* cut off header */ kfree(resblk); - if (size > buflen) - return buflen; - - return size; + return rc; } /* @@ -534,12 +501,12 @@ int i2o_parm_field_get(struct i2o_device *i2o_dev, int group, int field, * else return specific fields * ibuf contains fieldindexes * - * if oper == I2O_PARAMS_LIST_GET, get from specific rows - * if fieldcount == -1 return all fields + * if oper == I2O_PARAMS_LIST_GET, get from specific rows + * if fieldcount == -1 return all fields * ibuf contains rowcount, keyvalues - * else return specific fields + * else return specific fields * fieldcount is # of fieldindexes - * ibuf contains fieldindexes, rowcount, keyvalues + * ibuf contains fieldindexes, rowcount, keyvalues * * You could also use directly function i2o_issue_params(). */ diff --git a/drivers/message/i2o/driver.c b/drivers/message/i2o/driver.c index 0fb9c4e2ad4..64130227574 100644 --- a/drivers/message/i2o/driver.c +++ b/drivers/message/i2o/driver.c @@ -61,12 +61,10 @@ static int i2o_bus_match(struct device *dev, struct device_driver *drv) }; /* I2O bus type */ -extern struct device_attribute i2o_device_attrs[]; - struct bus_type i2o_bus_type = { .name = "i2o", .match = i2o_bus_match, - .dev_attrs = i2o_device_attrs, + .dev_attrs = i2o_device_attrs }; /** @@ -219,14 +217,14 @@ int i2o_driver_dispatch(struct i2o_controller *c, u32 m) /* cut of header from message size (in 32-bit words) */ size = (le32_to_cpu(msg->u.head[0]) >> 16) - 5; - evt = kmalloc(size * 4 + sizeof(*evt), GFP_ATOMIC | __GFP_ZERO); + evt = kzalloc(size * 4 + sizeof(*evt), GFP_ATOMIC); if (!evt) return -ENOMEM; evt->size = size; evt->tcntxt = le32_to_cpu(msg->u.s.tcntxt); evt->event_indicator = le32_to_cpu(msg->body[0]); - memcpy(&evt->tcntxt, &msg->u.s.tcntxt, size * 4); + memcpy(&evt->data, &msg->body[1], size * 4); list_for_each_entry_safe(dev, tmp, &c->devices, list) if (dev->lct_data.tid == tid) { @@ -349,12 +347,10 @@ int __init i2o_driver_init(void) osm_info("max drivers = %d\n", i2o_max_drivers); i2o_drivers = - kmalloc(i2o_max_drivers * sizeof(*i2o_drivers), GFP_KERNEL); + kzalloc(i2o_max_drivers * sizeof(*i2o_drivers), GFP_KERNEL); if (!i2o_drivers) return -ENOMEM; - memset(i2o_drivers, 0, i2o_max_drivers * sizeof(*i2o_drivers)); - rc = bus_register(&i2o_bus_type); if (rc < 0) diff --git a/drivers/message/i2o/exec-osm.c b/drivers/message/i2o/exec-osm.c index 9c339a2505b..9bb9859f6df 100644 --- a/drivers/message/i2o/exec-osm.c +++ b/drivers/message/i2o/exec-osm.c @@ -33,7 +33,7 @@ #include <linux/workqueue.h> #include <linux/string.h> #include <linux/slab.h> -#include <linux/sched.h> /* wait_event_interruptible_timeout() needs this */ +#include <linux/sched.h> /* wait_event_interruptible_timeout() needs this */ #include <asm/param.h> /* HZ */ #include "core.h" @@ -75,11 +75,9 @@ static struct i2o_exec_wait *i2o_exec_wait_alloc(void) { struct i2o_exec_wait *wait; - wait = kmalloc(sizeof(*wait), GFP_KERNEL); + wait = kzalloc(sizeof(*wait), GFP_KERNEL); if (!wait) - return ERR_PTR(-ENOMEM); - - memset(wait, 0, sizeof(*wait)); + return NULL; INIT_LIST_HEAD(&wait->list); @@ -114,13 +112,12 @@ static void i2o_exec_wait_free(struct i2o_exec_wait *wait) * Returns 0 on success, negative error code on timeout or positive error * code from reply. */ -int i2o_msg_post_wait_mem(struct i2o_controller *c, u32 m, unsigned long - timeout, struct i2o_dma *dma) +int i2o_msg_post_wait_mem(struct i2o_controller *c, struct i2o_message *msg, + unsigned long timeout, struct i2o_dma *dma) { DECLARE_WAIT_QUEUE_HEAD(wq); struct i2o_exec_wait *wait; static u32 tcntxt = 0x80000000; - struct i2o_message __iomem *msg = i2o_msg_in_to_virt(c, m); int rc = 0; wait = i2o_exec_wait_alloc(); @@ -138,15 +135,15 @@ int i2o_msg_post_wait_mem(struct i2o_controller *c, u32 m, unsigned long * We will only use transaction contexts >= 0x80000000 for POST WAIT, * so we could find a POST WAIT reply easier in the reply handler. */ - writel(i2o_exec_driver.context, &msg->u.s.icntxt); + msg->u.s.icntxt = cpu_to_le32(i2o_exec_driver.context); wait->tcntxt = tcntxt++; - writel(wait->tcntxt, &msg->u.s.tcntxt); + msg->u.s.tcntxt = cpu_to_le32(wait->tcntxt); /* * Post the message to the controller. At some point later it will * return. If we time out before it returns then complete will be zero. */ - i2o_msg_post(c, m); + i2o_msg_post(c, msg); if (!wait->complete) { wait->wq = &wq; @@ -266,13 +263,14 @@ static int i2o_msg_post_wait_complete(struct i2o_controller *c, u32 m, * * Returns number of bytes printed into buffer. */ -static ssize_t i2o_exec_show_vendor_id(struct device *d, struct device_attribute *attr, char *buf) +static ssize_t i2o_exec_show_vendor_id(struct device *d, + struct device_attribute *attr, char *buf) { struct i2o_device *dev = to_i2o_device(d); u16 id; - if (i2o_parm_field_get(dev, 0x0000, 0, &id, 2)) { - sprintf(buf, "0x%04x", id); + if (!i2o_parm_field_get(dev, 0x0000, 0, &id, 2)) { + sprintf(buf, "0x%04x", le16_to_cpu(id)); return strlen(buf) + 1; } @@ -286,13 +284,15 @@ static ssize_t i2o_exec_show_vendor_id(struct device *d, struct device_attribute * * Returns number of bytes printed into buffer. */ -static ssize_t i2o_exec_show_product_id(struct device *d, struct device_attribute *attr, char *buf) +static ssize_t i2o_exec_show_product_id(struct device *d, + struct device_attribute *attr, + char *buf) { struct i2o_device *dev = to_i2o_device(d); u16 id; - if (i2o_parm_field_get(dev, 0x0000, 1, &id, 2)) { - sprintf(buf, "0x%04x", id); + if (!i2o_parm_field_get(dev, 0x0000, 1, &id, 2)) { + sprintf(buf, "0x%04x", le16_to_cpu(id)); return strlen(buf) + 1; } @@ -362,7 +362,9 @@ static void i2o_exec_lct_modified(struct i2o_controller *c) if (i2o_device_parse_lct(c) != -EAGAIN) change_ind = c->lct->change_ind + 1; +#ifdef CONFIG_I2O_LCT_NOTIFY_ON_CHANGES i2o_exec_lct_notify(c, change_ind); +#endif }; /** @@ -385,23 +387,22 @@ static int i2o_exec_reply(struct i2o_controller *c, u32 m, u32 context; if (le32_to_cpu(msg->u.head[0]) & MSG_FAIL) { + struct i2o_message __iomem *pmsg; + u32 pm; + /* * If Fail bit is set we must take the transaction context of * the preserved message to find the right request again. */ - struct i2o_message __iomem *pmsg; - u32 pm; pm = le32_to_cpu(msg->body[3]); - pmsg = i2o_msg_in_to_virt(c, pm); + context = readl(&pmsg->u.s.tcntxt); i2o_report_status(KERN_INFO, "i2o_core", msg); - context = readl(&pmsg->u.s.tcntxt); - /* Release the preserved msg */ - i2o_msg_nop(c, pm); + i2o_msg_nop_mfa(c, pm); } else context = le32_to_cpu(msg->u.s.tcntxt); @@ -462,25 +463,26 @@ static void i2o_exec_event(struct i2o_event *evt) */ int i2o_exec_lct_get(struct i2o_controller *c) { - struct i2o_message __iomem *msg; - u32 m; + struct i2o_message *msg; int i = 0; int rc = -EAGAIN; for (i = 1; i <= I2O_LCT_GET_TRIES; i++) { - m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET); - if (m == I2O_QUEUE_EMPTY) - return -ETIMEDOUT; - - writel(EIGHT_WORD_MSG_SIZE | SGL_OFFSET_6, &msg->u.head[0]); - writel(I2O_CMD_LCT_NOTIFY << 24 | HOST_TID << 12 | ADAPTER_TID, - &msg->u.head[1]); - writel(0xffffffff, &msg->body[0]); - writel(0x00000000, &msg->body[1]); - writel(0xd0000000 | c->dlct.len, &msg->body[2]); - writel(c->dlct.phys, &msg->body[3]); - - rc = i2o_msg_post_wait(c, m, I2O_TIMEOUT_LCT_GET); + msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET); + if (IS_ERR(msg)) + return PTR_ERR(msg); + + msg->u.head[0] = + cpu_to_le32(EIGHT_WORD_MSG_SIZE | SGL_OFFSET_6); + msg->u.head[1] = + cpu_to_le32(I2O_CMD_LCT_NOTIFY << 24 | HOST_TID << 12 | + ADAPTER_TID); + msg->body[0] = cpu_to_le32(0xffffffff); + msg->body[1] = cpu_to_le32(0x00000000); + msg->body[2] = cpu_to_le32(0xd0000000 | c->dlct.len); + msg->body[3] = cpu_to_le32(c->dlct.phys); + + rc = i2o_msg_post_wait(c, msg, I2O_TIMEOUT_LCT_GET); if (rc < 0) break; @@ -506,29 +508,29 @@ static int i2o_exec_lct_notify(struct i2o_controller *c, u32 change_ind) { i2o_status_block *sb = c->status_block.virt; struct device *dev; - struct i2o_message __iomem *msg; - u32 m; + struct i2o_message *msg; dev = &c->pdev->dev; - if (i2o_dma_realloc(dev, &c->dlct, sb->expected_lct_size, GFP_KERNEL)) + if (i2o_dma_realloc + (dev, &c->dlct, le32_to_cpu(sb->expected_lct_size), GFP_KERNEL)) return -ENOMEM; - m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET); - if (m == I2O_QUEUE_EMPTY) - return -ETIMEDOUT; - - writel(EIGHT_WORD_MSG_SIZE | SGL_OFFSET_6, &msg->u.head[0]); - writel(I2O_CMD_LCT_NOTIFY << 24 | HOST_TID << 12 | ADAPTER_TID, - &msg->u.head[1]); - writel(i2o_exec_driver.context, &msg->u.s.icntxt); - writel(0, &msg->u.s.tcntxt); /* FIXME */ - writel(0xffffffff, &msg->body[0]); - writel(change_ind, &msg->body[1]); - writel(0xd0000000 | c->dlct.len, &msg->body[2]); - writel(c->dlct.phys, &msg->body[3]); - - i2o_msg_post(c, m); + msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET); + if (IS_ERR(msg)) + return PTR_ERR(msg); + + msg->u.head[0] = cpu_to_le32(EIGHT_WORD_MSG_SIZE | SGL_OFFSET_6); + msg->u.head[1] = cpu_to_le32(I2O_CMD_LCT_NOTIFY << 24 | HOST_TID << 12 | + ADAPTER_TID); + msg->u.s.icntxt = cpu_to_le32(i2o_exec_driver.context); + msg->u.s.tcntxt = cpu_to_le32(0x00000000); + msg->body[0] = cpu_to_le32(0xffffffff); + msg->body[1] = cpu_to_le32(change_ind); + msg->body[2] = cpu_to_le32(0xd0000000 | c->dlct.len); + msg->body[3] = cpu_to_le32(c->dlct.phys); + + i2o_msg_post(c, msg); return 0; }; diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c index 4f522527b7e..5b1febed313 100644 --- a/drivers/message/i2o/i2o_block.c +++ b/drivers/message/i2o/i2o_block.c @@ -59,10 +59,12 @@ #include <linux/blkdev.h> #include <linux/hdreg.h> +#include <scsi/scsi.h> + #include "i2o_block.h" #define OSM_NAME "block-osm" -#define OSM_VERSION "1.287" +#define OSM_VERSION "1.325" #define OSM_DESCRIPTION "I2O Block Device OSM" static struct i2o_driver i2o_block_driver; @@ -130,20 +132,20 @@ static int i2o_block_remove(struct device *dev) */ static int i2o_block_device_flush(struct i2o_device *dev) { - struct i2o_message __iomem *msg; - u32 m; + struct i2o_message *msg; - m = i2o_msg_get_wait(dev->iop, &msg, I2O_TIMEOUT_MESSAGE_GET); - if (m == I2O_QUEUE_EMPTY) - return -ETIMEDOUT; + msg = i2o_msg_get_wait(dev->iop, I2O_TIMEOUT_MESSAGE_GET); + if (IS_ERR(msg)) + return PTR_ERR(msg); - writel(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]); - writel(I2O_CMD_BLOCK_CFLUSH << 24 | HOST_TID << 12 | dev->lct_data.tid, - &msg->u.head[1]); - writel(60 << 16, &msg->body[0]); + msg->u.head[0] = cpu_to_le32(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0); + msg->u.head[1] = + cpu_to_le32(I2O_CMD_BLOCK_CFLUSH << 24 | HOST_TID << 12 | dev-> + lct_data.tid); + msg->body[0] = cpu_to_le32(60 << 16); osm_debug("Flushing...\n"); - return i2o_msg_post_wait(dev->iop, m, 60); + return i2o_msg_post_wait(dev->iop, msg, 60); }; /** @@ -181,21 +183,21 @@ static int i2o_block_issue_flush(request_queue_t * queue, struct gendisk *disk, */ static int i2o_block_device_mount(struct i2o_device *dev, u32 media_id) { - struct i2o_message __iomem *msg; - u32 m; - - m = i2o_msg_get_wait(dev->iop, &msg, I2O_TIMEOUT_MESSAGE_GET); - if (m == I2O_QUEUE_EMPTY) - return -ETIMEDOUT; - - writel(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]); - writel(I2O_CMD_BLOCK_MMOUNT << 24 | HOST_TID << 12 | dev->lct_data.tid, - &msg->u.head[1]); - writel(-1, &msg->body[0]); - writel(0, &msg->body[1]); + struct i2o_message *msg; + + msg = i2o_msg_get_wait(dev->iop, I2O_TIMEOUT_MESSAGE_GET); + if (IS_ERR(msg)) + return PTR_ERR(msg); + + msg->u.head[0] = cpu_to_le32(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0); + msg->u.head[1] = + cpu_to_le32(I2O_CMD_BLOCK_MMOUNT << 24 | HOST_TID << 12 | dev-> + lct_data.tid); + msg->body[0] = cpu_to_le32(-1); + msg->body[1] = cpu_to_le32(0x00000000); osm_debug("Mounting...\n"); - return i2o_msg_post_wait(dev->iop, m, 2); + return i2o_msg_post_wait(dev->iop, msg, 2); }; /** @@ -210,20 +212,20 @@ static int i2o_block_device_mount(struct i2o_device *dev, u32 media_id) */ static int i2o_block_device_lock(struct i2o_device *dev, u32 media_id) { - struct i2o_message __iomem *msg; - u32 m; + struct i2o_message *msg; - m = i2o_msg_get_wait(dev->iop, &msg, I2O_TIMEOUT_MESSAGE_GET); - if (m == I2O_QUEUE_EMPTY) - return -ETIMEDOUT; + msg = i2o_msg_get_wait(dev->iop, I2O_TIMEOUT_MESSAGE_GET); + if (IS_ERR(msg) == I2O_QUEUE_EMPTY) + return PTR_ERR(msg); - writel(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]); - writel(I2O_CMD_BLOCK_MLOCK << 24 | HOST_TID << 12 | dev->lct_data.tid, - &msg->u.head[1]); - writel(-1, &msg->body[0]); + msg->u.head[0] = cpu_to_le32(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0); + msg->u.head[1] = + cpu_to_le32(I2O_CMD_BLOCK_MLOCK << 24 | HOST_TID << 12 | dev-> + lct_data.tid); + msg->body[0] = cpu_to_le32(-1); osm_debug("Locking...\n"); - return i2o_msg_post_wait(dev->iop, m, 2); + return i2o_msg_post_wait(dev->iop, msg, 2); }; /** @@ -238,20 +240,20 @@ static int i2o_block_device_lock(struct i2o_device *dev, u32 media_id) */ static int i2o_block_device_unlock(struct i2o_device *dev, u32 media_id) { - struct i2o_message __iomem *msg; - u32 m; + struct i2o_message *msg; - m = i2o_msg_get_wait(dev->iop, &msg, I2O_TIMEOUT_MESSAGE_GET); - if (m == I2O_QUEUE_EMPTY) - return -ETIMEDOUT; + msg = i2o_msg_get_wait(dev->iop, I2O_TIMEOUT_MESSAGE_GET); + if (IS_ERR(msg)) + return PTR_ERR(msg); - writel(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]); - writel(I2O_CMD_BLOCK_MUNLOCK << 24 | HOST_TID << 12 | dev->lct_data.tid, - &msg->u.head[1]); - writel(media_id, &msg->body[0]); + msg->u.head[0] = cpu_to_le32(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0); + msg->u.head[1] = + cpu_to_le32(I2O_CMD_BLOCK_MUNLOCK << 24 | HOST_TID << 12 | dev-> + lct_data.tid); + msg->body[0] = cpu_to_le32(media_id); osm_debug("Unlocking...\n"); - return i2o_msg_post_wait(dev->iop, m, 2); + return i2o_msg_post_wait(dev->iop, msg, 2); }; /** @@ -267,21 +269,21 @@ static int i2o_block_device_power(struct i2o_block_device *dev, u8 op) { struct i2o_device *i2o_dev = dev->i2o_dev; struct i2o_controller *c = i2o_dev->iop; - struct i2o_message __iomem *msg; - u32 m; + struct i2o_message *msg; int rc; - m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET); - if (m == I2O_QUEUE_EMPTY) - return -ETIMEDOUT; + msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET); + if (IS_ERR(msg)) + return PTR_ERR(msg); - writel(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]); - writel(I2O_CMD_BLOCK_POWER << 24 | HOST_TID << 12 | i2o_dev->lct_data. - tid, &msg->u.head[1]); - writel(op << 24, &msg->body[0]); + msg->u.head[0] = cpu_to_le32(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0); + msg->u.head[1] = + cpu_to_le32(I2O_CMD_BLOCK_POWER << 24 | HOST_TID << 12 | i2o_dev-> + lct_data.tid); + msg->body[0] = cpu_to_le32(op << 24); osm_debug("Power...\n"); - rc = i2o_msg_post_wait(c, m, 60); + rc = i2o_msg_post_wait(c, msg, 60); if (!rc) dev->power = op; @@ -331,7 +333,7 @@ static inline void i2o_block_request_free(struct i2o_block_request *ireq) */ static inline int i2o_block_sglist_alloc(struct i2o_controller *c, struct i2o_block_request *ireq, - u32 __iomem ** mptr) + u32 ** mptr) { int nents; enum dma_data_direction direction; @@ -745,10 +747,9 @@ static int i2o_block_transfer(struct request *req) struct i2o_block_device *dev = req->rq_disk->private_data; struct i2o_controller *c; int tid = dev->i2o_dev->lct_data.tid; - struct i2o_message __iomem *msg; - u32 __iomem *mptr; + struct i2o_message *msg; + u32 *mptr; struct i2o_block_request *ireq = req->special; - u32 m; u32 tcntxt; u32 sgl_offset = SGL_OFFSET_8; u32 ctl_flags = 0x00000000; @@ -763,9 +764,9 @@ static int i2o_block_transfer(struct request *req) c = dev->i2o_dev->iop; - m = i2o_msg_get(c, &msg); - if (m == I2O_QUEUE_EMPTY) { - rc = -EBUSY; + msg = i2o_msg_get(c); + if (IS_ERR(msg)) { + rc = PTR_ERR(msg); goto exit; } @@ -775,8 +776,8 @@ static int i2o_block_transfer(struct request *req) goto nop_msg; } - writel(i2o_block_driver.context, &msg->u.s.icntxt); - writel(tcntxt, &msg->u.s.tcntxt); + msg->u.s.icntxt = cpu_to_le32(i2o_block_driver.context); + msg->u.s.tcntxt = cpu_to_le32(tcntxt); mptr = &msg->body[0]; @@ -834,11 +835,11 @@ static int i2o_block_transfer(struct request *req) sgl_offset = SGL_OFFSET_12; - writel(I2O_CMD_PRIVATE << 24 | HOST_TID << 12 | tid, - &msg->u.head[1]); + msg->u.head[1] = + cpu_to_le32(I2O_CMD_PRIVATE << 24 | HOST_TID << 12 | tid); - writel(I2O_VENDOR_DPT << 16 | I2O_CMD_SCSI_EXEC, mptr++); - writel(tid, mptr++); + *mptr++ = cpu_to_le32(I2O_VENDOR_DPT << 16 | I2O_CMD_SCSI_EXEC); + *mptr++ = cpu_to_le32(tid); /* * ENABLE_DISCONNECT @@ -846,29 +847,31 @@ static int i2o_block_transfer(struct request *req) * RETURN_SENSE_DATA_IN_REPLY_MESSAGE_FRAME */ if (rq_data_dir(req) == READ) { - cmd[0] = 0x28; + cmd[0] = READ_10; scsi_flags = 0x60a0000a; } else { - cmd[0] = 0x2A; + cmd[0] = WRITE_10; scsi_flags = 0xa0a0000a; } - writel(scsi_flags, mptr++); + *mptr++ = cpu_to_le32(scsi_flags); *((u32 *) & cmd[2]) = cpu_to_be32(req->sector * hwsec); *((u16 *) & cmd[7]) = cpu_to_be16(req->nr_sectors * hwsec); - memcpy_toio(mptr, cmd, 10); + memcpy(mptr, cmd, 10); mptr += 4; - writel(req->nr_sectors << KERNEL_SECTOR_SHIFT, mptr++); + *mptr++ = cpu_to_le32(req->nr_sectors << KERNEL_SECTOR_SHIFT); } else #endif { - writel(cmd | HOST_TID << 12 | tid, &msg->u.head[1]); - writel(ctl_flags, mptr++); - writel(req->nr_sectors << KERNEL_SECTOR_SHIFT, mptr++); - writel((u32) (req->sector << KERNEL_SECTOR_SHIFT), mptr++); - writel(req->sector >> (32 - KERNEL_SECTOR_SHIFT), mptr++); + msg->u.head[1] = cpu_to_le32(cmd | HOST_TID << 12 | tid); + *mptr++ = cpu_to_le32(ctl_flags); + *mptr++ = cpu_to_le32(req->nr_sectors << KERNEL_SECTOR_SHIFT); + *mptr++ = + cpu_to_le32((u32) (req->sector << KERNEL_SECTOR_SHIFT)); + *mptr++ = + cpu_to_le32(req->sector >> (32 - KERNEL_SECTOR_SHIFT)); } if (!i2o_block_sglist_alloc(c, ireq, &mptr)) { @@ -876,13 +879,13 @@ static int i2o_block_transfer(struct request *req) goto context_remove; } - writel(I2O_MESSAGE_SIZE(mptr - &msg->u.head[0]) | - sgl_offset, &msg->u.head[0]); + msg->u.head[0] = + cpu_to_le32(I2O_MESSAGE_SIZE(mptr - &msg->u.head[0]) | sgl_offset); list_add_tail(&ireq->queue, &dev->open_queue); dev->open_queue_depth++; - i2o_msg_post(c, m); + i2o_msg_post(c, msg); return 0; @@ -890,7 +893,7 @@ static int i2o_block_transfer(struct request *req) i2o_cntxt_list_remove(c, req); nop_msg: - i2o_msg_nop(c, m); + i2o_msg_nop(c, msg); exit: return rc; @@ -978,13 +981,12 @@ static struct i2o_block_device *i2o_block_device_alloc(void) struct request_queue *queue; int rc; - dev = kmalloc(sizeof(*dev), GFP_KERNEL); + dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) { osm_err("Insufficient memory to allocate I2O Block disk.\n"); rc = -ENOMEM; goto exit; } - memset(dev, 0, sizeof(*dev)); INIT_LIST_HEAD(&dev->open_queue); spin_lock_init(&dev->lock); @@ -1049,8 +1051,8 @@ static int i2o_block_probe(struct device *dev) int rc; u64 size; u32 blocksize; - u32 flags, status; u16 body_size = 4; + u16 power; unsigned short max_sectors; #ifdef CONFIG_I2O_EXT_ADAPTEC @@ -1108,22 +1110,20 @@ static int i2o_block_probe(struct device *dev) * Ask for the current media data. If that isn't supported * then we ask for the device capacity data */ - if (i2o_parm_field_get(i2o_dev, 0x0004, 1, &blocksize, 4) || - i2o_parm_field_get(i2o_dev, 0x0000, 3, &blocksize, 4)) { - blk_queue_hardsect_size(queue, blocksize); + if (!i2o_parm_field_get(i2o_dev, 0x0004, 1, &blocksize, 4) || + !i2o_parm_field_get(i2o_dev, 0x0000, 3, &blocksize, 4)) { + blk_queue_hardsect_size(queue, le32_to_cpu(blocksize)); } else osm_warn("unable to get blocksize of %s\n", gd->disk_name); - if (i2o_parm_field_get(i2o_dev, 0x0004, 0, &size, 8) || - i2o_parm_field_get(i2o_dev, 0x0000, 4, &size, 8)) { - set_capacity(gd, size >> KERNEL_SECTOR_SHIFT); + if (!i2o_parm_field_get(i2o_dev, 0x0004, 0, &size, 8) || + !i2o_parm_field_get(i2o_dev, 0x0000, 4, &size, 8)) { + set_capacity(gd, le64_to_cpu(size) >> KERNEL_SECTOR_SHIFT); } else osm_warn("could not get size of %s\n", gd->disk_name); - if (!i2o_parm_field_get(i2o_dev, 0x0000, 2, &i2o_blk_dev->power, 2)) - i2o_blk_dev->power = 0; - i2o_parm_field_get(i2o_dev, 0x0000, 5, &flags, 4); - i2o_parm_field_get(i2o_dev, 0x0000, 6, &status, 4); + if (!i2o_parm_field_get(i2o_dev, 0x0000, 2, &power, 2)) + i2o_blk_dev->power = power; i2o_event_register(i2o_dev, &i2o_block_driver, 0, 0xffffffff); diff --git a/drivers/message/i2o/i2o_config.c b/drivers/message/i2o/i2o_config.c index 3c3a7abebb1..89daf67b764 100644 --- a/drivers/message/i2o/i2o_config.c +++ b/drivers/message/i2o/i2o_config.c @@ -36,12 +36,12 @@ #include <asm/uaccess.h> -#include "core.h" - #define SG_TABLESIZE 30 -static int i2o_cfg_ioctl(struct inode *inode, struct file *fp, unsigned int cmd, - unsigned long arg); +extern int i2o_parm_issue(struct i2o_device *, int, void *, int, void *, int); + +static int i2o_cfg_ioctl(struct inode *, struct file *, unsigned int, + unsigned long); static spinlock_t i2o_config_lock; @@ -230,8 +230,7 @@ static int i2o_cfg_swdl(unsigned long arg) struct i2o_sw_xfer __user *pxfer = (struct i2o_sw_xfer __user *)arg; unsigned char maxfrag = 0, curfrag = 1; struct i2o_dma buffer; - struct i2o_message __iomem *msg; - u32 m; + struct i2o_message *msg; unsigned int status = 0, swlen = 0, fragsize = 8192; struct i2o_controller *c; @@ -257,31 +256,34 @@ static int i2o_cfg_swdl(unsigned long arg) if (!c) return -ENXIO; - m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET); - if (m == I2O_QUEUE_EMPTY) - return -EBUSY; + msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET); + if (IS_ERR(msg)) + return PTR_ERR(msg); if (i2o_dma_alloc(&c->pdev->dev, &buffer, fragsize, GFP_KERNEL)) { - i2o_msg_nop(c, m); + i2o_msg_nop(c, msg); return -ENOMEM; } __copy_from_user(buffer.virt, kxfer.buf, fragsize); - writel(NINE_WORD_MSG_SIZE | SGL_OFFSET_7, &msg->u.head[0]); - writel(I2O_CMD_SW_DOWNLOAD << 24 | HOST_TID << 12 | ADAPTER_TID, - &msg->u.head[1]); - writel(i2o_config_driver.context, &msg->u.head[2]); - writel(0, &msg->u.head[3]); - writel((((u32) kxfer.flags) << 24) | (((u32) kxfer.sw_type) << 16) | - (((u32) maxfrag) << 8) | (((u32) curfrag)), &msg->body[0]); - writel(swlen, &msg->body[1]); - writel(kxfer.sw_id, &msg->body[2]); - writel(0xD0000000 | fragsize, &msg->body[3]); - writel(buffer.phys, &msg->body[4]); + msg->u.head[0] = cpu_to_le32(NINE_WORD_MSG_SIZE | SGL_OFFSET_7); + msg->u.head[1] = + cpu_to_le32(I2O_CMD_SW_DOWNLOAD << 24 | HOST_TID << 12 | + ADAPTER_TID); + msg->u.head[2] = cpu_to_le32(i2o_config_driver.context); + msg->u.head[3] = cpu_to_le32(0); + msg->body[0] = + cpu_to_le32((((u32) kxfer.flags) << 24) | (((u32) kxfer. + sw_type) << 16) | + (((u32) maxfrag) << 8) | (((u32) curfrag))); + msg->body[1] = cpu_to_le32(swlen); + msg->body[2] = cpu_to_le32(kxfer.sw_id); + msg->body[3] = cpu_to_le32(0xD0000000 | fragsize); + msg->body[4] = cpu_to_le32(buffer.phys); osm_debug("swdl frag %d/%d (size %d)\n", curfrag, maxfrag, fragsize); - status = i2o_msg_post_wait_mem(c, m, 60, &buffer); + status = i2o_msg_post_wait_mem(c, msg, 60, &buffer); if (status != -ETIMEDOUT) i2o_dma_free(&c->pdev->dev, &buffer); @@ -302,8 +304,7 @@ static int i2o_cfg_swul(unsigned long arg) struct i2o_sw_xfer __user *pxfer = (struct i2o_sw_xfer __user *)arg; unsigned char maxfrag = 0, curfrag = 1; struct i2o_dma buffer; - struct i2o_message __iomem *msg; - u32 m; + struct i2o_message *msg; unsigned int status = 0, swlen = 0, fragsize = 8192; struct i2o_controller *c; int ret = 0; @@ -330,30 +331,30 @@ static int i2o_cfg_swul(unsigned long arg) if (!c) return -ENXIO; - m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET); - if (m == I2O_QUEUE_EMPTY) - return -EBUSY; + msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET); + if (IS_ERR(msg)) + return PTR_ERR(msg); if (i2o_dma_alloc(&c->pdev->dev, &buffer, fragsize, GFP_KERNEL)) { - i2o_msg_nop(c, m); + i2o_msg_nop(c, msg); return -ENOMEM; } - writel(NINE_WORD_MSG_SIZE | SGL_OFFSET_7, &msg->u.head[0]); - writel(I2O_CMD_SW_UPLOAD << 24 | HOST_TID << 12 | ADAPTER_TID, - &msg->u.head[1]); - writel(i2o_config_driver.context, &msg->u.head[2]); - writel(0, &msg->u.head[3]); - writel((u32) kxfer.flags << 24 | (u32) kxfer. - sw_type << 16 | (u32) maxfrag << 8 | (u32) curfrag, - &msg->body[0]); - writel(swlen, &msg->body[1]); - writel(kxfer.sw_id, &msg->body[2]); - writel(0xD0000000 | fragsize, &msg->body[3]); - writel(buffer.phys, &msg->body[4]); + msg->u.head[0] = cpu_to_le32(NINE_WORD_MSG_SIZE | SGL_OFFSET_7); + msg->u.head[1] = + cpu_to_le32(I2O_CMD_SW_UPLOAD << 24 | HOST_TID << 12 | ADAPTER_TID); + msg->u.head[2] = cpu_to_le32(i2o_config_driver.context); + msg->u.head[3] = cpu_to_le32(0); + msg->body[0] = + cpu_to_le32((u32) kxfer.flags << 24 | (u32) kxfer. + sw_type << 16 | (u32) maxfrag << 8 | (u32) curfrag); + msg->body[1] = cpu_to_le32(swlen); + msg->body[2] = cpu_to_le32(kxfer.sw_id); + msg->body[3] = cpu_to_le32(0xD0000000 | fragsize); + msg->body[4] = cpu_to_le32(buffer.phys); osm_debug("swul frag %d/%d (size %d)\n", curfrag, maxfrag, fragsize); - status = i2o_msg_post_wait_mem(c, m, 60, &buffer); + status = i2o_msg_post_wait_mem(c, msg, 60, &buffer); if (status != I2O_POST_WAIT_OK) { if (status != -ETIMEDOUT) @@ -380,8 +381,7 @@ static int i2o_cfg_swdel(unsigned long arg) struct i2o_controller *c; struct i2o_sw_xfer kxfer; struct i2o_sw_xfer __user *pxfer = (struct i2o_sw_xfer __user *)arg; - struct i2o_message __iomem *msg; - u32 m; + struct i2o_message *msg; unsigned int swlen; int token; @@ -395,21 +395,21 @@ static int i2o_cfg_swdel(unsigned long arg) if (!c) return -ENXIO; - m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET); - if (m == I2O_QUEUE_EMPTY) - return -EBUSY; + msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET); + if (IS_ERR(msg)) + return PTR_ERR(msg); - writel(SEVEN_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]); - writel(I2O_CMD_SW_REMOVE << 24 | HOST_TID << 12 | ADAPTER_TID, - &msg->u.head[1]); - writel(i2o_config_driver.context, &msg->u.head[2]); - writel(0, &msg->u.head[3]); - writel((u32) kxfer.flags << 24 | (u32) kxfer.sw_type << 16, - &msg->body[0]); - writel(swlen, &msg->body[1]); - writel(kxfer.sw_id, &msg->body[2]); + msg->u.head[0] = cpu_to_le32(SEVEN_WORD_MSG_SIZE | SGL_OFFSET_0); + msg->u.head[1] = + cpu_to_le32(I2O_CMD_SW_REMOVE << 24 | HOST_TID << 12 | ADAPTER_TID); + msg->u.head[2] = cpu_to_le32(i2o_config_driver.context); + msg->u.head[3] = cpu_to_le32(0); + msg->body[0] = + cpu_to_le32((u32) kxfer.flags << 24 | (u32) kxfer.sw_type << 16); + msg->body[1] = cpu_to_le32(swlen); + msg->body[2] = cpu_to_le32(kxfer.sw_id); - token = i2o_msg_post_wait(c, m, 10); + token = i2o_msg_post_wait(c, msg, 10); if (token != I2O_POST_WAIT_OK) { osm_info("swdel failed, DetailedStatus = %d\n", token); @@ -423,25 +423,24 @@ static int i2o_cfg_validate(unsigned long arg) { int token; int iop = (int)arg; - struct i2o_message __iomem *msg; - u32 m; + struct i2o_message *msg; struct i2o_controller *c; c = i2o_find_iop(iop); if (!c) return -ENXIO; - m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET); - if (m == I2O_QUEUE_EMPTY) - return -EBUSY; + msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET); + if (IS_ERR(msg)) + return PTR_ERR(msg); - writel(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]); - writel(I2O_CMD_CONFIG_VALIDATE << 24 | HOST_TID << 12 | iop, - &msg->u.head[1]); - writel(i2o_config_driver.context, &msg->u.head[2]); - writel(0, &msg->u.head[3]); + msg->u.head[0] = cpu_to_le32(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0); + msg->u.head[1] = + cpu_to_le32(I2O_CMD_CONFIG_VALIDATE << 24 | HOST_TID << 12 | iop); + msg->u.head[2] = cpu_to_le32(i2o_config_driver.context); + msg->u.head[3] = cpu_to_le32(0); - token = i2o_msg_post_wait(c, m, 10); + token = i2o_msg_post_wait(c, msg, 10); if (token != I2O_POST_WAIT_OK) { osm_info("Can't validate configuration, ErrorStatus = %d\n", @@ -454,8 +453,7 @@ static int i2o_cfg_validate(unsigned long arg) static int i2o_cfg_evt_reg(unsigned long arg, struct file *fp) { - struct i2o_message __iomem *msg; - u32 m; + struct i2o_message *msg; struct i2o_evt_id __user *pdesc = (struct i2o_evt_id __user *)arg; struct i2o_evt_id kdesc; struct i2o_controller *c; @@ -474,18 +472,19 @@ static int i2o_cfg_evt_reg(unsigned long arg, struct file *fp) if (!d) return -ENODEV; - m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET); - if (m == I2O_QUEUE_EMPTY) - return -EBUSY; + msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET); + if (IS_ERR(msg)) + return PTR_ERR(msg); - writel(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]); - writel(I2O_CMD_UTIL_EVT_REGISTER << 24 | HOST_TID << 12 | kdesc.tid, - &msg->u.head[1]); - writel(i2o_config_driver.context, &msg->u.head[2]); - writel(i2o_cntxt_list_add(c, fp->private_data), &msg->u.head[3]); - writel(kdesc.evt_mask, &msg->body[0]); + msg->u.head[0] = cpu_to_le32(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0); + msg->u.head[1] = + cpu_to_le32(I2O_CMD_UTIL_EVT_REGISTER << 24 | HOST_TID << 12 | + kdesc.tid); + msg->u.head[2] = cpu_to_le32(i2o_config_driver.context); + msg->u.head[3] = cpu_to_le32(i2o_cntxt_list_add(c, fp->private_data)); + msg->body[0] = cpu_to_le32(kdesc.evt_mask); - i2o_msg_post(c, m); + i2o_msg_post(c, msg); return 0; } @@ -537,7 +536,6 @@ static int i2o_cfg_passthru32(struct file *file, unsigned cmnd, u32 sg_index = 0; i2o_status_block *sb; struct i2o_message *msg; - u32 m; unsigned int iop; cmd = (struct i2o_cmd_passthru32 __user *)arg; @@ -553,7 +551,7 @@ static int i2o_cfg_passthru32(struct file *file, unsigned cmnd, return -ENXIO; } - m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET); + msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET); sb = c->status_block.virt; @@ -585,19 +583,15 @@ static int i2o_cfg_passthru32(struct file *file, unsigned cmnd, reply_size >>= 16; reply_size <<= 2; - reply = kmalloc(reply_size, GFP_KERNEL); + reply = kzalloc(reply_size, GFP_KERNEL); if (!reply) { printk(KERN_WARNING "%s: Could not allocate reply buffer\n", c->name); return -ENOMEM; } - memset(reply, 0, reply_size); sg_offset = (msg->u.head[0] >> 4) & 0x0f; - writel(i2o_config_driver.context, &msg->u.s.icntxt); - writel(i2o_cntxt_list_add(c, reply), &msg->u.s.tcntxt); - memset(sg_list, 0, sizeof(sg_list[0]) * SG_TABLESIZE); if (sg_offset) { struct sg_simple_element *sg; @@ -631,7 +625,7 @@ static int i2o_cfg_passthru32(struct file *file, unsigned cmnd, goto cleanup; } sg_size = sg[i].flag_count & 0xffffff; - p = &(sg_list[sg_index++]); + p = &(sg_list[sg_index]); /* Allocate memory for the transfer */ if (i2o_dma_alloc (&c->pdev->dev, p, sg_size, @@ -642,6 +636,7 @@ static int i2o_cfg_passthru32(struct file *file, unsigned cmnd, rcode = -ENOMEM; goto sg_list_cleanup; } + sg_index++; /* Copy in the user's SG buffer if necessary */ if (sg[i]. flag_count & 0x04000000 /*I2O_SGL_FLAGS_DIR */ ) { @@ -662,9 +657,11 @@ static int i2o_cfg_passthru32(struct file *file, unsigned cmnd, } } - rcode = i2o_msg_post_wait(c, m, 60); - if (rcode) + rcode = i2o_msg_post_wait(c, msg, 60); + if (rcode) { + reply[4] = ((u32) rcode) << 24; goto sg_list_cleanup; + } if (sg_offset) { u32 msg[I2O_OUTBOUND_MSG_FRAME_SIZE]; @@ -714,6 +711,7 @@ static int i2o_cfg_passthru32(struct file *file, unsigned cmnd, } } + sg_list_cleanup: /* Copy back the reply to user space */ if (reply_size) { // we wrote our own values for context - now restore the user supplied ones @@ -731,7 +729,6 @@ static int i2o_cfg_passthru32(struct file *file, unsigned cmnd, } } - sg_list_cleanup: for (i = 0; i < sg_index; i++) i2o_dma_free(&c->pdev->dev, &sg_list[i]); @@ -780,8 +777,7 @@ static int i2o_cfg_passthru(unsigned long arg) u32 i = 0; void *p = NULL; i2o_status_block *sb; - struct i2o_message __iomem *msg; - u32 m; + struct i2o_message *msg; unsigned int iop; if (get_user(iop, &cmd->iop) || get_user(user_msg, &cmd->msg)) @@ -793,7 +789,7 @@ static int i2o_cfg_passthru(unsigned long arg) return -ENXIO; } - m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET); + msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET); sb = c->status_block.virt; @@ -820,19 +816,15 @@ static int i2o_cfg_passthru(unsigned long arg) reply_size >>= 16; reply_size <<= 2; - reply = kmalloc(reply_size, GFP_KERNEL); + reply = kzalloc(reply_size, GFP_KERNEL); if (!reply) { printk(KERN_WARNING "%s: Could not allocate reply buffer\n", c->name); return -ENOMEM; } - memset(reply, 0, reply_size); sg_offset = (msg->u.head[0] >> 4) & 0x0f; - writel(i2o_config_driver.context, &msg->u.s.icntxt); - writel(i2o_cntxt_list_add(c, reply), &msg->u.s.tcntxt); - memset(sg_list, 0, sizeof(sg_list[0]) * SG_TABLESIZE); if (sg_offset) { struct sg_simple_element *sg; @@ -894,9 +886,11 @@ static int i2o_cfg_passthru(unsigned long arg) } } - rcode = i2o_msg_post_wait(c, m, 60); - if (rcode) + rcode = i2o_msg_post_wait(c, msg, 60); + if (rcode) { + reply[4] = ((u32) rcode) << 24; goto sg_list_cleanup; + } if (sg_offset) { u32 msg[128]; @@ -946,6 +940,7 @@ static int i2o_cfg_passthru(unsigned long arg) } } + sg_list_cleanup: /* Copy back the reply to user space */ if (reply_size) { // we wrote our own values for context - now restore the user supplied ones @@ -962,7 +957,6 @@ static int i2o_cfg_passthru(unsigned long arg) } } - sg_list_cleanup: for (i = 0; i < sg_index; i++) kfree(sg_list[i]); diff --git a/drivers/message/i2o/i2o_lan.h b/drivers/message/i2o/i2o_lan.h index 561d63304d7..6502b817df5 100644 --- a/drivers/message/i2o/i2o_lan.h +++ b/drivers/message/i2o/i2o_lan.h @@ -103,14 +103,14 @@ #define I2O_LAN_DSC_SUSPENDED 0x11 struct i2o_packet_info { - u32 offset : 24; - u32 flags : 8; - u32 len : 24; - u32 status : 8; + u32 offset:24; + u32 flags:8; + u32 len:24; + u32 status:8; }; struct i2o_bucket_descriptor { - u32 context; /* FIXME: 64bit support */ + u32 context; /* FIXME: 64bit support */ struct i2o_packet_info packet_info[1]; }; @@ -127,14 +127,14 @@ struct i2o_lan_local { u8 unit; struct i2o_device *i2o_dev; - struct fddi_statistics stats; /* see also struct net_device_stats */ - unsigned short (*type_trans)(struct sk_buff *, struct net_device *); - atomic_t buckets_out; /* nbr of unused buckets on DDM */ - atomic_t tx_out; /* outstanding TXes */ - u8 tx_count; /* packets in one TX message frame */ - u16 tx_max_out; /* DDM's Tx queue len */ - u8 sgl_max; /* max SGLs in one message frame */ - u32 m; /* IOP address of the batch msg frame */ + struct fddi_statistics stats; /* see also struct net_device_stats */ + unsigned short (*type_trans) (struct sk_buff *, struct net_device *); + atomic_t buckets_out; /* nbr of unused buckets on DDM */ + atomic_t tx_out; /* outstanding TXes */ + u8 tx_count; /* packets in one TX message frame */ + u16 tx_max_out; /* DDM's Tx queue len */ + u8 sgl_max; /* max SGLs in one message frame */ + u32 m; /* IOP address of the batch msg frame */ struct work_struct i2o_batch_send_task; int send_active; @@ -144,16 +144,16 @@ struct i2o_lan_local { spinlock_t tx_lock; - u32 max_size_mc_table; /* max number of multicast addresses */ + u32 max_size_mc_table; /* max number of multicast addresses */ /* LAN OSM configurable parameters are here: */ - u16 max_buckets_out; /* max nbr of buckets to send to DDM */ - u16 bucket_thresh; /* send more when this many used */ + u16 max_buckets_out; /* max nbr of buckets to send to DDM */ + u16 bucket_thresh; /* send more when this many used */ u16 rx_copybreak; - u8 tx_batch_mode; /* Set when using batch mode sends */ - u32 i2o_event_mask; /* To turn on interesting event flags */ + u8 tx_batch_mode; /* Set when using batch mode sends */ + u32 i2o_event_mask; /* To turn on interesting event flags */ }; -#endif /* _I2O_LAN_H */ +#endif /* _I2O_LAN_H */ diff --git a/drivers/message/i2o/i2o_proc.c b/drivers/message/i2o/i2o_proc.c index d559a175836..2a0c42b8cda 100644 --- a/drivers/message/i2o/i2o_proc.c +++ b/drivers/message/i2o/i2o_proc.c @@ -28,7 +28,7 @@ */ #define OSM_NAME "proc-osm" -#define OSM_VERSION "1.145" +#define OSM_VERSION "1.316" #define OSM_DESCRIPTION "I2O ProcFS OSM" #define I2O_MAX_MODULES 4 diff --git a/drivers/message/i2o/i2o_scsi.c b/drivers/message/i2o/i2o_scsi.c index 9f1744c3933..f9e5a23697a 100644 --- a/drivers/message/i2o/i2o_scsi.c +++ b/drivers/message/i2o/i2o_scsi.c @@ -70,7 +70,7 @@ #include <scsi/sg_request.h> #define OSM_NAME "scsi-osm" -#define OSM_VERSION "1.282" +#define OSM_VERSION "1.316" #define OSM_DESCRIPTION "I2O SCSI Peripheral OSM" static struct i2o_driver i2o_scsi_driver; @@ -113,7 +113,7 @@ static struct i2o_scsi_host *i2o_scsi_host_alloc(struct i2o_controller *c) list_for_each_entry(i2o_dev, &c->devices, list) if (i2o_dev->lct_data.class_id == I2O_CLASS_BUS_ADAPTER) { - if (i2o_parm_field_get(i2o_dev, 0x0000, 0, &type, 1) + if (!i2o_parm_field_get(i2o_dev, 0x0000, 0, &type, 1) && (type == 0x01)) /* SCSI bus */ max_channel++; } @@ -146,7 +146,7 @@ static struct i2o_scsi_host *i2o_scsi_host_alloc(struct i2o_controller *c) i = 0; list_for_each_entry(i2o_dev, &c->devices, list) if (i2o_dev->lct_data.class_id == I2O_CLASS_BUS_ADAPTER) { - if (i2o_parm_field_get(i2o_dev, 0x0000, 0, &type, 1) + if (!i2o_parm_field_get(i2o_dev, 0x0000, 0, &type, 1) && (type == 0x01)) /* only SCSI bus */ i2o_shost->channel[i++] = i2o_dev; @@ -238,13 +238,15 @@ static int i2o_scsi_probe(struct device *dev) u8 type; struct i2o_device *d = i2o_shost->channel[0]; - if (i2o_parm_field_get(d, 0x0000, 0, &type, 1) + if (!i2o_parm_field_get(d, 0x0000, 0, &type, 1) && (type == 0x01)) /* SCSI bus */ - if (i2o_parm_field_get(d, 0x0200, 4, &id, 4)) { + if (!i2o_parm_field_get(d, 0x0200, 4, &id, 4)) { channel = 0; if (i2o_dev->lct_data.class_id == I2O_CLASS_RANDOM_BLOCK_STORAGE) - lun = i2o_shost->lun++; + lun = + cpu_to_le64(i2o_shost-> + lun++); else lun = 0; } @@ -253,10 +255,10 @@ static int i2o_scsi_probe(struct device *dev) break; case I2O_CLASS_SCSI_PERIPHERAL: - if (i2o_parm_field_get(i2o_dev, 0x0000, 3, &id, 4) < 0) + if (i2o_parm_field_get(i2o_dev, 0x0000, 3, &id, 4)) return -EFAULT; - if (i2o_parm_field_get(i2o_dev, 0x0000, 4, &lun, 8) < 0) + if (i2o_parm_field_get(i2o_dev, 0x0000, 4, &lun, 8)) return -EFAULT; parent = i2o_iop_find_device(c, i2o_dev->lct_data.parent_tid); @@ -281,20 +283,22 @@ static int i2o_scsi_probe(struct device *dev) return -EFAULT; } - if (id >= scsi_host->max_id) { - osm_warn("SCSI device id (%d) >= max_id of I2O host (%d)", id, - scsi_host->max_id); + if (le32_to_cpu(id) >= scsi_host->max_id) { + osm_warn("SCSI device id (%d) >= max_id of I2O host (%d)", + le32_to_cpu(id), scsi_host->max_id); return -EFAULT; } - if (lun >= scsi_host->max_lun) { - osm_warn("SCSI device id (%d) >= max_lun of I2O host (%d)", - (unsigned int)lun, scsi_host->max_lun); + if (le64_to_cpu(lun) >= scsi_host->max_lun) { + osm_warn("SCSI device lun (%lu) >= max_lun of I2O host (%d)", + (long unsigned int)le64_to_cpu(lun), + scsi_host->max_lun); return -EFAULT; } scsi_dev = - __scsi_add_device(i2o_shost->scsi_host, channel, id, lun, i2o_dev); + __scsi_add_device(i2o_shost->scsi_host, channel, le32_to_cpu(id), + le64_to_cpu(lun), i2o_dev); if (IS_ERR(scsi_dev)) { osm_warn("can not add SCSI device %03x\n", @@ -305,8 +309,9 @@ static int i2o_scsi_probe(struct device *dev) sysfs_create_link(&i2o_dev->device.kobj, &scsi_dev->sdev_gendev.kobj, "scsi"); - osm_info("device added (TID: %03x) channel: %d, id: %d, lun: %d\n", - i2o_dev->lct_data.tid, channel, id, (unsigned int)lun); + osm_info("device added (TID: %03x) channel: %d, id: %d, lun: %ld\n", + i2o_dev->lct_data.tid, channel, le32_to_cpu(id), + (long unsigned int)le64_to_cpu(lun)); return 0; }; @@ -510,8 +515,7 @@ static int i2o_scsi_queuecommand(struct scsi_cmnd *SCpnt, struct i2o_controller *c; struct i2o_device *i2o_dev; int tid; - struct i2o_message __iomem *msg; - u32 m; + struct i2o_message *msg; /* * ENABLE_DISCONNECT * SIMPLE_TAG @@ -519,7 +523,7 @@ static int i2o_scsi_queuecommand(struct scsi_cmnd *SCpnt, */ u32 scsi_flags = 0x20a00000; u32 sgl_offset; - u32 __iomem *mptr; + u32 *mptr; u32 cmd = I2O_CMD_SCSI_EXEC << 24; int rc = 0; @@ -576,8 +580,8 @@ static int i2o_scsi_queuecommand(struct scsi_cmnd *SCpnt, * throw it back to the scsi layer */ - m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET); - if (m == I2O_QUEUE_EMPTY) { + msg = i2o_msg_get(c); + if (IS_ERR(msg)) { rc = SCSI_MLQUEUE_HOST_BUSY; goto exit; } @@ -617,16 +621,16 @@ static int i2o_scsi_queuecommand(struct scsi_cmnd *SCpnt, if (sgl_offset == SGL_OFFSET_10) sgl_offset = SGL_OFFSET_12; cmd = I2O_CMD_PRIVATE << 24; - writel(I2O_VENDOR_DPT << 16 | I2O_CMD_SCSI_EXEC, mptr++); - writel(adpt_flags | tid, mptr++); + *mptr++ = cpu_to_le32(I2O_VENDOR_DPT << 16 | I2O_CMD_SCSI_EXEC); + *mptr++ = cpu_to_le32(adpt_flags | tid); } #endif - writel(cmd | HOST_TID << 12 | tid, &msg->u.head[1]); - writel(i2o_scsi_driver.context, &msg->u.s.icntxt); + msg->u.head[1] = cpu_to_le32(cmd | HOST_TID << 12 | tid); + msg->u.s.icntxt = cpu_to_le32(i2o_scsi_driver.context); /* We want the SCSI control block back */ - writel(i2o_cntxt_list_add(c, SCpnt), &msg->u.s.tcntxt); + msg->u.s.tcntxt = cpu_to_le32(i2o_cntxt_list_add(c, SCpnt)); /* LSI_920_PCI_QUIRK * @@ -649,15 +653,15 @@ static int i2o_scsi_queuecommand(struct scsi_cmnd *SCpnt, } */ - writel(scsi_flags | SCpnt->cmd_len, mptr++); + *mptr++ = cpu_to_le32(scsi_flags | SCpnt->cmd_len); /* Write SCSI command into the message - always 16 byte block */ - memcpy_toio(mptr, SCpnt->cmnd, 16); + memcpy(mptr, SCpnt->cmnd, 16); mptr += 4; if (sgl_offset != SGL_OFFSET_0) { /* write size of data addressed by SGL */ - writel(SCpnt->request_bufflen, mptr++); + *mptr++ = cpu_to_le32(SCpnt->request_bufflen); /* Now fill in the SGList and command */ if (SCpnt->use_sg) { @@ -676,11 +680,11 @@ static int i2o_scsi_queuecommand(struct scsi_cmnd *SCpnt, } /* Stick the headers on */ - writel(I2O_MESSAGE_SIZE(mptr - &msg->u.head[0]) | sgl_offset, - &msg->u.head[0]); + msg->u.head[0] = + cpu_to_le32(I2O_MESSAGE_SIZE(mptr - &msg->u.head[0]) | sgl_offset); /* Queue the message */ - i2o_msg_post(c, m); + i2o_msg_post(c, msg); osm_debug("Issued %ld\n", SCpnt->serial_number); @@ -688,7 +692,7 @@ static int i2o_scsi_queuecommand(struct scsi_cmnd *SCpnt, nomem: rc = -ENOMEM; - i2o_msg_nop(c, m); + i2o_msg_nop(c, msg); exit: return rc; @@ -709,8 +713,7 @@ static int i2o_scsi_abort(struct scsi_cmnd *SCpnt) { struct i2o_device *i2o_dev; struct i2o_controller *c; - struct i2o_message __iomem *msg; - u32 m; + struct i2o_message *msg; int tid; int status = FAILED; @@ -720,16 +723,16 @@ static int i2o_scsi_abort(struct scsi_cmnd *SCpnt) c = i2o_dev->iop; tid = i2o_dev->lct_data.tid; - m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET); - if (m == I2O_QUEUE_EMPTY) + msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET); + if (IS_ERR(msg)) return SCSI_MLQUEUE_HOST_BUSY; - writel(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]); - writel(I2O_CMD_SCSI_ABORT << 24 | HOST_TID << 12 | tid, - &msg->u.head[1]); - writel(i2o_cntxt_list_get_ptr(c, SCpnt), &msg->body[0]); + msg->u.head[0] = cpu_to_le32(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0); + msg->u.head[1] = + cpu_to_le32(I2O_CMD_SCSI_ABORT << 24 | HOST_TID << 12 | tid); + msg->body[0] = cpu_to_le32(i2o_cntxt_list_get_ptr(c, SCpnt)); - if (i2o_msg_post_wait(c, m, I2O_TIMEOUT_SCSI_SCB_ABORT)) + if (i2o_msg_post_wait(c, msg, I2O_TIMEOUT_SCSI_SCB_ABORT)) status = SUCCESS; return status; diff --git a/drivers/message/i2o/iop.c b/drivers/message/i2o/iop.c index 4eb53258842..49216744693 100644 --- a/drivers/message/i2o/iop.c +++ b/drivers/message/i2o/iop.c @@ -32,7 +32,7 @@ #include "core.h" #define OSM_NAME "i2o" -#define OSM_VERSION "1.288" +#define OSM_VERSION "1.325" #define OSM_DESCRIPTION "I2O subsystem" /* global I2O controller list */ @@ -47,27 +47,6 @@ static struct i2o_dma i2o_systab; static int i2o_hrt_get(struct i2o_controller *c); /** - * i2o_msg_nop - Returns a message which is not used - * @c: I2O controller from which the message was created - * @m: message which should be returned - * - * If you fetch a message via i2o_msg_get, and can't use it, you must - * return the message with this function. Otherwise the message frame - * is lost. - */ -void i2o_msg_nop(struct i2o_controller *c, u32 m) -{ - struct i2o_message __iomem *msg = i2o_msg_in_to_virt(c, m); - - writel(THREE_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]); - writel(I2O_CMD_UTIL_NOP << 24 | HOST_TID << 12 | ADAPTER_TID, - &msg->u.head[1]); - writel(0, &msg->u.head[2]); - writel(0, &msg->u.head[3]); - i2o_msg_post(c, m); -}; - -/** * i2o_msg_get_wait - obtain an I2O message from the IOP * @c: I2O controller * @msg: pointer to a I2O message pointer @@ -81,22 +60,21 @@ void i2o_msg_nop(struct i2o_controller *c, u32 m) * address from the read port (see the i2o spec). If no message is * available returns I2O_QUEUE_EMPTY and msg is leaved untouched. */ -u32 i2o_msg_get_wait(struct i2o_controller *c, - struct i2o_message __iomem ** msg, int wait) +struct i2o_message *i2o_msg_get_wait(struct i2o_controller *c, int wait) { unsigned long timeout = jiffies + wait * HZ; - u32 m; + struct i2o_message *msg; - while ((m = i2o_msg_get(c, msg)) == I2O_QUEUE_EMPTY) { + while (IS_ERR(msg = i2o_msg_get(c))) { if (time_after(jiffies, timeout)) { osm_debug("%s: Timeout waiting for message frame.\n", c->name); - return I2O_QUEUE_EMPTY; + return ERR_PTR(-ETIMEDOUT); } schedule_timeout_uninterruptible(1); } - return m; + return msg; }; #if BITS_PER_LONG == 64 @@ -301,8 +279,7 @@ struct i2o_device *i2o_iop_find_device(struct i2o_controller *c, u16 tid) */ static int i2o_iop_quiesce(struct i2o_controller *c) { - struct i2o_message __iomem *msg; - u32 m; + struct i2o_message *msg; i2o_status_block *sb = c->status_block.virt; int rc; @@ -313,16 +290,17 @@ static int i2o_iop_quiesce(struct i2o_controller *c) (sb->iop_state != ADAPTER_STATE_OPERATIONAL)) return 0; - m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET); - if (m == I2O_QUEUE_EMPTY) - return -ETIMEDOUT; + msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET); + if (IS_ERR(msg)) + return PTR_ERR(msg); - writel(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]); - writel(I2O_CMD_SYS_QUIESCE << 24 | HOST_TID << 12 | ADAPTER_TID, - &msg->u.head[1]); + msg->u.head[0] = cpu_to_le32(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0); + msg->u.head[1] = + cpu_to_le32(I2O_CMD_SYS_QUIESCE << 24 | HOST_TID << 12 | + ADAPTER_TID); /* Long timeout needed for quiesce if lots of devices */ - if ((rc = i2o_msg_post_wait(c, m, 240))) + if ((rc = i2o_msg_post_wait(c, msg, 240))) osm_info("%s: Unable to quiesce (status=%#x).\n", c->name, -rc); else osm_debug("%s: Quiesced.\n", c->name); @@ -342,8 +320,7 @@ static int i2o_iop_quiesce(struct i2o_controller *c) */ static int i2o_iop_enable(struct i2o_controller *c) { - struct i2o_message __iomem *msg; - u32 m; + struct i2o_message *msg; i2o_status_block *sb = c->status_block.virt; int rc; @@ -353,16 +330,17 @@ static int i2o_iop_enable(struct i2o_controller *c) if (sb->iop_state != ADAPTER_STATE_READY) return -EINVAL; - m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET); - if (m == I2O_QUEUE_EMPTY) - return -ETIMEDOUT; + msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET); + if (IS_ERR(msg)) + return PTR_ERR(msg); - writel(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]); - writel(I2O_CMD_SYS_ENABLE << 24 | HOST_TID << 12 | ADAPTER_TID, - &msg->u.head[1]); + msg->u.head[0] = cpu_to_le32(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0); + msg->u.head[1] = + cpu_to_le32(I2O_CMD_SYS_ENABLE << 24 | HOST_TID << 12 | + ADAPTER_TID); /* How long of a timeout do we need? */ - if ((rc = i2o_msg_post_wait(c, m, 240))) + if ((rc = i2o_msg_post_wait(c, msg, 240))) osm_err("%s: Could not enable (status=%#x).\n", c->name, -rc); else osm_debug("%s: Enabled.\n", c->name); @@ -413,22 +391,22 @@ static inline void i2o_iop_enable_all(void) */ static int i2o_iop_clear(struct i2o_controller *c) { - struct i2o_message __iomem *msg; - u32 m; + struct i2o_message *msg; int rc; - m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET); - if (m == I2O_QUEUE_EMPTY) - return -ETIMEDOUT; + msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET); + if (IS_ERR(msg)) + return PTR_ERR(msg); /* Quiesce all IOPs first */ i2o_iop_quiesce_all(); - writel(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]); - writel(I2O_CMD_ADAPTER_CLEAR << 24 | HOST_TID << 12 | ADAPTER_TID, - &msg->u.head[1]); + msg->u.head[0] = cpu_to_le32(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0); + msg->u.head[1] = + cpu_to_le32(I2O_CMD_ADAPTER_CLEAR << 24 | HOST_TID << 12 | + ADAPTER_TID); - if ((rc = i2o_msg_post_wait(c, m, 30))) + if ((rc = i2o_msg_post_wait(c, msg, 30))) osm_info("%s: Unable to clear (status=%#x).\n", c->name, -rc); else osm_debug("%s: Cleared.\n", c->name); @@ -446,13 +424,13 @@ static int i2o_iop_clear(struct i2o_controller *c) * Clear and (re)initialize IOP's outbound queue and post the message * frames to the IOP. * - * Returns 0 on success or a negative errno code on failure. + * Returns 0 on success or negative error code on failure. */ static int i2o_iop_init_outbound_queue(struct i2o_controller *c) { - volatile u8 *status = c->status.virt; u32 m; - struct i2o_message __iomem *msg; + volatile u8 *status = c->status.virt; + struct i2o_message *msg; ulong timeout; int i; @@ -460,23 +438,24 @@ static int i2o_iop_init_outbound_queue(struct i2o_controller *c) memset(c->status.virt, 0, 4); - m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET); - if (m == I2O_QUEUE_EMPTY) - return -ETIMEDOUT; - - writel(EIGHT_WORD_MSG_SIZE | SGL_OFFSET_6, &msg->u.head[0]); - writel(I2O_CMD_OUTBOUND_INIT << 24 | HOST_TID << 12 | ADAPTER_TID, - &msg->u.head[1]); - writel(i2o_exec_driver.context, &msg->u.s.icntxt); - writel(0x00000000, &msg->u.s.tcntxt); - writel(PAGE_SIZE, &msg->body[0]); + msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET); + if (IS_ERR(msg)) + return PTR_ERR(msg); + + msg->u.head[0] = cpu_to_le32(EIGHT_WORD_MSG_SIZE | SGL_OFFSET_6); + msg->u.head[1] = + cpu_to_le32(I2O_CMD_OUTBOUND_INIT << 24 | HOST_TID << 12 | + ADAPTER_TID); + msg->u.s.icntxt = cpu_to_le32(i2o_exec_driver.context); + msg->u.s.tcntxt = cpu_to_le32(0x00000000); + msg->body[0] = cpu_to_le32(PAGE_SIZE); /* Outbound msg frame size in words and Initcode */ - writel(I2O_OUTBOUND_MSG_FRAME_SIZE << 16 | 0x80, &msg->body[1]); - writel(0xd0000004, &msg->body[2]); - writel(i2o_dma_low(c->status.phys), &msg->body[3]); - writel(i2o_dma_high(c->status.phys), &msg->body[4]); + msg->body[1] = cpu_to_le32(I2O_OUTBOUND_MSG_FRAME_SIZE << 16 | 0x80); + msg->body[2] = cpu_to_le32(0xd0000004); + msg->body[3] = cpu_to_le32(i2o_dma_low(c->status.phys)); + msg->body[4] = cpu_to_le32(i2o_dma_high(c->status.phys)); - i2o_msg_post(c, m); + i2o_msg_post(c, msg); timeout = jiffies + I2O_TIMEOUT_INIT_OUTBOUND_QUEUE * HZ; while (*status <= I2O_CMD_IN_PROGRESS) { @@ -511,34 +490,34 @@ static int i2o_iop_init_outbound_queue(struct i2o_controller *c) static int i2o_iop_reset(struct i2o_controller *c) { volatile u8 *status = c->status.virt; - struct i2o_message __iomem *msg; - u32 m; + struct i2o_message *msg; unsigned long timeout; i2o_status_block *sb = c->status_block.virt; int rc = 0; osm_debug("%s: Resetting controller\n", c->name); - m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET); - if (m == I2O_QUEUE_EMPTY) - return -ETIMEDOUT; + msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET); + if (IS_ERR(msg)) + return PTR_ERR(msg); memset(c->status_block.virt, 0, 8); /* Quiesce all IOPs first */ i2o_iop_quiesce_all(); - writel(EIGHT_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]); - writel(I2O_CMD_ADAPTER_RESET << 24 | HOST_TID << 12 | ADAPTER_TID, - &msg->u.head[1]); - writel(i2o_exec_driver.context, &msg->u.s.icntxt); - writel(0, &msg->u.s.tcntxt); //FIXME: use reasonable transaction context - writel(0, &msg->body[0]); - writel(0, &msg->body[1]); - writel(i2o_dma_low(c->status.phys), &msg->body[2]); - writel(i2o_dma_high(c->status.phys), &msg->body[3]); + msg->u.head[0] = cpu_to_le32(EIGHT_WORD_MSG_SIZE | SGL_OFFSET_0); + msg->u.head[1] = + cpu_to_le32(I2O_CMD_ADAPTER_RESET << 24 | HOST_TID << 12 | + ADAPTER_TID); + msg->u.s.icntxt = cpu_to_le32(i2o_exec_driver.context); + msg->u.s.tcntxt = cpu_to_le32(0x00000000); + msg->body[0] = cpu_to_le32(0x00000000); + msg->body[1] = cpu_to_le32(0x00000000); + msg->body[2] = cpu_to_le32(i2o_dma_low(c->status.phys)); + msg->body[3] = cpu_to_le32(i2o_dma_high(c->status.phys)); - i2o_msg_post(c, m); + i2o_msg_post(c, msg); /* Wait for a reply */ timeout = jiffies + I2O_TIMEOUT_RESET * HZ; @@ -567,18 +546,15 @@ static int i2o_iop_reset(struct i2o_controller *c) osm_debug("%s: Reset in progress, waiting for reboot...\n", c->name); - m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_RESET); - while (m == I2O_QUEUE_EMPTY) { + while (IS_ERR(msg = i2o_msg_get_wait(c, I2O_TIMEOUT_RESET))) { if (time_after(jiffies, timeout)) { osm_err("%s: IOP reset timeout.\n", c->name); - rc = -ETIMEDOUT; + rc = PTR_ERR(msg); goto exit; } schedule_timeout_uninterruptible(1); - - m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_RESET); } - i2o_msg_nop(c, m); + i2o_msg_nop(c, msg); /* from here all quiesce commands are safe */ c->no_quiesce = 0; @@ -686,8 +662,7 @@ static int i2o_iop_activate(struct i2o_controller *c) */ static int i2o_iop_systab_set(struct i2o_controller *c) { - struct i2o_message __iomem *msg; - u32 m; + struct i2o_message *msg; i2o_status_block *sb = c->status_block.virt; struct device *dev = &c->pdev->dev; struct resource *root; @@ -735,41 +710,38 @@ static int i2o_iop_systab_set(struct i2o_controller *c) } } - m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET); - if (m == I2O_QUEUE_EMPTY) - return -ETIMEDOUT; + msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET); + if (IS_ERR(msg)) + return PTR_ERR(msg); i2o_systab.phys = dma_map_single(dev, i2o_systab.virt, i2o_systab.len, PCI_DMA_TODEVICE); if (!i2o_systab.phys) { - i2o_msg_nop(c, m); + i2o_msg_nop(c, msg); return -ENOMEM; } - writel(I2O_MESSAGE_SIZE(12) | SGL_OFFSET_6, &msg->u.head[0]); - writel(I2O_CMD_SYS_TAB_SET << 24 | HOST_TID << 12 | ADAPTER_TID, - &msg->u.head[1]); + msg->u.head[0] = cpu_to_le32(I2O_MESSAGE_SIZE(12) | SGL_OFFSET_6); + msg->u.head[1] = + cpu_to_le32(I2O_CMD_SYS_TAB_SET << 24 | HOST_TID << 12 | + ADAPTER_TID); /* * Provide three SGL-elements: * System table (SysTab), Private memory space declaration and * Private i/o space declaration - * - * FIXME: is this still true? - * Nasty one here. We can't use dma_alloc_coherent to send the - * same table to everyone. We have to go remap it for them all */ - writel(c->unit + 2, &msg->body[0]); - writel(0, &msg->body[1]); - writel(0x54000000 | i2o_systab.len, &msg->body[2]); - writel(i2o_systab.phys, &msg->body[3]); - writel(0x54000000 | sb->current_mem_size, &msg->body[4]); - writel(sb->current_mem_base, &msg->body[5]); - writel(0xd4000000 | sb->current_io_size, &msg->body[6]); - writel(sb->current_io_base, &msg->body[6]); + msg->body[0] = cpu_to_le32(c->unit + 2); + msg->body[1] = cpu_to_le32(0x00000000); + msg->body[2] = cpu_to_le32(0x54000000 | i2o_systab.len); + msg->body[3] = cpu_to_le32(i2o_systab.phys); + msg->body[4] = cpu_to_le32(0x54000000 | sb->current_mem_size); + msg->body[5] = cpu_to_le32(sb->current_mem_base); + msg->body[6] = cpu_to_le32(0xd4000000 | sb->current_io_size); + msg->body[6] = cpu_to_le32(sb->current_io_base); - rc = i2o_msg_post_wait(c, m, 120); + rc = i2o_msg_post_wait(c, msg, 120); dma_unmap_single(dev, i2o_systab.phys, i2o_systab.len, PCI_DMA_TODEVICE); @@ -780,8 +752,6 @@ static int i2o_iop_systab_set(struct i2o_controller *c) else osm_debug("%s: SysTab set.\n", c->name); - i2o_status_get(c); // Entered READY state - return rc; } @@ -791,7 +761,7 @@ static int i2o_iop_systab_set(struct i2o_controller *c) * * Send the system table and enable the I2O controller. * - * Returns 0 on success or negativer error code on failure. + * Returns 0 on success or negative error code on failure. */ static int i2o_iop_online(struct i2o_controller *c) { @@ -830,7 +800,6 @@ void i2o_iop_remove(struct i2o_controller *c) list_for_each_entry_safe(dev, tmp, &c->devices, list) i2o_device_remove(dev); - class_device_unregister(c->classdev); device_del(&c->device); /* Ask the IOP to switch to RESET state */ @@ -869,12 +838,11 @@ static int i2o_systab_build(void) i2o_systab.len = sizeof(struct i2o_sys_tbl) + num_controllers * sizeof(struct i2o_sys_tbl_entry); - systab = i2o_systab.virt = kmalloc(i2o_systab.len, GFP_KERNEL); + systab = i2o_systab.virt = kzalloc(i2o_systab.len, GFP_KERNEL); if (!systab) { osm_err("unable to allocate memory for System Table\n"); return -ENOMEM; } - memset(systab, 0, i2o_systab.len); systab->version = I2OVERSION; systab->change_ind = change_ind + 1; @@ -952,30 +920,30 @@ static int i2o_parse_hrt(struct i2o_controller *c) */ int i2o_status_get(struct i2o_controller *c) { - struct i2o_message __iomem *msg; - u32 m; + struct i2o_message *msg; volatile u8 *status_block; unsigned long timeout; status_block = (u8 *) c->status_block.virt; memset(c->status_block.virt, 0, sizeof(i2o_status_block)); - m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET); - if (m == I2O_QUEUE_EMPTY) - return -ETIMEDOUT; + msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET); + if (IS_ERR(msg)) + return PTR_ERR(msg); - writel(NINE_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]); - writel(I2O_CMD_STATUS_GET << 24 | HOST_TID << 12 | ADAPTER_TID, - &msg->u.head[1]); - writel(i2o_exec_driver.context, &msg->u.s.icntxt); - writel(0, &msg->u.s.tcntxt); // FIXME: use resonable transaction context - writel(0, &msg->body[0]); - writel(0, &msg->body[1]); - writel(i2o_dma_low(c->status_block.phys), &msg->body[2]); - writel(i2o_dma_high(c->status_block.phys), &msg->body[3]); - writel(sizeof(i2o_status_block), &msg->body[4]); /* always 88 bytes */ + msg->u.head[0] = cpu_to_le32(NINE_WORD_MSG_SIZE | SGL_OFFSET_0); + msg->u.head[1] = + cpu_to_le32(I2O_CMD_STATUS_GET << 24 | HOST_TID << 12 | + ADAPTER_TID); + msg->u.s.icntxt = cpu_to_le32(i2o_exec_driver.context); + msg->u.s.tcntxt = cpu_to_le32(0x00000000); + msg->body[0] = cpu_to_le32(0x00000000); + msg->body[1] = cpu_to_le32(0x00000000); + msg->body[2] = cpu_to_le32(i2o_dma_low(c->status_block.phys)); + msg->body[3] = cpu_to_le32(i2o_dma_high(c->status_block.phys)); + msg->body[4] = cpu_to_le32(sizeof(i2o_status_block)); /* always 88 bytes */ - i2o_msg_post(c, m); + i2o_msg_post(c, msg); /* Wait for a reply */ timeout = jiffies + I2O_TIMEOUT_STATUS_GET * HZ; @@ -1002,7 +970,7 @@ int i2o_status_get(struct i2o_controller *c) * The HRT contains information about possible hidden devices but is * mostly useless to us. * - * Returns 0 on success or negativer error code on failure. + * Returns 0 on success or negative error code on failure. */ static int i2o_hrt_get(struct i2o_controller *c) { @@ -1013,20 +981,20 @@ static int i2o_hrt_get(struct i2o_controller *c) struct device *dev = &c->pdev->dev; for (i = 0; i < I2O_HRT_GET_TRIES; i++) { - struct i2o_message __iomem *msg; - u32 m; + struct i2o_message *msg; - m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET); - if (m == I2O_QUEUE_EMPTY) - return -ETIMEDOUT; + msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET); + if (IS_ERR(msg)) + return PTR_ERR(msg); - writel(SIX_WORD_MSG_SIZE | SGL_OFFSET_4, &msg->u.head[0]); - writel(I2O_CMD_HRT_GET << 24 | HOST_TID << 12 | ADAPTER_TID, - &msg->u.head[1]); - writel(0xd0000000 | c->hrt.len, &msg->body[0]); - writel(c->hrt.phys, &msg->body[1]); + msg->u.head[0] = cpu_to_le32(SIX_WORD_MSG_SIZE | SGL_OFFSET_4); + msg->u.head[1] = + cpu_to_le32(I2O_CMD_HRT_GET << 24 | HOST_TID << 12 | + ADAPTER_TID); + msg->body[0] = cpu_to_le32(0xd0000000 | c->hrt.len); + msg->body[1] = cpu_to_le32(c->hrt.phys); - rc = i2o_msg_post_wait_mem(c, m, 20, &c->hrt); + rc = i2o_msg_post_wait_mem(c, msg, 20, &c->hrt); if (rc < 0) { osm_err("%s: Unable to get HRT (status=%#x)\n", c->name, @@ -1051,15 +1019,6 @@ static int i2o_hrt_get(struct i2o_controller *c) } /** - * i2o_iop_free - Free the i2o_controller struct - * @c: I2O controller to free - */ -void i2o_iop_free(struct i2o_controller *c) -{ - kfree(c); -}; - -/** * i2o_iop_release - release the memory for a I2O controller * @dev: I2O controller which should be released * @@ -1073,14 +1032,11 @@ static void i2o_iop_release(struct device *dev) i2o_iop_free(c); }; -/* I2O controller class */ -static struct class *i2o_controller_class; - /** * i2o_iop_alloc - Allocate and initialize a i2o_controller struct * * Allocate the necessary memory for a i2o_controller struct and - * initialize the lists. + * initialize the lists and message mempool. * * Returns a pointer to the I2O controller or a negative error code on * failure. @@ -1089,20 +1045,29 @@ struct i2o_controller *i2o_iop_alloc(void) { static int unit = 0; /* 0 and 1 are NULL IOP and Local Host */ struct i2o_controller *c; + char poolname[32]; - c = kmalloc(sizeof(*c), GFP_KERNEL); + c = kzalloc(sizeof(*c), GFP_KERNEL); if (!c) { osm_err("i2o: Insufficient memory to allocate a I2O controller." "\n"); return ERR_PTR(-ENOMEM); } - memset(c, 0, sizeof(*c)); + + c->unit = unit++; + sprintf(c->name, "iop%d", c->unit); + + snprintf(poolname, sizeof(poolname), "i2o_%s_msg_inpool", c->name); + if (i2o_pool_alloc + (&c->in_msg, poolname, I2O_INBOUND_MSG_FRAME_SIZE * 4, + I2O_MSG_INPOOL_MIN)) { + kfree(c); + return ERR_PTR(-ENOMEM); + }; INIT_LIST_HEAD(&c->devices); spin_lock_init(&c->lock); init_MUTEX(&c->lct_lock); - c->unit = unit++; - sprintf(c->name, "iop%d", c->unit); device_initialize(&c->device); @@ -1137,36 +1102,29 @@ int i2o_iop_add(struct i2o_controller *c) goto iop_reset; } - c->classdev = class_device_create(i2o_controller_class, NULL, MKDEV(0,0), - &c->device, "iop%d", c->unit); - if (IS_ERR(c->classdev)) { - osm_err("%s: could not add controller class\n", c->name); - goto device_del; - } - osm_info("%s: Activating I2O controller...\n", c->name); osm_info("%s: This may take a few minutes if there are many devices\n", c->name); if ((rc = i2o_iop_activate(c))) { osm_err("%s: could not activate controller\n", c->name); - goto class_del; + goto device_del; } osm_debug("%s: building sys table...\n", c->name); if ((rc = i2o_systab_build())) - goto class_del; + goto device_del; osm_debug("%s: online controller...\n", c->name); if ((rc = i2o_iop_online(c))) - goto class_del; + goto device_del; osm_debug("%s: getting LCT...\n", c->name); if ((rc = i2o_exec_lct_get(c))) - goto class_del; + goto device_del; list_add(&c->list, &i2o_controllers); @@ -1176,9 +1134,6 @@ int i2o_iop_add(struct i2o_controller *c) return 0; - class_del: - class_device_unregister(c->classdev); - device_del: device_del(&c->device); @@ -1199,28 +1154,27 @@ int i2o_iop_add(struct i2o_controller *c) * is waited for, or expected. If you do not want further notifications, * call the i2o_event_register again with a evt_mask of 0. * - * Returns 0 on success or -ETIMEDOUT if no message could be fetched for - * sending the request. + * Returns 0 on success or negative error code on failure. */ int i2o_event_register(struct i2o_device *dev, struct i2o_driver *drv, int tcntxt, u32 evt_mask) { struct i2o_controller *c = dev->iop; - struct i2o_message __iomem *msg; - u32 m; + struct i2o_message *msg; - m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET); - if (m == I2O_QUEUE_EMPTY) - return -ETIMEDOUT; + msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET); + if (IS_ERR(msg)) + return PTR_ERR(msg); - writel(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]); - writel(I2O_CMD_UTIL_EVT_REGISTER << 24 | HOST_TID << 12 | dev->lct_data. - tid, &msg->u.head[1]); - writel(drv->context, &msg->u.s.icntxt); - writel(tcntxt, &msg->u.s.tcntxt); - writel(evt_mask, &msg->body[0]); + msg->u.head[0] = cpu_to_le32(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0); + msg->u.head[1] = + cpu_to_le32(I2O_CMD_UTIL_EVT_REGISTER << 24 | HOST_TID << 12 | dev-> + lct_data.tid); + msg->u.s.icntxt = cpu_to_le32(drv->context); + msg->u.s.tcntxt = cpu_to_le32(tcntxt); + msg->body[0] = cpu_to_le32(evt_mask); - i2o_msg_post(c, m); + i2o_msg_post(c, msg); return 0; }; @@ -1239,14 +1193,8 @@ static int __init i2o_iop_init(void) printk(KERN_INFO OSM_DESCRIPTION " v" OSM_VERSION "\n"); - i2o_controller_class = class_create(THIS_MODULE, "i2o_controller"); - if (IS_ERR(i2o_controller_class)) { - osm_err("can't register class i2o_controller\n"); - goto exit; - } - if ((rc = i2o_driver_init())) - goto class_exit; + goto exit; if ((rc = i2o_exec_init())) goto driver_exit; @@ -1262,9 +1210,6 @@ static int __init i2o_iop_init(void) driver_exit: i2o_driver_exit(); - class_exit: - class_destroy(i2o_controller_class); - exit: return rc; } @@ -1279,7 +1224,6 @@ static void __exit i2o_iop_exit(void) i2o_pci_exit(); i2o_exec_exit(); i2o_driver_exit(); - class_destroy(i2o_controller_class); }; module_init(i2o_iop_init); diff --git a/drivers/message/i2o/pci.c b/drivers/message/i2o/pci.c index ee7075fa1ec..c5b656cdea7 100644 --- a/drivers/message/i2o/pci.c +++ b/drivers/message/i2o/pci.c @@ -339,7 +339,7 @@ static int __devinit i2o_pci_probe(struct pci_dev *pdev, pci_name(pdev)); c->pdev = pdev; - c->device.parent = get_device(&pdev->dev); + c->device.parent = &pdev->dev; /* Cards that fall apart if you hit them with large I/O loads... */ if (pdev->vendor == PCI_VENDOR_ID_NCR && pdev->device == 0x0630) { @@ -410,8 +410,6 @@ static int __devinit i2o_pci_probe(struct pci_dev *pdev, if ((rc = i2o_iop_add(c))) goto uninstall; - get_device(&c->device); - if (i960) pci_write_config_word(i960, 0x42, 0x03ff); @@ -424,7 +422,6 @@ static int __devinit i2o_pci_probe(struct pci_dev *pdev, i2o_pci_free(c); free_controller: - put_device(c->device.parent); i2o_iop_free(c); disable: @@ -454,7 +451,6 @@ static void __devexit i2o_pci_remove(struct pci_dev *pdev) printk(KERN_INFO "%s: Controller removed.\n", c->name); - put_device(c->device.parent); put_device(&c->device); }; @@ -483,4 +479,5 @@ void __exit i2o_pci_exit(void) { pci_unregister_driver(&i2o_pci_driver); }; + MODULE_DEVICE_TABLE(pci, i2o_pci_ids); diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index c782a632980..fa39b944bc4 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -6,7 +6,7 @@ menu "PHY device support" config PHYLIB tristate "PHY Device support and infrastructure" - depends on NET_ETHERNET && (BROKEN || !ARCH_S390) + depends on NET_ETHERNET && (BROKEN || !S390) help Ethernet controllers are usually attached to PHY devices. This option provides infrastructure for diff --git a/drivers/net/plip.c b/drivers/net/plip.c index 1bd22cd40c7..87ee3271b17 100644 --- a/drivers/net/plip.c +++ b/drivers/net/plip.c @@ -98,7 +98,6 @@ static const char version[] = "NET3 PLIP version 2.4-parport gniibe@mri.co.jp\n" #include <linux/in.h> #include <linux/errno.h> #include <linux/delay.h> -#include <linux/lp.h> #include <linux/init.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> @@ -106,7 +105,6 @@ static const char version[] = "NET3 PLIP version 2.4-parport gniibe@mri.co.jp\n" #include <linux/skbuff.h> #include <linux/if_plip.h> #include <linux/workqueue.h> -#include <linux/ioport.h> #include <linux/spinlock.h> #include <linux/parport.h> #include <linux/bitops.h> diff --git a/drivers/parport/Kconfig b/drivers/parport/Kconfig index 725a14119f2..b8241561da4 100644 --- a/drivers/parport/Kconfig +++ b/drivers/parport/Kconfig @@ -77,7 +77,7 @@ config PARPORT_PC_SUPERIO config PARPORT_PC_PCMCIA tristate "Support for PCMCIA management for PC-style ports" - depends on PARPORT!=n && (PCMCIA!=n && PARPORT_PC=m && PARPORT_PC || PARPORT_PC=y && PCMCIA) + depends on PCMCIA && PARPORT_PC help Say Y here if you need PCMCIA support for your PC-style parallel ports. If unsure, say N. diff --git a/drivers/parport/daisy.c b/drivers/parport/daisy.c index 075c7eb5c85..9ee67321b63 100644 --- a/drivers/parport/daisy.c +++ b/drivers/parport/daisy.c @@ -144,9 +144,9 @@ again: add_dev (numdevs++, port, -1); /* Find out the legacy device's IEEE 1284 device ID. */ - deviceid = kmalloc (1000, GFP_KERNEL); + deviceid = kmalloc (1024, GFP_KERNEL); if (deviceid) { - if (parport_device_id (numdevs - 1, deviceid, 1000) > 2) + if (parport_device_id (numdevs - 1, deviceid, 1024) > 2) detected++; kfree (deviceid); @@ -252,7 +252,7 @@ struct pardevice *parport_open (int devnum, const char *name, selected = port->daisy; parport_release (dev); - if (selected != port->daisy) { + if (selected != daisy) { /* No corresponding device. */ parport_unregister_device (dev); return NULL; @@ -344,9 +344,9 @@ static int cpp_daisy (struct parport *port, int cmd) PARPORT_CONTROL_STROBE, PARPORT_CONTROL_STROBE); udelay (1); + s = parport_read_status (port); parport_frob_control (port, PARPORT_CONTROL_STROBE, 0); udelay (1); - s = parport_read_status (port); parport_write_data (port, 0xff); udelay (2); return s; @@ -395,15 +395,15 @@ int parport_daisy_select (struct parport *port, int daisy, int mode) case IEEE1284_MODE_EPP: case IEEE1284_MODE_EPPSL: case IEEE1284_MODE_EPPSWE: - return (cpp_daisy (port, 0x20 + daisy) & - PARPORT_STATUS_ERROR); + return !(cpp_daisy (port, 0x20 + daisy) & + PARPORT_STATUS_ERROR); // For these modes we should switch to ECP mode: case IEEE1284_MODE_ECP: case IEEE1284_MODE_ECPRLE: case IEEE1284_MODE_ECPSWE: - return (cpp_daisy (port, 0xd0 + daisy) & - PARPORT_STATUS_ERROR); + return !(cpp_daisy (port, 0xd0 + daisy) & + PARPORT_STATUS_ERROR); // Nothing was told for BECP in Daisy chain specification. // May be it's wise to use ECP? @@ -413,8 +413,8 @@ int parport_daisy_select (struct parport *port, int daisy, int mode) case IEEE1284_MODE_BYTE: case IEEE1284_MODE_COMPAT: default: - return (cpp_daisy (port, 0xe0 + daisy) & - PARPORT_STATUS_ERROR); + return !(cpp_daisy (port, 0xe0 + daisy) & + PARPORT_STATUS_ERROR); } } @@ -436,7 +436,7 @@ static int select_port (struct parport *port) static int assign_addrs (struct parport *port) { - unsigned char s, last_dev; + unsigned char s; unsigned char daisy; int thisdev = numdevs; int detected; @@ -472,10 +472,13 @@ static int assign_addrs (struct parport *port) } parport_write_data (port, 0x78); udelay (2); - last_dev = 0; /* We've just been speaking to a device, so we - know there must be at least _one_ out there. */ + s = parport_read_status (port); - for (daisy = 0; daisy < 4; daisy++) { + for (daisy = 0; + (s & (PARPORT_STATUS_PAPEROUT|PARPORT_STATUS_SELECT)) + == (PARPORT_STATUS_PAPEROUT|PARPORT_STATUS_SELECT) + && daisy < 4; + ++daisy) { parport_write_data (port, daisy); udelay (2); parport_frob_control (port, @@ -485,14 +488,18 @@ static int assign_addrs (struct parport *port) parport_frob_control (port, PARPORT_CONTROL_STROBE, 0); udelay (1); - if (last_dev) - /* No more devices. */ - break; + add_dev (numdevs++, port, daisy); - last_dev = !(parport_read_status (port) - & PARPORT_STATUS_BUSY); + /* See if this device thought it was the last in the + * chain. */ + if (!(s & PARPORT_STATUS_BUSY)) + break; - add_dev (numdevs++, port, daisy); + /* We are seeing pass through status now. We see + last_dev from next device or if last_dev does not + work status lines from some non-daisy chain + device. */ + s = parport_read_status (port); } parport_write_data (port, 0xff); udelay (2); @@ -501,11 +508,11 @@ static int assign_addrs (struct parport *port) detected); /* Ask the new devices to introduce themselves. */ - deviceid = kmalloc (1000, GFP_KERNEL); + deviceid = kmalloc (1024, GFP_KERNEL); if (!deviceid) return 0; for (daisy = 0; thisdev < numdevs; thisdev++, daisy++) - parport_device_id (thisdev, deviceid, 1000); + parport_device_id (thisdev, deviceid, 1024); kfree (deviceid); return detected; diff --git a/drivers/parport/ieee1284_ops.c b/drivers/parport/ieee1284_ops.c index ce1e2aad8b1..d6c77658231 100644 --- a/drivers/parport/ieee1284_ops.c +++ b/drivers/parport/ieee1284_ops.c @@ -165,17 +165,7 @@ size_t parport_ieee1284_read_nibble (struct parport *port, /* Does the error line indicate end of data? */ if (((i & 1) == 0) && (parport_read_status(port) & PARPORT_STATUS_ERROR)) { - port->physport->ieee1284.phase = IEEE1284_PH_HBUSY_DNA; - DPRINTK (KERN_DEBUG - "%s: No more nibble data (%d bytes)\n", - port->name, i/2); - - /* Go to reverse idle phase. */ - parport_frob_control (port, - PARPORT_CONTROL_AUTOFD, - PARPORT_CONTROL_AUTOFD); - port->physport->ieee1284.phase = IEEE1284_PH_REV_IDLE; - break; + goto end_of_data; } /* Event 7: Set nAutoFd low. */ @@ -225,18 +215,25 @@ size_t parport_ieee1284_read_nibble (struct parport *port, byte = nibble; } - i /= 2; /* i is now in bytes */ - if (i == len) { /* Read the last nibble without checking data avail. */ - port = port->physport; - if (parport_read_status (port) & PARPORT_STATUS_ERROR) - port->ieee1284.phase = IEEE1284_PH_HBUSY_DNA; + if (parport_read_status (port) & PARPORT_STATUS_ERROR) { + end_of_data: + DPRINTK (KERN_DEBUG + "%s: No more nibble data (%d bytes)\n", + port->name, i/2); + + /* Go to reverse idle phase. */ + parport_frob_control (port, + PARPORT_CONTROL_AUTOFD, + PARPORT_CONTROL_AUTOFD); + port->physport->ieee1284.phase = IEEE1284_PH_REV_IDLE; + } else - port->ieee1284.phase = IEEE1284_PH_HBUSY_DAVAIL; + port->physport->ieee1284.phase = IEEE1284_PH_HBUSY_DAVAIL; } - return i; + return i/2; #endif /* IEEE1284 support */ } @@ -256,17 +253,7 @@ size_t parport_ieee1284_read_byte (struct parport *port, /* Data available? */ if (parport_read_status (port) & PARPORT_STATUS_ERROR) { - port->physport->ieee1284.phase = IEEE1284_PH_HBUSY_DNA; - DPRINTK (KERN_DEBUG - "%s: No more byte data (%Zd bytes)\n", - port->name, count); - - /* Go to reverse idle phase. */ - parport_frob_control (port, - PARPORT_CONTROL_AUTOFD, - PARPORT_CONTROL_AUTOFD); - port->physport->ieee1284.phase = IEEE1284_PH_REV_IDLE; - break; + goto end_of_data; } /* Event 14: Place data bus in high impedance state. */ @@ -318,11 +305,20 @@ size_t parport_ieee1284_read_byte (struct parport *port, if (count == len) { /* Read the last byte without checking data avail. */ - port = port->physport; - if (parport_read_status (port) & PARPORT_STATUS_ERROR) - port->ieee1284.phase = IEEE1284_PH_HBUSY_DNA; + if (parport_read_status (port) & PARPORT_STATUS_ERROR) { + end_of_data: + DPRINTK (KERN_DEBUG + "%s: No more byte data (%Zd bytes)\n", + port->name, count); + + /* Go to reverse idle phase. */ + parport_frob_control (port, + PARPORT_CONTROL_AUTOFD, + PARPORT_CONTROL_AUTOFD); + port->physport->ieee1284.phase = IEEE1284_PH_REV_IDLE; + } else - port->ieee1284.phase = IEEE1284_PH_HBUSY_DAVAIL; + port->physport->ieee1284.phase = IEEE1284_PH_HBUSY_DAVAIL; } return count; diff --git a/drivers/parport/parport_pc.c b/drivers/parport/parport_pc.c index c6493ad7c0c..18e85ccdae6 100644 --- a/drivers/parport/parport_pc.c +++ b/drivers/parport/parport_pc.c @@ -1169,7 +1169,7 @@ dump_parport_state ("fwd idle", port); /* GCC is not inlining extern inline function later overwriten to non-inline, so we use outlined_ variants here. */ -static struct parport_operations parport_pc_ops = +static const struct parport_operations parport_pc_ops = { .write_data = parport_pc_write_data, .read_data = parport_pc_read_data, @@ -1211,10 +1211,11 @@ static struct parport_operations parport_pc_ops = static void __devinit show_parconfig_smsc37c669(int io, int key) { int cr1,cr4,cra,cr23,cr26,cr27,i=0; - static const char *modes[]={ "SPP and Bidirectional (PS/2)", - "EPP and SPP", - "ECP", - "ECP and EPP" }; + static const char *const modes[]={ + "SPP and Bidirectional (PS/2)", + "EPP and SPP", + "ECP", + "ECP and EPP" }; outb(key,io); outb(key,io); @@ -1288,7 +1289,7 @@ static void __devinit show_parconfig_smsc37c669(int io, int key) static void __devinit show_parconfig_winbond(int io, int key) { int cr30,cr60,cr61,cr70,cr74,crf0,i=0; - static const char *modes[] = { + static const char *const modes[] = { "Standard (SPP) and Bidirectional(PS/2)", /* 0 */ "EPP-1.9 and SPP", "ECP", @@ -1297,7 +1298,9 @@ static void __devinit show_parconfig_winbond(int io, int key) "EPP-1.7 and SPP", /* 5 */ "undefined!", "ECP and EPP-1.7" }; - static char *irqtypes[] = { "pulsed low, high-Z", "follows nACK" }; + static char *const irqtypes[] = { + "pulsed low, high-Z", + "follows nACK" }; /* The registers are called compatible-PnP because the register layout is modelled after ISA-PnP, the access @@ -2396,7 +2399,8 @@ EXPORT_SYMBOL (parport_pc_unregister_port); /* ITE support maintained by Rich Liu <richliu@poorman.org> */ static int __devinit sio_ite_8872_probe (struct pci_dev *pdev, int autoirq, - int autodma, struct parport_pc_via_data *via) + int autodma, + const struct parport_pc_via_data *via) { short inta_addr[6] = { 0x2A0, 0x2C0, 0x220, 0x240, 0x1E0 }; struct resource *base_res; @@ -2524,7 +2528,8 @@ static struct parport_pc_via_data via_8231_data __devinitdata = { }; static int __devinit sio_via_probe (struct pci_dev *pdev, int autoirq, - int autodma, struct parport_pc_via_data *via) + int autodma, + const struct parport_pc_via_data *via) { u8 tmp, tmp2, siofunc; u8 ppcontrol = 0; @@ -2694,8 +2699,9 @@ enum parport_pc_sio_types { /* each element directly indexed from enum list, above */ static struct parport_pc_superio { - int (*probe) (struct pci_dev *pdev, int autoirq, int autodma, struct parport_pc_via_data *via); - struct parport_pc_via_data *via; + int (*probe) (struct pci_dev *pdev, int autoirq, int autodma, + const struct parport_pc_via_data *via); + const struct parport_pc_via_data *via; } parport_pc_superio_info[] __devinitdata = { { sio_via_probe, &via_686a_data, }, { sio_via_probe, &via_8231_data, }, @@ -2828,7 +2834,7 @@ static struct parport_pc_pci { /* netmos_9815 */ { 2, { { 0, -1 }, { 2, -1 }, } }, /* untested */ }; -static struct pci_device_id parport_pc_pci_tbl[] = { +static const struct pci_device_id parport_pc_pci_tbl[] = { /* Super-IO onboard chips */ { 0x1106, 0x0686, PCI_ANY_ID, PCI_ANY_ID, 0, 0, sio_via_686a }, { 0x1106, 0x8231, PCI_ANY_ID, PCI_ANY_ID, 0, 0, sio_via_8231 }, diff --git a/drivers/parport/probe.c b/drivers/parport/probe.c index 4b48b31ec23..b62aee8de3c 100644 --- a/drivers/parport/probe.c +++ b/drivers/parport/probe.c @@ -11,9 +11,9 @@ #include <linux/string.h> #include <asm/uaccess.h> -static struct { - char *token; - char *descr; +static const struct { + const char *token; + const char *descr; } classes[] = { { "", "Legacy device" }, { "PRINTER", "Printer" }, @@ -128,8 +128,131 @@ static void parse_data(struct parport *port, int device, char *str) kfree(txt); } +/* Read up to count-1 bytes of device id. Terminate buffer with + * '\0'. Buffer begins with two Device ID length bytes as given by + * device. */ +static ssize_t parport_read_device_id (struct parport *port, char *buffer, + size_t count) +{ + unsigned char length[2]; + unsigned lelen, belen; + size_t idlens[4]; + unsigned numidlens; + unsigned current_idlen; + ssize_t retval; + size_t len; + + /* First two bytes are MSB,LSB of inclusive length. */ + retval = parport_read (port, length, 2); + + if (retval < 0) + return retval; + if (retval != 2) + return -EIO; + + if (count < 2) + return 0; + memcpy(buffer, length, 2); + len = 2; + + /* Some devices wrongly send LE length, and some send it two + * bytes short. Construct a sorted array of lengths to try. */ + belen = (length[0] << 8) + length[1]; + lelen = (length[1] << 8) + length[0]; + idlens[0] = min(belen, lelen); + idlens[1] = idlens[0]+2; + if (belen != lelen) { + int off = 2; + /* Don't try lenghts of 0x100 and 0x200 as 1 and 2 */ + if (idlens[0] <= 2) + off = 0; + idlens[off] = max(belen, lelen); + idlens[off+1] = idlens[off]+2; + numidlens = off+2; + } + else { + /* Some devices don't truly implement Device ID, but + * just return constant nibble forever. This catches + * also those cases. */ + if (idlens[0] == 0 || idlens[0] > 0xFFF) { + printk (KERN_DEBUG "%s: reported broken Device ID" + " length of %#zX bytes\n", + port->name, idlens[0]); + return -EIO; + } + numidlens = 2; + } + + /* Try to respect the given ID length despite all the bugs in + * the ID length. Read according to shortest possible ID + * first. */ + for (current_idlen = 0; current_idlen < numidlens; ++current_idlen) { + size_t idlen = idlens[current_idlen]; + if (idlen+1 >= count) + break; + + retval = parport_read (port, buffer+len, idlen-len); + + if (retval < 0) + return retval; + len += retval; + + if (port->physport->ieee1284.phase != IEEE1284_PH_HBUSY_DAVAIL) { + if (belen != len) { + printk (KERN_DEBUG "%s: Device ID was %d bytes" + " while device told it would be %d" + " bytes\n", + port->name, len, belen); + } + goto done; + } + + /* This might end reading the Device ID too + * soon. Hopefully the needed fields were already in + * the first 256 bytes or so that we must have read so + * far. */ + if (buffer[len-1] == ';') { + printk (KERN_DEBUG "%s: Device ID reading stopped" + " before device told data not available. " + "Current idlen %d of %d, len bytes %02X %02X\n", + port->name, current_idlen, numidlens, + length[0], length[1]); + goto done; + } + } + if (current_idlen < numidlens) { + /* Buffer not large enough, read to end of buffer. */ + size_t idlen, len2; + if (len+1 < count) { + retval = parport_read (port, buffer+len, count-len-1); + if (retval < 0) + return retval; + len += retval; + } + /* Read the whole ID since some devices would not + * otherwise give back the Device ID from beginning + * next time when asked. */ + idlen = idlens[current_idlen]; + len2 = len; + while(len2 < idlen && retval > 0) { + char tmp[4]; + retval = parport_read (port, tmp, + min(sizeof tmp, idlen-len2)); + if (retval < 0) + return retval; + len2 += retval; + } + } + /* In addition, there are broken devices out there that don't + even finish off with a semi-colon. We do not need to care + about those at this time. */ + done: + buffer[len] = '\0'; + return len; +} + /* Get Std 1284 Device ID. */ -ssize_t parport_device_id (int devnum, char *buffer, size_t len) +ssize_t parport_device_id (int devnum, char *buffer, size_t count) { ssize_t retval = -ENXIO; struct pardevice *dev = parport_open (devnum, "Device ID probe", @@ -139,76 +262,20 @@ ssize_t parport_device_id (int devnum, char *buffer, size_t len) parport_claim_or_block (dev); - /* Negotiate to compatibility mode, and then to device ID mode. - * (This is in case we are already in device ID mode.) */ + /* Negotiate to compatibility mode, and then to device ID + * mode. (This so that we start form beginning of device ID if + * already in device ID mode.) */ parport_negotiate (dev->port, IEEE1284_MODE_COMPAT); retval = parport_negotiate (dev->port, IEEE1284_MODE_NIBBLE | IEEE1284_DEVICEID); if (!retval) { - int idlen; - unsigned char length[2]; - - /* First two bytes are MSB,LSB of inclusive length. */ - retval = parport_read (dev->port, length, 2); - - if (retval != 2) goto end_id; - - idlen = (length[0] << 8) + length[1] - 2; - /* - * Check if the caller-allocated buffer is large enough - * otherwise bail out or there will be an at least off by one. - */ - if (idlen + 1 < len) - len = idlen; - else { - retval = -EINVAL; - goto out; - } - retval = parport_read (dev->port, buffer, len); - - if (retval != len) - printk (KERN_DEBUG "%s: only read %Zd of %Zd ID bytes\n", - dev->port->name, retval, - len); - - /* Some printer manufacturers mistakenly believe that - the length field is supposed to be _exclusive_. - In addition, there are broken devices out there - that don't even finish off with a semi-colon. */ - if (buffer[len - 1] != ';') { - ssize_t diff; - diff = parport_read (dev->port, buffer + len, 2); - retval += diff; - - if (diff) - printk (KERN_DEBUG - "%s: device reported incorrect " - "length field (%d, should be %Zd)\n", - dev->port->name, idlen, retval); - else { - /* One semi-colon short of a device ID. */ - buffer[len++] = ';'; - printk (KERN_DEBUG "%s: faking semi-colon\n", - dev->port->name); - - /* If we get here, I don't think we - need to worry about the possible - standard violation of having read - more than we were told to. The - device is non-compliant anyhow. */ - } - } - - end_id: - buffer[len] = '\0'; + retval = parport_read_device_id (dev->port, buffer, count); parport_negotiate (dev->port, IEEE1284_MODE_COMPAT); + if (retval > 2) + parse_data (dev->port, dev->daisy, buffer+2); } - if (retval > 2) - parse_data (dev->port, dev->daisy, buffer); - -out: parport_release (dev); parport_close (dev); return retval; diff --git a/drivers/parport/share.c b/drivers/parport/share.c index 9cb3ab156b0..ea62bed6bc8 100644 --- a/drivers/parport/share.c +++ b/drivers/parport/share.c @@ -1002,6 +1002,7 @@ EXPORT_SYMBOL(parport_register_driver); EXPORT_SYMBOL(parport_unregister_driver); EXPORT_SYMBOL(parport_register_device); EXPORT_SYMBOL(parport_unregister_device); +EXPORT_SYMBOL(parport_get_port); EXPORT_SYMBOL(parport_put_port); EXPORT_SYMBOL(parport_find_number); EXPORT_SYMBOL(parport_find_base); diff --git a/drivers/pnp/pnpbios/bioscalls.c b/drivers/pnp/pnpbios/bioscalls.c index 6b7583f497d..a1f0b0ba2bf 100644 --- a/drivers/pnp/pnpbios/bioscalls.c +++ b/drivers/pnp/pnpbios/bioscalls.c @@ -31,15 +31,6 @@ static struct { } pnp_bios_callpoint; -/* The PnP BIOS entries in the GDT */ -#define PNP_GDT (GDT_ENTRY_PNPBIOS_BASE * 8) - -#define PNP_CS32 (PNP_GDT+0x00) /* segment for calling fn */ -#define PNP_CS16 (PNP_GDT+0x08) /* code segment for BIOS */ -#define PNP_DS (PNP_GDT+0x10) /* data segment for BIOS */ -#define PNP_TS1 (PNP_GDT+0x18) /* transfer data segment */ -#define PNP_TS2 (PNP_GDT+0x20) /* another data segment */ - /* * These are some opcodes for a "static asmlinkage" * As this code is *not* executed inside the linux kernel segment, but in a @@ -67,16 +58,11 @@ __asm__( ".previous \n" ); -#define Q_SET_SEL(cpu, selname, address, size) \ -do { \ -set_base(per_cpu(cpu_gdt_table,cpu)[(selname) >> 3], __va((u32)(address))); \ -set_limit(per_cpu(cpu_gdt_table,cpu)[(selname) >> 3], size); \ -} while(0) - #define Q2_SET_SEL(cpu, selname, address, size) \ do { \ -set_base(per_cpu(cpu_gdt_table,cpu)[(selname) >> 3], (u32)(address)); \ -set_limit(per_cpu(cpu_gdt_table,cpu)[(selname) >> 3], size); \ +struct desc_struct *gdt = get_cpu_gdt_table((cpu)); \ +set_base(gdt[(selname) >> 3], (u32)(address)); \ +set_limit(gdt[(selname) >> 3], size); \ } while(0) static struct desc_struct bad_bios_desc = { 0, 0x00409200 }; @@ -115,8 +101,8 @@ static inline u16 call_pnp_bios(u16 func, u16 arg1, u16 arg2, u16 arg3, return PNP_FUNCTION_NOT_SUPPORTED; cpu = get_cpu(); - save_desc_40 = per_cpu(cpu_gdt_table,cpu)[0x40 / 8]; - per_cpu(cpu_gdt_table,cpu)[0x40 / 8] = bad_bios_desc; + save_desc_40 = get_cpu_gdt_table(cpu)[0x40 / 8]; + get_cpu_gdt_table(cpu)[0x40 / 8] = bad_bios_desc; /* On some boxes IRQ's during PnP BIOS calls are deadly. */ spin_lock_irqsave(&pnp_bios_lock, flags); @@ -158,7 +144,7 @@ static inline u16 call_pnp_bios(u16 func, u16 arg1, u16 arg2, u16 arg3, ); spin_unlock_irqrestore(&pnp_bios_lock, flags); - per_cpu(cpu_gdt_table,cpu)[0x40 / 8] = save_desc_40; + get_cpu_gdt_table(cpu)[0x40 / 8] = save_desc_40; put_cpu(); /* If we get here and this is set then the PnP BIOS faulted on us. */ @@ -290,12 +276,15 @@ int pnp_bios_dev_node_info(struct pnp_dev_node_info *data) static int __pnp_bios_get_dev_node(u8 *nodenum, char boot, struct pnp_bios_node *data) { u16 status; + u16 tmp_nodenum; if (!pnp_bios_present()) return PNP_FUNCTION_NOT_SUPPORTED; if ( !boot && pnpbios_dont_use_current_config ) return PNP_FUNCTION_NOT_SUPPORTED; + tmp_nodenum = *nodenum; status = call_pnp_bios(PNP_GET_SYS_DEV_NODE, 0, PNP_TS1, 0, PNP_TS2, boot ? 2 : 1, PNP_DS, 0, - nodenum, sizeof(char), data, 65536); + &tmp_nodenum, sizeof(tmp_nodenum), data, 65536); + *nodenum = tmp_nodenum; return status; } @@ -535,10 +524,12 @@ void pnpbios_calls_init(union pnp_bios_install_struct *header) set_base(bad_bios_desc, __va((unsigned long)0x40 << 4)); _set_limit((char *)&bad_bios_desc, 4095 - (0x40 << 4)); - for(i=0; i < NR_CPUS; i++) - { - Q2_SET_SEL(i, PNP_CS32, &pnp_bios_callfunc, 64 * 1024); - Q_SET_SEL(i, PNP_CS16, header->fields.pm16cseg, 64 * 1024); - Q_SET_SEL(i, PNP_DS, header->fields.pm16dseg, 64 * 1024); - } + for (i = 0; i < NR_CPUS; i++) { + struct desc_struct *gdt = get_cpu_gdt_table(i); + if (!gdt) + continue; + set_base(gdt[GDT_ENTRY_PNPBIOS_CS32], &pnp_bios_callfunc); + set_base(gdt[GDT_ENTRY_PNPBIOS_CS16], __va(header->fields.pm16cseg)); + set_base(gdt[GDT_ENTRY_PNPBIOS_DS], __va(header->fields.pm16dseg)); + } } diff --git a/drivers/s390/Makefile b/drivers/s390/Makefile index c99a2fe92fb..9803c9352d7 100644 --- a/drivers/s390/Makefile +++ b/drivers/s390/Makefile @@ -2,7 +2,7 @@ # Makefile for the S/390 specific device drivers # -obj-y += s390mach.o sysinfo.o +obj-y += s390mach.o sysinfo.o s390_rdev.o obj-y += cio/ block/ char/ crypto/ net/ scsi/ drivers-y += drivers/s390/built-in.o diff --git a/drivers/s390/block/Kconfig b/drivers/s390/block/Kconfig index 6e7d7b06421..6f50cc9323d 100644 --- a/drivers/s390/block/Kconfig +++ b/drivers/s390/block/Kconfig @@ -1,11 +1,11 @@ -if ARCH_S390 +if S390 comment "S/390 block device drivers" - depends on ARCH_S390 + depends on S390 config BLK_DEV_XPRAM tristate "XPRAM disk support" - depends on ARCH_S390 + depends on S390 help Select this option if you want to use your expanded storage on S/390 or zSeries as a disk. This is useful as a _fast_ swap device if you @@ -49,7 +49,7 @@ config DASD_FBA config DASD_DIAG tristate "Support for DIAG access to Disks" - depends on DASD && ( ARCH_S390X = 'n' || EXPERIMENTAL) + depends on DASD && ( 64BIT = 'n' || EXPERIMENTAL) help Select this option if you want to use Diagnose250 command to access Disks under VM. If you are not running under VM or unsure what it is, diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index fdb61380c52..f779f674dfa 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -7,7 +7,7 @@ * Bugreports.to..: <Linux390@de.ibm.com> * (C) IBM Corporation, IBM Deutschland Entwicklung GmbH, 1999-2001 * - * $Revision: 1.167 $ + * $Revision: 1.172 $ */ #include <linux/config.h> @@ -604,7 +604,7 @@ dasd_smalloc_request(char *magic, int cplength, int datasize, void dasd_kfree_request(struct dasd_ccw_req * cqr, struct dasd_device * device) { -#ifdef CONFIG_ARCH_S390X +#ifdef CONFIG_64BIT struct ccw1 *ccw; /* Clear any idals used for the request. */ @@ -1224,6 +1224,12 @@ __dasd_start_head(struct dasd_device * device) if (list_empty(&device->ccw_queue)) return; cqr = list_entry(device->ccw_queue.next, struct dasd_ccw_req, list); + /* check FAILFAST */ + if (device->stopped & ~DASD_STOPPED_PENDING && + test_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags)) { + cqr->status = DASD_CQR_FAILED; + dasd_schedule_bh(device); + } if ((cqr->status == DASD_CQR_QUEUED) && (!device->stopped)) { /* try to start the first I/O that can be started */ @@ -1323,7 +1329,7 @@ void dasd_schedule_bh(struct dasd_device * device) { /* Protect against rescheduling. */ - if (atomic_compare_and_swap (0, 1, &device->tasklet_scheduled)) + if (atomic_cmpxchg (&device->tasklet_scheduled, 0, 1) != 0) return; dasd_get_device(device); tasklet_hi_schedule(&device->tasklet); @@ -1750,8 +1756,10 @@ dasd_exit(void) * SECTION: common functions for ccw_driver use */ -/* initial attempt at a probe function. this can be simplified once - * the other detection code is gone */ +/* + * Initial attempt at a probe function. this can be simplified once + * the other detection code is gone. + */ int dasd_generic_probe (struct ccw_device *cdev, struct dasd_discipline *discipline) @@ -1770,8 +1778,10 @@ dasd_generic_probe (struct ccw_device *cdev, return ret; } -/* this will one day be called from a global not_oper handler. - * It is also used by driver_unregister during module unload */ +/* + * This will one day be called from a global not_oper handler. + * It is also used by driver_unregister during module unload. + */ void dasd_generic_remove (struct ccw_device *cdev) { @@ -1798,9 +1808,11 @@ dasd_generic_remove (struct ccw_device *cdev) dasd_delete_device(device); } -/* activate a device. This is called from dasd_{eckd,fba}_probe() when either +/* + * Activate a device. This is called from dasd_{eckd,fba}_probe() when either * the device is detected for the first time and is supposed to be used - * or the user has started activation through sysfs */ + * or the user has started activation through sysfs. + */ int dasd_generic_set_online (struct ccw_device *cdev, struct dasd_discipline *discipline) @@ -1917,7 +1929,6 @@ dasd_generic_notify(struct ccw_device *cdev, int event) if (cqr->status == DASD_CQR_IN_IO) cqr->status = DASD_CQR_FAILED; device->stopped |= DASD_STOPPED_DC_EIO; - dasd_schedule_bh(device); } else { list_for_each_entry(cqr, &device->ccw_queue, list) if (cqr->status == DASD_CQR_IN_IO) { @@ -1927,6 +1938,7 @@ dasd_generic_notify(struct ccw_device *cdev, int event) device->stopped |= DASD_STOPPED_DC_WAIT; dasd_set_timer(device, 0); } + dasd_schedule_bh(device); ret = 1; break; case CIO_OPER: diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c index ab8754e566b..ba80fdea7eb 100644 --- a/drivers/s390/block/dasd_diag.c +++ b/drivers/s390/block/dasd_diag.c @@ -6,7 +6,7 @@ * Bugreports.to..: <Linux390@de.ibm.com> * (C) IBM Corporation, IBM Deutschland Entwicklung GmbH, 1999,2000 * - * $Revision: 1.51 $ + * $Revision: 1.53 $ */ #include <linux/config.h> @@ -25,6 +25,7 @@ #include <asm/io.h> #include <asm/s390_ext.h> #include <asm/todclk.h> +#include <asm/vtoc.h> #include "dasd_int.h" #include "dasd_diag.h" @@ -74,7 +75,7 @@ dia250(void *iob, int cmd) int rc; __asm__ __volatile__( -#ifdef CONFIG_ARCH_S390X +#ifdef CONFIG_64BIT " lghi %0,3\n" " lgr 0,%3\n" " diag 0,%2,0x250\n" @@ -329,7 +330,7 @@ dasd_diag_check_device(struct dasd_device *device) struct dasd_diag_private *private; struct dasd_diag_characteristics *rdc_data; struct dasd_diag_bio bio; - struct dasd_diag_cms_label *label; + struct vtoc_cms_label *label; blocknum_t end_block; unsigned int sb, bsize; int rc; @@ -380,7 +381,7 @@ dasd_diag_check_device(struct dasd_device *device) mdsk_term_io(device); /* figure out blocksize of device */ - label = (struct dasd_diag_cms_label *) get_zeroed_page(GFP_KERNEL); + label = (struct vtoc_cms_label *) get_zeroed_page(GFP_KERNEL); if (label == NULL) { DEV_MESSAGE(KERN_WARNING, device, "%s", "No memory to allocate initialization request"); @@ -548,6 +549,8 @@ dasd_diag_build_cp(struct dasd_device * device, struct request *req) } cqr->retries = DIAG_MAX_RETRIES; cqr->buildclk = get_clock(); + if (req->flags & REQ_FAILFAST) + set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags); cqr->device = device; cqr->expires = DIAG_TIMEOUT; cqr->status = DASD_CQR_FILLED; diff --git a/drivers/s390/block/dasd_diag.h b/drivers/s390/block/dasd_diag.h index df31484d73a..a4f80bd735f 100644 --- a/drivers/s390/block/dasd_diag.h +++ b/drivers/s390/block/dasd_diag.h @@ -6,7 +6,7 @@ * Bugreports.to..: <Linux390@de.ibm.com> * (C) IBM Corporation, IBM Deutschland Entwicklung GmbH, 1999,2000 * - * $Revision: 1.8 $ + * $Revision: 1.9 $ */ #define MDSK_WRITE_REQ 0x01 @@ -44,29 +44,8 @@ struct dasd_diag_characteristics { u8 rdev_features; } __attribute__ ((packed, aligned(4))); -struct dasd_diag_cms_label { - u8 label_id[4]; - u8 vol_id[6]; - u16 version_id; - u32 block_size; - u32 origin_ptr; - u32 usable_count; - u32 formatted_count; - u32 block_count; - u32 used_count; - u32 fst_size; - u32 fst_count; - u8 format_date[6]; - u8 reserved1[2]; - u32 disk_offset; - u32 map_block; - u32 hblk_disp; - u32 user_disp; - u8 reserved2[4]; - u8 segment_name[8]; -} __attribute__ ((packed)); - -#ifdef CONFIG_ARCH_S390X + +#ifdef CONFIG_64BIT #define DASD_DIAG_FLAGA_DEFAULT DASD_DIAG_FLAGA_FORMAT_64BIT typedef u64 blocknum_t; @@ -107,7 +86,7 @@ struct dasd_diag_rw_io { struct dasd_diag_bio *bio_list; u8 spare4[8]; } __attribute__ ((packed, aligned(8))); -#else /* CONFIG_ARCH_S390X */ +#else /* CONFIG_64BIT */ #define DASD_DIAG_FLAGA_DEFAULT 0x0 typedef u32 blocknum_t; @@ -146,4 +125,4 @@ struct dasd_diag_rw_io { u32 interrupt_params; u8 spare3[20]; } __attribute__ ((packed, aligned(8))); -#endif /* CONFIG_ARCH_S390X */ +#endif /* CONFIG_64BIT */ diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 811060e10c0..96eb4825858 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -7,7 +7,7 @@ * Bugreports.to..: <Linux390@de.ibm.com> * (C) IBM Corporation, IBM Deutschland Entwicklung GmbH, 1999,2000 * - * $Revision: 1.71 $ + * $Revision: 1.74 $ */ #include <linux/config.h> @@ -1041,7 +1041,7 @@ dasd_eckd_build_cp(struct dasd_device * device, struct request *req) /* Eckd can only do full blocks. */ return ERR_PTR(-EINVAL); count += bv->bv_len >> (device->s2b_shift + 9); -#if defined(CONFIG_ARCH_S390X) +#if defined(CONFIG_64BIT) if (idal_is_needed (page_address(bv->bv_page), bv->bv_len)) cidaw += bv->bv_len >> (device->s2b_shift + 9); @@ -1136,6 +1136,8 @@ dasd_eckd_build_cp(struct dasd_device * device, struct request *req) recid++; } } + if (req->flags & REQ_FAILFAST) + set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags); cqr->device = device; cqr->expires = 5 * 60 * HZ; /* 5 minutes */ cqr->lpm = private->path_data.ppm; @@ -1252,6 +1254,7 @@ dasd_eckd_release(struct block_device *bdev, int no, long args) cqr->cpaddr->cda = (__u32)(addr_t) cqr->data; cqr->device = device; clear_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags); + set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags); cqr->retries = 0; cqr->expires = 2 * HZ; cqr->buildclk = get_clock(); @@ -1296,6 +1299,7 @@ dasd_eckd_reserve(struct block_device *bdev, int no, long args) cqr->cpaddr->cda = (__u32)(addr_t) cqr->data; cqr->device = device; clear_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags); + set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags); cqr->retries = 0; cqr->expires = 2 * HZ; cqr->buildclk = get_clock(); @@ -1339,6 +1343,7 @@ dasd_eckd_steal_lock(struct block_device *bdev, int no, long args) cqr->cpaddr->cda = (__u32)(addr_t) cqr->data; cqr->device = device; clear_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags); + set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags); cqr->retries = 0; cqr->expires = 2 * HZ; cqr->buildclk = get_clock(); diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c index 28cb4613b7f..8ec75dc08e2 100644 --- a/drivers/s390/block/dasd_fba.c +++ b/drivers/s390/block/dasd_fba.c @@ -4,7 +4,7 @@ * Bugreports.to..: <Linux390@de.ibm.com> * (C) IBM Corporation, IBM Deutschland Entwicklung GmbH, 1999,2000 * - * $Revision: 1.40 $ + * $Revision: 1.41 $ */ #include <linux/config.h> @@ -271,7 +271,7 @@ dasd_fba_build_cp(struct dasd_device * device, struct request *req) /* Fba can only do full blocks. */ return ERR_PTR(-EINVAL); count += bv->bv_len >> (device->s2b_shift + 9); -#if defined(CONFIG_ARCH_S390X) +#if defined(CONFIG_64BIT) if (idal_is_needed (page_address(bv->bv_page), bv->bv_len)) cidaw += bv->bv_len / blksize; @@ -352,6 +352,8 @@ dasd_fba_build_cp(struct dasd_device * device, struct request *req) recid++; } } + if (req->flags & REQ_FAILFAST) + set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags); cqr->device = device; cqr->expires = 5 * 60 * HZ; /* 5 minutes */ cqr->retries = 32; diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index 9fab04f3056..2fb05c4a528 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -6,7 +6,7 @@ * Bugreports.to..: <Linux390@de.ibm.com> * (C) IBM Corporation, IBM Deutschland Entwicklung GmbH, 1999,2000 * - * $Revision: 1.65 $ + * $Revision: 1.68 $ */ #ifndef DASD_INT_H @@ -208,6 +208,7 @@ struct dasd_ccw_req { /* per dasd_ccw_req flags */ #define DASD_CQR_FLAGS_USE_ERP 0 /* use ERP for this request */ +#define DASD_CQR_FLAGS_FAILFAST 1 /* FAILFAST */ /* Signature for error recovery functions. */ typedef struct dasd_ccw_req *(*dasd_erp_fn_t) (struct dasd_ccw_req *); diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c index 789595b3fa0..044b7537199 100644 --- a/drivers/s390/block/dasd_ioctl.c +++ b/drivers/s390/block/dasd_ioctl.c @@ -7,7 +7,7 @@ * Bugreports.to..: <Linux390@de.ibm.com> * (C) IBM Corporation, IBM Deutschland Entwicklung GmbH, 1999-2001 * - * $Revision: 1.47 $ + * $Revision: 1.50 $ * * i/o controls for the dasd driver. */ @@ -352,6 +352,9 @@ dasd_ioctl_read_profile(struct block_device *bdev, int no, long args) if (device == NULL) return -ENODEV; + if (dasd_profile_level == DASD_PROFILE_OFF) + return -EIO; + if (copy_to_user((long __user *) args, (long *) &device->profile, sizeof (struct dasd_profile_info_t))) return -EFAULT; diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 4fde4118899..2e727f49ad1 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -15,7 +15,7 @@ #include <asm/io.h> #include <linux/completion.h> #include <linux/interrupt.h> -#include <asm/ccwdev.h> // for s390_root_dev_(un)register() +#include <asm/s390_rdev.h> //#define DCSSBLK_DEBUG /* Debug messages on/off */ #define DCSSBLK_NAME "dcssblk" diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c index d428c909b8a..bf3a67c3cc5 100644 --- a/drivers/s390/block/xpram.c +++ b/drivers/s390/block/xpram.c @@ -160,7 +160,7 @@ static int xpram_page_in (unsigned long page_addr, unsigned int xpage_index) "0: ipm %0\n" " srl %0,28\n" "1:\n" -#ifndef CONFIG_ARCH_S390X +#ifndef CONFIG_64BIT ".section __ex_table,\"a\"\n" " .align 4\n" " .long 0b,1b\n" @@ -208,7 +208,7 @@ static long xpram_page_out (unsigned long page_addr, unsigned int xpage_index) "0: ipm %0\n" " srl %0,28\n" "1:\n" -#ifndef CONFIG_ARCH_S390X +#ifndef CONFIG_64BIT ".section __ex_table,\"a\"\n" " .align 4\n" " .long 0b,1b\n" diff --git a/drivers/s390/char/sclp_cpi.c b/drivers/s390/char/sclp_cpi.c index 5a6cef2dfa1..80f7f31310e 100644 --- a/drivers/s390/char/sclp_cpi.c +++ b/drivers/s390/char/sclp_cpi.c @@ -204,7 +204,7 @@ cpi_module_init(void) printk(KERN_WARNING "cpi: no control program identification " "support\n"); sclp_unregister(&sclp_cpi_event); - return -ENOTSUPP; + return -EOPNOTSUPP; } req = cpi_prepare_req(); diff --git a/drivers/s390/char/sclp_quiesce.c b/drivers/s390/char/sclp_quiesce.c index 83f75774df6..56fa6916889 100644 --- a/drivers/s390/char/sclp_quiesce.c +++ b/drivers/s390/char/sclp_quiesce.c @@ -32,7 +32,7 @@ do_load_quiesce_psw(void * __unused) psw_t quiesce_psw; int cpu; - if (atomic_compare_and_swap(-1, smp_processor_id(), &cpuid)) + if (atomic_cmpxchg(&cpuid, -1, smp_processor_id()) != -1) signal_processor(smp_processor_id(), sigp_stop); /* Wait for all other cpus to enter stopped state */ for_each_online_cpu(cpu) { diff --git a/drivers/s390/char/tape_block.c b/drivers/s390/char/tape_block.c index 559d51490e2..5ced2725d6c 100644 --- a/drivers/s390/char/tape_block.c +++ b/drivers/s390/char/tape_block.c @@ -65,7 +65,7 @@ static void tapeblock_trigger_requeue(struct tape_device *device) { /* Protect against rescheduling. */ - if (atomic_compare_and_swap(0, 1, &device->blk_data.requeue_scheduled)) + if (atomic_cmpxchg(&device->blk_data.requeue_scheduled, 0, 1) != 0) return; schedule_work(&device->blk_data.requeue_task); } diff --git a/drivers/s390/char/vmwatchdog.c b/drivers/s390/char/vmwatchdog.c index 5473c23fcb5..5acc0ace3d7 100644 --- a/drivers/s390/char/vmwatchdog.c +++ b/drivers/s390/char/vmwatchdog.c @@ -66,7 +66,7 @@ static int __diag288(enum vmwdt_func func, unsigned int timeout, __cmdl = len; err = 0; asm volatile ( -#ifdef __s390x__ +#ifdef CONFIG_64BIT "diag %2,%4,0x288\n" "1: \n" ".section .fixup,\"ax\"\n" diff --git a/drivers/s390/cio/blacklist.c b/drivers/s390/cio/blacklist.c index a1c52a68219..daf21e03b21 100644 --- a/drivers/s390/cio/blacklist.c +++ b/drivers/s390/cio/blacklist.c @@ -1,7 +1,7 @@ /* * drivers/s390/cio/blacklist.c * S/390 common I/O routines -- blacklisting of specific devices - * $Revision: 1.35 $ + * $Revision: 1.39 $ * * Copyright (C) 1999-2002 IBM Deutschland Entwicklung GmbH, * IBM Corporation @@ -15,6 +15,7 @@ #include <linux/vmalloc.h> #include <linux/slab.h> #include <linux/proc_fs.h> +#include <linux/seq_file.h> #include <linux/ctype.h> #include <linux/device.h> @@ -34,10 +35,10 @@ * These can be single devices or ranges of devices */ -/* 65536 bits to indicate if a devno is blacklisted or not */ -#define __BL_DEV_WORDS ((__MAX_SUBCHANNELS + (8*sizeof(long) - 1)) / \ +/* 65536 bits for each set to indicate if a devno is blacklisted or not */ +#define __BL_DEV_WORDS ((__MAX_SUBCHANNEL + (8*sizeof(long) - 1)) / \ (8*sizeof(long))) -static unsigned long bl_dev[__BL_DEV_WORDS]; +static unsigned long bl_dev[__MAX_SSID + 1][__BL_DEV_WORDS]; typedef enum {add, free} range_action; /* @@ -45,21 +46,23 @@ typedef enum {add, free} range_action; * (Un-)blacklist the devices from-to */ static inline void -blacklist_range (range_action action, unsigned int from, unsigned int to) +blacklist_range (range_action action, unsigned int from, unsigned int to, + unsigned int ssid) { if (!to) to = from; - if (from > to || to > __MAX_SUBCHANNELS) { + if (from > to || to > __MAX_SUBCHANNEL || ssid > __MAX_SSID) { printk (KERN_WARNING "Invalid blacklist range " - "0x%04x to 0x%04x, skipping\n", from, to); + "0.%x.%04x to 0.%x.%04x, skipping\n", + ssid, from, ssid, to); return; } for (; from <= to; from++) { if (action == add) - set_bit (from, bl_dev); + set_bit (from, bl_dev[ssid]); else - clear_bit (from, bl_dev); + clear_bit (from, bl_dev[ssid]); } } @@ -69,7 +72,7 @@ blacklist_range (range_action action, unsigned int from, unsigned int to) * Shamelessly grabbed from dasd_devmap.c. */ static inline int -blacklist_busid(char **str, int *id0, int *id1, int *devno) +blacklist_busid(char **str, int *id0, int *ssid, int *devno) { int val, old_style; char *sav; @@ -86,7 +89,7 @@ blacklist_busid(char **str, int *id0, int *id1, int *devno) goto confused; val = simple_strtoul(*str, str, 16); if (old_style || (*str)[0] != '.') { - *id0 = *id1 = 0; + *id0 = *ssid = 0; if (val < 0 || val > 0xffff) goto confused; *devno = val; @@ -105,7 +108,7 @@ blacklist_busid(char **str, int *id0, int *id1, int *devno) val = simple_strtoul(*str, str, 16); if (val < 0 || val > 0xff || (*str)++[0] != '.') goto confused; - *id1 = val; + *ssid = val; if (!isxdigit((*str)[0])) /* We require at least one hex digit */ goto confused; val = simple_strtoul(*str, str, 16); @@ -125,7 +128,7 @@ confused: static inline int blacklist_parse_parameters (char *str, range_action action) { - unsigned int from, to, from_id0, to_id0, from_id1, to_id1; + unsigned int from, to, from_id0, to_id0, from_ssid, to_ssid; while (*str != 0 && *str != '\n') { range_action ra = action; @@ -142,23 +145,25 @@ blacklist_parse_parameters (char *str, range_action action) */ if (strncmp(str,"all,",4) == 0 || strcmp(str,"all") == 0 || strncmp(str,"all\n",4) == 0 || strncmp(str,"all ",4) == 0) { - from = 0; - to = __MAX_SUBCHANNELS; + int j; + str += 3; + for (j=0; j <= __MAX_SSID; j++) + blacklist_range(ra, 0, __MAX_SUBCHANNEL, j); } else { int rc; rc = blacklist_busid(&str, &from_id0, - &from_id1, &from); + &from_ssid, &from); if (rc) continue; to = from; to_id0 = from_id0; - to_id1 = from_id1; + to_ssid = from_ssid; if (*str == '-') { str++; rc = blacklist_busid(&str, &to_id0, - &to_id1, &to); + &to_ssid, &to); if (rc) continue; } @@ -168,18 +173,19 @@ blacklist_parse_parameters (char *str, range_action action) strsep(&str, ",\n")); continue; } - if ((from_id0 != to_id0) || (from_id1 != to_id1)) { + if ((from_id0 != to_id0) || + (from_ssid != to_ssid)) { printk(KERN_WARNING "invalid cio_ignore range " "%x.%x.%04x-%x.%x.%04x\n", - from_id0, from_id1, from, - to_id0, to_id1, to); + from_id0, from_ssid, from, + to_id0, to_ssid, to); continue; } + pr_debug("blacklist_setup: adding range " + "from %x.%x.%04x to %x.%x.%04x\n", + from_id0, from_ssid, from, to_id0, to_ssid, to); + blacklist_range (ra, from, to, to_ssid); } - /* FIXME: ignoring id0 and id1 here. */ - pr_debug("blacklist_setup: adding range " - "from 0.0.%04x to 0.0.%04x\n", from, to); - blacklist_range (ra, from, to); } return 1; } @@ -213,12 +219,33 @@ __setup ("cio_ignore=", blacklist_setup); * Used by validate_subchannel() */ int -is_blacklisted (int devno) +is_blacklisted (int ssid, int devno) { - return test_bit (devno, bl_dev); + return test_bit (devno, bl_dev[ssid]); } #ifdef CONFIG_PROC_FS +static int +__s390_redo_validation(struct subchannel_id schid, void *data) +{ + int ret; + struct subchannel *sch; + + sch = get_subchannel_by_schid(schid); + if (sch) { + /* Already known. */ + put_device(&sch->dev); + return 0; + } + ret = css_probe_device(schid); + if (ret == -ENXIO) + return ret; /* We're through. */ + if (ret == -ENOMEM) + /* Stop validation for now. Bad, but no need for a panic. */ + return ret; + return 0; +} + /* * Function: s390_redo_validation * Look for no longer blacklisted devices @@ -226,29 +253,9 @@ is_blacklisted (int devno) static inline void s390_redo_validation (void) { - unsigned int irq; - CIO_TRACE_EVENT (0, "redoval"); - for (irq = 0; irq < __MAX_SUBCHANNELS; irq++) { - int ret; - struct subchannel *sch; - - sch = get_subchannel_by_schid(irq); - if (sch) { - /* Already known. */ - put_device(&sch->dev); - continue; - } - ret = css_probe_device(irq); - if (ret == -ENXIO) - break; /* We're through. */ - if (ret == -ENOMEM) - /* - * Stop validation for now. Bad, but no need for a - * panic. - */ - break; - } + + for_each_subchannel(__s390_redo_validation, NULL); } /* @@ -278,41 +285,90 @@ blacklist_parse_proc_parameters (char *buf) s390_redo_validation (); } -/* FIXME: These should be real bus ids and not home-grown ones! */ -static int cio_ignore_read (char *page, char **start, off_t off, - int count, int *eof, void *data) +/* Iterator struct for all devices. */ +struct ccwdev_iter { + int devno; + int ssid; + int in_range; +}; + +static void * +cio_ignore_proc_seq_start(struct seq_file *s, loff_t *offset) { - const unsigned int entry_size = 18; /* "0.0.ABCD-0.0.EFGH\n" */ - long devno; - int len; - - len = 0; - for (devno = off; /* abuse the page variable - * as counter, see fs/proc/generic.c */ - devno < __MAX_SUBCHANNELS && len + entry_size < count; devno++) { - if (!test_bit(devno, bl_dev)) - continue; - len += sprintf(page + len, "0.0.%04lx", devno); - if (test_bit(devno + 1, bl_dev)) { /* print range */ - while (++devno < __MAX_SUBCHANNELS) - if (!test_bit(devno, bl_dev)) - break; - len += sprintf(page + len, "-0.0.%04lx", --devno); - } - len += sprintf(page + len, "\n"); - } + struct ccwdev_iter *iter; + + if (*offset >= (__MAX_SUBCHANNEL + 1) * (__MAX_SSID + 1)) + return NULL; + iter = kzalloc(sizeof(struct ccwdev_iter), GFP_KERNEL); + if (!iter) + return ERR_PTR(-ENOMEM); + iter->ssid = *offset / (__MAX_SUBCHANNEL + 1); + iter->devno = *offset % (__MAX_SUBCHANNEL + 1); + return iter; +} + +static void +cio_ignore_proc_seq_stop(struct seq_file *s, void *it) +{ + if (!IS_ERR(it)) + kfree(it); +} + +static void * +cio_ignore_proc_seq_next(struct seq_file *s, void *it, loff_t *offset) +{ + struct ccwdev_iter *iter; + + if (*offset >= (__MAX_SUBCHANNEL + 1) * (__MAX_SSID + 1)) + return NULL; + iter = it; + if (iter->devno == __MAX_SUBCHANNEL) { + iter->devno = 0; + iter->ssid++; + if (iter->ssid > __MAX_SSID) + return NULL; + } else + iter->devno++; + (*offset)++; + return iter; +} - if (devno < __MAX_SUBCHANNELS) - *eof = 1; - *start = (char *) (devno - off); /* number of checked entries */ - return len; +static int +cio_ignore_proc_seq_show(struct seq_file *s, void *it) +{ + struct ccwdev_iter *iter; + + iter = it; + if (!is_blacklisted(iter->ssid, iter->devno)) + /* Not blacklisted, nothing to output. */ + return 0; + if (!iter->in_range) { + /* First device in range. */ + if ((iter->devno == __MAX_SUBCHANNEL) || + !is_blacklisted(iter->ssid, iter->devno + 1)) + /* Singular device. */ + return seq_printf(s, "0.%x.%04x\n", + iter->ssid, iter->devno); + iter->in_range = 1; + return seq_printf(s, "0.%x.%04x-", iter->ssid, iter->devno); + } + if ((iter->devno == __MAX_SUBCHANNEL) || + !is_blacklisted(iter->ssid, iter->devno + 1)) { + /* Last device in range. */ + iter->in_range = 0; + return seq_printf(s, "0.%x.%04x\n", iter->ssid, iter->devno); + } + return 0; } -static int cio_ignore_write(struct file *file, const char __user *user_buf, - unsigned long user_len, void *data) +static ssize_t +cio_ignore_write(struct file *file, const char __user *user_buf, + size_t user_len, loff_t *offset) { char *buf; + if (*offset) + return -EINVAL; if (user_len > 65536) user_len = 65536; buf = vmalloc (user_len + 1); /* maybe better use the stack? */ @@ -330,6 +386,27 @@ static int cio_ignore_write(struct file *file, const char __user *user_buf, return user_len; } +static struct seq_operations cio_ignore_proc_seq_ops = { + .start = cio_ignore_proc_seq_start, + .stop = cio_ignore_proc_seq_stop, + .next = cio_ignore_proc_seq_next, + .show = cio_ignore_proc_seq_show, +}; + +static int +cio_ignore_proc_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &cio_ignore_proc_seq_ops); +} + +static struct file_operations cio_ignore_proc_fops = { + .open = cio_ignore_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, + .write = cio_ignore_write, +}; + static int cio_ignore_proc_init (void) { @@ -340,8 +417,7 @@ cio_ignore_proc_init (void) if (!entry) return 0; - entry->read_proc = cio_ignore_read; - entry->write_proc = cio_ignore_write; + entry->proc_fops = &cio_ignore_proc_fops; return 1; } diff --git a/drivers/s390/cio/blacklist.h b/drivers/s390/cio/blacklist.h index fb42cafbe57..95e25c1df92 100644 --- a/drivers/s390/cio/blacklist.h +++ b/drivers/s390/cio/blacklist.h @@ -1,6 +1,6 @@ #ifndef S390_BLACKLIST_H #define S390_BLACKLIST_H -extern int is_blacklisted (int devno); +extern int is_blacklisted (int ssid, int devno); #endif diff --git a/drivers/s390/cio/ccwgroup.c b/drivers/s390/cio/ccwgroup.c index be9d2d65c22..e849289d4f3 100644 --- a/drivers/s390/cio/ccwgroup.c +++ b/drivers/s390/cio/ccwgroup.c @@ -1,7 +1,7 @@ /* * drivers/s390/cio/ccwgroup.c * bus driver for ccwgroup - * $Revision: 1.32 $ + * $Revision: 1.33 $ * * Copyright (C) 2002 IBM Deutschland Entwicklung GmbH, * IBM Corporation @@ -263,7 +263,7 @@ ccwgroup_set_online(struct ccwgroup_device *gdev) struct ccwgroup_driver *gdrv; int ret; - if (atomic_compare_and_swap(0, 1, &gdev->onoff)) + if (atomic_cmpxchg(&gdev->onoff, 0, 1) != 0) return -EAGAIN; if (gdev->state == CCWGROUP_ONLINE) { ret = 0; @@ -289,7 +289,7 @@ ccwgroup_set_offline(struct ccwgroup_device *gdev) struct ccwgroup_driver *gdrv; int ret; - if (atomic_compare_and_swap(0, 1, &gdev->onoff)) + if (atomic_cmpxchg(&gdev->onoff, 0, 1) != 0) return -EAGAIN; if (gdev->state == CCWGROUP_OFFLINE) { ret = 0; diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c index fa3c23b80e3..7270808c02d 100644 --- a/drivers/s390/cio/chsc.c +++ b/drivers/s390/cio/chsc.c @@ -1,7 +1,7 @@ /* * drivers/s390/cio/chsc.c * S/390 common I/O routines -- channel subsystem call - * $Revision: 1.120 $ + * $Revision: 1.126 $ * * Copyright (C) 1999-2002 IBM Deutschland Entwicklung GmbH, * IBM Corporation @@ -24,8 +24,6 @@ #include "ioasm.h" #include "chsc.h" -static struct channel_path *chps[NR_CHPIDS]; - static void *sei_page; static int new_channel_path(int chpid); @@ -33,13 +31,13 @@ static int new_channel_path(int chpid); static inline void set_chp_logically_online(int chp, int onoff) { - chps[chp]->state = onoff; + css[0]->chps[chp]->state = onoff; } static int get_chp_status(int chp) { - return (chps[chp] ? chps[chp]->state : -ENODEV); + return (css[0]->chps[chp] ? css[0]->chps[chp]->state : -ENODEV); } void @@ -77,7 +75,9 @@ chsc_get_sch_desc_irq(struct subchannel *sch, void *page) struct { struct chsc_header request; - u16 reserved1; + u16 reserved1a:10; + u16 ssid:2; + u16 reserved1b:4; u16 f_sch; /* first subchannel */ u16 reserved2; u16 l_sch; /* last subchannel */ @@ -104,8 +104,9 @@ chsc_get_sch_desc_irq(struct subchannel *sch, void *page) .code = 0x0004, }; - ssd_area->f_sch = sch->irq; - ssd_area->l_sch = sch->irq; + ssd_area->ssid = sch->schid.ssid; + ssd_area->f_sch = sch->schid.sch_no; + ssd_area->l_sch = sch->schid.sch_no; ccode = chsc(ssd_area); if (ccode > 0) { @@ -147,7 +148,8 @@ chsc_get_sch_desc_irq(struct subchannel *sch, void *page) */ if (ssd_area->st > 3) { /* uhm, that looks strange... */ CIO_CRW_EVENT(0, "Strange subchannel type %d" - " for sch %04x\n", ssd_area->st, sch->irq); + " for sch 0.%x.%04x\n", ssd_area->st, + sch->schid.ssid, sch->schid.sch_no); /* * There may have been a new subchannel type defined in the * time since this code was written; since we don't know which @@ -156,8 +158,9 @@ chsc_get_sch_desc_irq(struct subchannel *sch, void *page) return 0; } else { const char *type[4] = {"I/O", "chsc", "message", "ADM"}; - CIO_CRW_EVENT(6, "ssd: sch %04x is %s subchannel\n", - sch->irq, type[ssd_area->st]); + CIO_CRW_EVENT(6, "ssd: sch 0.%x.%04x is %s subchannel\n", + sch->schid.ssid, sch->schid.sch_no, + type[ssd_area->st]); sch->ssd_info.valid = 1; sch->ssd_info.type = ssd_area->st; @@ -218,13 +221,13 @@ s390_subchannel_remove_chpid(struct device *dev, void *data) int j; int mask; struct subchannel *sch; - __u8 *chpid; + struct channel_path *chpid; struct schib schib; sch = to_subchannel(dev); chpid = data; for (j = 0; j < 8; j++) - if (sch->schib.pmcw.chpid[j] == *chpid) + if (sch->schib.pmcw.chpid[j] == chpid->id) break; if (j >= 8) return 0; @@ -232,7 +235,7 @@ s390_subchannel_remove_chpid(struct device *dev, void *data) mask = 0x80 >> j; spin_lock(&sch->lock); - stsch(sch->irq, &schib); + stsch(sch->schid, &schib); if (!schib.pmcw.dnv) goto out_unreg; memcpy(&sch->schib, &schib, sizeof(struct schib)); @@ -284,7 +287,7 @@ out_unlock: out_unreg: spin_unlock(&sch->lock); sch->lpm = 0; - if (css_enqueue_subchannel_slow(sch->irq)) { + if (css_enqueue_subchannel_slow(sch->schid)) { css_clear_subchannel_slow_list(); need_rescan = 1; } @@ -295,23 +298,30 @@ static inline void s390_set_chpid_offline( __u8 chpid) { char dbf_txt[15]; + struct device *dev; sprintf(dbf_txt, "chpr%x", chpid); CIO_TRACE_EVENT(2, dbf_txt); if (get_chp_status(chpid) <= 0) return; - - bus_for_each_dev(&css_bus_type, NULL, &chpid, + dev = get_device(&css[0]->chps[chpid]->dev); + bus_for_each_dev(&css_bus_type, NULL, to_channelpath(dev), s390_subchannel_remove_chpid); if (need_rescan || css_slow_subchannels_exist()) queue_work(slow_path_wq, &slow_path_work); + put_device(dev); } +struct res_acc_data { + struct channel_path *chp; + u32 fla_mask; + u16 fla; +}; + static int -s390_process_res_acc_sch(u8 chpid, __u16 fla, u32 fla_mask, - struct subchannel *sch) +s390_process_res_acc_sch(struct res_acc_data *res_data, struct subchannel *sch) { int found; int chp; @@ -323,8 +333,9 @@ s390_process_res_acc_sch(u8 chpid, __u16 fla, u32 fla_mask, * check if chpid is in information updated by ssd */ if (sch->ssd_info.valid && - sch->ssd_info.chpid[chp] == chpid && - (sch->ssd_info.fla[chp] & fla_mask) == fla) { + sch->ssd_info.chpid[chp] == res_data->chp->id && + (sch->ssd_info.fla[chp] & res_data->fla_mask) + == res_data->fla) { found = 1; break; } @@ -337,24 +348,87 @@ s390_process_res_acc_sch(u8 chpid, __u16 fla, u32 fla_mask, * new path information and eventually check for logically * offline chpids. */ - ccode = stsch(sch->irq, &sch->schib); + ccode = stsch(sch->schid, &sch->schib); if (ccode > 0) return 0; return 0x80 >> chp; } +static inline int +s390_process_res_acc_new_sch(struct subchannel_id schid) +{ + struct schib schib; + int ret; + /* + * We don't know the device yet, but since a path + * may be available now to the device we'll have + * to do recognition again. + * Since we don't have any idea about which chpid + * that beast may be on we'll have to do a stsch + * on all devices, grr... + */ + if (stsch_err(schid, &schib)) + /* We're through */ + return need_rescan ? -EAGAIN : -ENXIO; + + /* Put it on the slow path. */ + ret = css_enqueue_subchannel_slow(schid); + if (ret) { + css_clear_subchannel_slow_list(); + need_rescan = 1; + return -EAGAIN; + } + return 0; +} + static int -s390_process_res_acc (u8 chpid, __u16 fla, u32 fla_mask) +__s390_process_res_acc(struct subchannel_id schid, void *data) { + int chp_mask, old_lpm; + struct res_acc_data *res_data; struct subchannel *sch; - int irq, rc; + + res_data = (struct res_acc_data *)data; + sch = get_subchannel_by_schid(schid); + if (!sch) + /* Check if a subchannel is newly available. */ + return s390_process_res_acc_new_sch(schid); + + spin_lock_irq(&sch->lock); + + chp_mask = s390_process_res_acc_sch(res_data, sch); + + if (chp_mask == 0) { + spin_unlock_irq(&sch->lock); + return 0; + } + old_lpm = sch->lpm; + sch->lpm = ((sch->schib.pmcw.pim & + sch->schib.pmcw.pam & + sch->schib.pmcw.pom) + | chp_mask) & sch->opm; + if (!old_lpm && sch->lpm) + device_trigger_reprobe(sch); + else if (sch->driver && sch->driver->verify) + sch->driver->verify(&sch->dev); + + spin_unlock_irq(&sch->lock); + put_device(&sch->dev); + return (res_data->fla_mask == 0xffff) ? -ENODEV : 0; +} + + +static int +s390_process_res_acc (struct res_acc_data *res_data) +{ + int rc; char dbf_txt[15]; - sprintf(dbf_txt, "accpr%x", chpid); + sprintf(dbf_txt, "accpr%x", res_data->chp->id); CIO_TRACE_EVENT( 2, dbf_txt); - if (fla != 0) { - sprintf(dbf_txt, "fla%x", fla); + if (res_data->fla != 0) { + sprintf(dbf_txt, "fla%x", res_data->fla); CIO_TRACE_EVENT( 2, dbf_txt); } @@ -365,70 +439,11 @@ s390_process_res_acc (u8 chpid, __u16 fla, u32 fla_mask) * The more information we have (info), the less scanning * will we have to do. */ - - if (!get_chp_status(chpid)) - return 0; /* no need to do the rest */ - - rc = 0; - for (irq = 0; irq < __MAX_SUBCHANNELS; irq++) { - int chp_mask, old_lpm; - - sch = get_subchannel_by_schid(irq); - if (!sch) { - struct schib schib; - int ret; - /* - * We don't know the device yet, but since a path - * may be available now to the device we'll have - * to do recognition again. - * Since we don't have any idea about which chpid - * that beast may be on we'll have to do a stsch - * on all devices, grr... - */ - if (stsch(irq, &schib)) { - /* We're through */ - if (need_rescan) - rc = -EAGAIN; - break; - } - if (need_rescan) { - rc = -EAGAIN; - continue; - } - /* Put it on the slow path. */ - ret = css_enqueue_subchannel_slow(irq); - if (ret) { - css_clear_subchannel_slow_list(); - need_rescan = 1; - } - rc = -EAGAIN; - continue; - } - - spin_lock_irq(&sch->lock); - - chp_mask = s390_process_res_acc_sch(chpid, fla, fla_mask, sch); - - if (chp_mask == 0) { - - spin_unlock_irq(&sch->lock); - continue; - } - old_lpm = sch->lpm; - sch->lpm = ((sch->schib.pmcw.pim & - sch->schib.pmcw.pam & - sch->schib.pmcw.pom) - | chp_mask) & sch->opm; - if (!old_lpm && sch->lpm) - device_trigger_reprobe(sch); - else if (sch->driver && sch->driver->verify) - sch->driver->verify(&sch->dev); - - spin_unlock_irq(&sch->lock); - put_device(&sch->dev); - if (fla_mask == 0xffff) - break; - } + rc = for_each_subchannel(__s390_process_res_acc, res_data); + if (css_slow_subchannels_exist()) + rc = -EAGAIN; + else if (rc != -EAGAIN) + rc = 0; return rc; } @@ -466,6 +481,7 @@ int chsc_process_crw(void) { int chpid, ret; + struct res_acc_data res_data; struct { struct chsc_header request; u32 reserved1; @@ -499,8 +515,9 @@ chsc_process_crw(void) ret = 0; do { int ccode, status; + struct device *dev; memset(sei_area, 0, sizeof(*sei_area)); - + memset(&res_data, 0, sizeof(struct res_acc_data)); sei_area->request = (struct chsc_header) { .length = 0x0010, .code = 0x000e, @@ -573,26 +590,25 @@ chsc_process_crw(void) if (status < 0) new_channel_path(sei_area->rsid); else if (!status) - return 0; - if ((sei_area->vf & 0x80) == 0) { - pr_debug("chpid: %x\n", sei_area->rsid); - ret = s390_process_res_acc(sei_area->rsid, - 0, 0); - } else if ((sei_area->vf & 0xc0) == 0x80) { - pr_debug("chpid: %x link addr: %x\n", - sei_area->rsid, sei_area->fla); - ret = s390_process_res_acc(sei_area->rsid, - sei_area->fla, - 0xff00); - } else if ((sei_area->vf & 0xc0) == 0xc0) { - pr_debug("chpid: %x full link addr: %x\n", - sei_area->rsid, sei_area->fla); - ret = s390_process_res_acc(sei_area->rsid, - sei_area->fla, - 0xffff); + break; + dev = get_device(&css[0]->chps[sei_area->rsid]->dev); + res_data.chp = to_channelpath(dev); + pr_debug("chpid: %x", sei_area->rsid); + if ((sei_area->vf & 0xc0) != 0) { + res_data.fla = sei_area->fla; + if ((sei_area->vf & 0xc0) == 0xc0) { + pr_debug(" full link addr: %x", + sei_area->fla); + res_data.fla_mask = 0xffff; + } else { + pr_debug(" link addr: %x", + sei_area->fla); + res_data.fla_mask = 0xff00; + } } - pr_debug("\n"); - + ret = s390_process_res_acc(&res_data); + pr_debug("\n\n"); + put_device(dev); break; default: /* other stuff */ @@ -604,12 +620,72 @@ chsc_process_crw(void) return ret; } +static inline int +__chp_add_new_sch(struct subchannel_id schid) +{ + struct schib schib; + int ret; + + if (stsch(schid, &schib)) + /* We're through */ + return need_rescan ? -EAGAIN : -ENXIO; + + /* Put it on the slow path. */ + ret = css_enqueue_subchannel_slow(schid); + if (ret) { + css_clear_subchannel_slow_list(); + need_rescan = 1; + return -EAGAIN; + } + return 0; +} + + static int -chp_add(int chpid) +__chp_add(struct subchannel_id schid, void *data) { + int i; + struct channel_path *chp; struct subchannel *sch; - int irq, ret, rc; + + chp = (struct channel_path *)data; + sch = get_subchannel_by_schid(schid); + if (!sch) + /* Check if the subchannel is now available. */ + return __chp_add_new_sch(schid); + spin_lock(&sch->lock); + for (i=0; i<8; i++) + if (sch->schib.pmcw.chpid[i] == chp->id) { + if (stsch(sch->schid, &sch->schib) != 0) { + /* Endgame. */ + spin_unlock(&sch->lock); + return -ENXIO; + } + break; + } + if (i==8) { + spin_unlock(&sch->lock); + return 0; + } + sch->lpm = ((sch->schib.pmcw.pim & + sch->schib.pmcw.pam & + sch->schib.pmcw.pom) + | 0x80 >> i) & sch->opm; + + if (sch->driver && sch->driver->verify) + sch->driver->verify(&sch->dev); + + spin_unlock(&sch->lock); + put_device(&sch->dev); + return 0; +} + +static int +chp_add(int chpid) +{ + int rc; char dbf_txt[15]; + struct device *dev; if (!get_chp_status(chpid)) return 0; /* no need to do the rest */ @@ -617,59 +693,13 @@ chp_add(int chpid) sprintf(dbf_txt, "cadd%x", chpid); CIO_TRACE_EVENT(2, dbf_txt); - rc = 0; - for (irq = 0; irq < __MAX_SUBCHANNELS; irq++) { - int i; - - sch = get_subchannel_by_schid(irq); - if (!sch) { - struct schib schib; - - if (stsch(irq, &schib)) { - /* We're through */ - if (need_rescan) - rc = -EAGAIN; - break; - } - if (need_rescan) { - rc = -EAGAIN; - continue; - } - /* Put it on the slow path. */ - ret = css_enqueue_subchannel_slow(irq); - if (ret) { - css_clear_subchannel_slow_list(); - need_rescan = 1; - } - rc = -EAGAIN; - continue; - } - - spin_lock(&sch->lock); - for (i=0; i<8; i++) - if (sch->schib.pmcw.chpid[i] == chpid) { - if (stsch(sch->irq, &sch->schib) != 0) { - /* Endgame. */ - spin_unlock(&sch->lock); - return rc; - } - break; - } - if (i==8) { - spin_unlock(&sch->lock); - return rc; - } - sch->lpm = ((sch->schib.pmcw.pim & - sch->schib.pmcw.pam & - sch->schib.pmcw.pom) - | 0x80 >> i) & sch->opm; - - if (sch->driver && sch->driver->verify) - sch->driver->verify(&sch->dev); - - spin_unlock(&sch->lock); - put_device(&sch->dev); - } + dev = get_device(&css[0]->chps[chpid]->dev); + rc = for_each_subchannel(__chp_add, to_channelpath(dev)); + if (css_slow_subchannels_exist()) + rc = -EAGAIN; + if (rc != -EAGAIN) + rc = 0; + put_device(dev); return rc; } @@ -702,7 +732,7 @@ __check_for_io_and_kill(struct subchannel *sch, int index) if (!device_is_online(sch)) /* cio could be doing I/O. */ return 0; - cc = stsch(sch->irq, &sch->schib); + cc = stsch(sch->schid, &sch->schib); if (cc) return 0; if (sch->schib.scsw.actl && sch->schib.pmcw.lpum == (0x80 >> index)) { @@ -743,7 +773,7 @@ __s390_subchannel_vary_chpid(struct subchannel *sch, __u8 chpid, int on) * just varied off path. Then kill it. */ if (!__check_for_io_and_kill(sch, chp) && !sch->lpm) { - if (css_enqueue_subchannel_slow(sch->irq)) { + if (css_enqueue_subchannel_slow(sch->schid)) { css_clear_subchannel_slow_list(); need_rescan = 1; } @@ -781,6 +811,29 @@ s390_subchannel_vary_chpid_on(struct device *dev, void *data) return 0; } +static int +__s390_vary_chpid_on(struct subchannel_id schid, void *data) +{ + struct schib schib; + struct subchannel *sch; + + sch = get_subchannel_by_schid(schid); + if (sch) { + put_device(&sch->dev); + return 0; + } + if (stsch_err(schid, &schib)) + /* We're through */ + return -ENXIO; + /* Put it on the slow path. */ + if (css_enqueue_subchannel_slow(schid)) { + css_clear_subchannel_slow_list(); + need_rescan = 1; + return -EAGAIN; + } + return 0; +} + /* * Function: s390_vary_chpid * Varies the specified chpid online or offline @@ -789,8 +842,7 @@ static int s390_vary_chpid( __u8 chpid, int on) { char dbf_text[15]; - int status, irq, ret; - struct subchannel *sch; + int status; sprintf(dbf_text, on?"varyon%x":"varyoff%x", chpid); CIO_TRACE_EVENT( 2, dbf_text); @@ -815,30 +867,9 @@ s390_vary_chpid( __u8 chpid, int on) bus_for_each_dev(&css_bus_type, NULL, &chpid, on ? s390_subchannel_vary_chpid_on : s390_subchannel_vary_chpid_off); - if (!on) - goto out; - /* Scan for new devices on varied on path. */ - for (irq = 0; irq < __MAX_SUBCHANNELS; irq++) { - struct schib schib; - - if (need_rescan) - break; - sch = get_subchannel_by_schid(irq); - if (sch) { - put_device(&sch->dev); - continue; - } - if (stsch(irq, &schib)) - /* We're through */ - break; - /* Put it on the slow path. */ - ret = css_enqueue_subchannel_slow(irq); - if (ret) { - css_clear_subchannel_slow_list(); - need_rescan = 1; - } - } -out: + if (on) + /* Scan for new devices on varied on path. */ + for_each_subchannel(__s390_vary_chpid_on, NULL); if (need_rescan || css_slow_subchannels_exist()) queue_work(slow_path_wq, &slow_path_work); return 0; @@ -995,7 +1026,7 @@ new_channel_path(int chpid) chp->id = chpid; chp->state = 1; chp->dev = (struct device) { - .parent = &css_bus_device, + .parent = &css[0]->device, .release = chp_release, }; snprintf(chp->dev.bus_id, BUS_ID_SIZE, "chp0.%x", chpid); @@ -1017,7 +1048,7 @@ new_channel_path(int chpid) device_unregister(&chp->dev); goto out_free; } else - chps[chpid] = chp; + css[0]->chps[chpid] = chp; return ret; out_free: kfree(chp); @@ -1030,7 +1061,7 @@ chsc_get_chp_desc(struct subchannel *sch, int chp_no) struct channel_path *chp; struct channel_path_desc *desc; - chp = chps[sch->schib.pmcw.chpid[chp_no]]; + chp = css[0]->chps[sch->schib.pmcw.chpid[chp_no]]; if (!chp) return NULL; desc = kmalloc(sizeof(struct channel_path_desc), GFP_KERNEL); @@ -1051,6 +1082,54 @@ chsc_alloc_sei_area(void) return (sei_page ? 0 : -ENOMEM); } +int __init +chsc_enable_facility(int operation_code) +{ + int ret; + struct { + struct chsc_header request; + u8 reserved1:4; + u8 format:4; + u8 reserved2; + u16 operation_code; + u32 reserved3; + u32 reserved4; + u32 operation_data_area[252]; + struct chsc_header response; + u32 reserved5:4; + u32 format2:4; + u32 reserved6:24; + } *sda_area; + + sda_area = (void *)get_zeroed_page(GFP_KERNEL|GFP_DMA); + if (!sda_area) + return -ENOMEM; + sda_area->request = (struct chsc_header) { + .length = 0x0400, + .code = 0x0031, + }; + sda_area->operation_code = operation_code; + + ret = chsc(sda_area); + if (ret > 0) { + ret = (ret == 3) ? -ENODEV : -EBUSY; + goto out; + } + switch (sda_area->response.code) { + case 0x0003: /* invalid request block */ + case 0x0007: + ret = -EINVAL; + break; + case 0x0004: /* command not provided */ + case 0x0101: /* facility not provided */ + ret = -EOPNOTSUPP; + break; + } + out: + free_page((unsigned long)sda_area); + return ret; +} + subsys_initcall(chsc_alloc_sei_area); struct css_general_char css_general_characteristics; diff --git a/drivers/s390/cio/chsc.h b/drivers/s390/cio/chsc.h index be20da49d14..44e4b4bb1c5 100644 --- a/drivers/s390/cio/chsc.h +++ b/drivers/s390/cio/chsc.h @@ -1,12 +1,12 @@ #ifndef S390_CHSC_H #define S390_CHSC_H -#define NR_CHPIDS 256 - #define CHSC_SEI_ACC_CHPID 1 #define CHSC_SEI_ACC_LINKADDR 2 #define CHSC_SEI_ACC_FULLLINKADDR 3 +#define CHSC_SDA_OC_MSS 0x2 + struct chsc_header { u16 length; u16 code; @@ -43,7 +43,9 @@ struct css_general_char { u32 ext_mb : 1; /* bit 48 */ u32 : 7; u32 aif_tdd : 1; /* bit 56 */ - u32 : 10; + u32 : 1; + u32 qebsm : 1; /* bit 58 */ + u32 : 8; u32 aif_osa : 1; /* bit 67 */ u32 : 28; }__attribute__((packed)); @@ -63,4 +65,9 @@ extern int chsc_determine_css_characteristics(void); extern int css_characteristics_avail; extern void *chsc_get_chp_desc(struct subchannel*, int); + +extern int chsc_enable_facility(int); + +#define to_channelpath(dev) container_of(dev, struct channel_path, dev) + #endif diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index 185bc73c3ec..7376bc87206 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -1,7 +1,7 @@ /* * drivers/s390/cio/cio.c * S/390 common I/O routines -- low level i/o calls - * $Revision: 1.135 $ + * $Revision: 1.138 $ * * Copyright (C) 1999-2002 IBM Deutschland Entwicklung GmbH, * IBM Corporation @@ -135,7 +135,7 @@ cio_tpi(void) return 0; irb = (struct irb *) __LC_IRB; /* Store interrupt response block to lowcore. */ - if (tsch (tpi_info->irq, irb) != 0) + if (tsch (tpi_info->schid, irb) != 0) /* Not status pending or not operational. */ return 1; sch = (struct subchannel *)(unsigned long)tpi_info->intparm; @@ -163,10 +163,11 @@ cio_start_handle_notoper(struct subchannel *sch, __u8 lpm) else sch->lpm = 0; - stsch (sch->irq, &sch->schib); + stsch (sch->schid, &sch->schib); CIO_MSG_EVENT(0, "cio_start: 'not oper' status for " - "subchannel %04x!\n", sch->irq); + "subchannel 0.%x.%04x!\n", sch->schid.ssid, + sch->schid.sch_no); sprintf(dbf_text, "no%s", sch->dev.bus_id); CIO_TRACE_EVENT(0, dbf_text); CIO_HEX_EVENT(0, &sch->schib, sizeof (struct schib)); @@ -194,7 +195,7 @@ cio_start_key (struct subchannel *sch, /* subchannel structure */ sch->orb.spnd = sch->options.suspend; sch->orb.ssic = sch->options.suspend && sch->options.inter; sch->orb.lpm = (lpm != 0) ? (lpm & sch->opm) : sch->lpm; -#ifdef CONFIG_ARCH_S390X +#ifdef CONFIG_64BIT /* * for 64 bit we always support 64 bit IDAWs with 4k page size only */ @@ -204,7 +205,7 @@ cio_start_key (struct subchannel *sch, /* subchannel structure */ sch->orb.key = key >> 4; /* issue "Start Subchannel" */ sch->orb.cpa = (__u32) __pa (cpa); - ccode = ssch (sch->irq, &sch->orb); + ccode = ssch (sch->schid, &sch->orb); /* process condition code */ sprintf (dbf_txt, "ccode:%d", ccode); @@ -243,7 +244,7 @@ cio_resume (struct subchannel *sch) CIO_TRACE_EVENT (4, "resIO"); CIO_TRACE_EVENT (4, sch->dev.bus_id); - ccode = rsch (sch->irq); + ccode = rsch (sch->schid); sprintf (dbf_txt, "ccode:%d", ccode); CIO_TRACE_EVENT (4, dbf_txt); @@ -283,7 +284,7 @@ cio_halt(struct subchannel *sch) /* * Issue "Halt subchannel" and process condition code */ - ccode = hsch (sch->irq); + ccode = hsch (sch->schid); sprintf (dbf_txt, "ccode:%d", ccode); CIO_TRACE_EVENT (2, dbf_txt); @@ -318,7 +319,7 @@ cio_clear(struct subchannel *sch) /* * Issue "Clear subchannel" and process condition code */ - ccode = csch (sch->irq); + ccode = csch (sch->schid); sprintf (dbf_txt, "ccode:%d", ccode); CIO_TRACE_EVENT (2, dbf_txt); @@ -351,7 +352,7 @@ cio_cancel (struct subchannel *sch) CIO_TRACE_EVENT (2, "cancelIO"); CIO_TRACE_EVENT (2, sch->dev.bus_id); - ccode = xsch (sch->irq); + ccode = xsch (sch->schid); sprintf (dbf_txt, "ccode:%d", ccode); CIO_TRACE_EVENT (2, dbf_txt); @@ -359,7 +360,7 @@ cio_cancel (struct subchannel *sch) switch (ccode) { case 0: /* success */ /* Update information in scsw. */ - stsch (sch->irq, &sch->schib); + stsch (sch->schid, &sch->schib); return 0; case 1: /* status pending */ return -EBUSY; @@ -381,7 +382,7 @@ cio_modify (struct subchannel *sch) ret = 0; for (retry = 0; retry < 5; retry++) { - ccode = msch_err (sch->irq, &sch->schib); + ccode = msch_err (sch->schid, &sch->schib); if (ccode < 0) /* -EIO if msch gets a program check. */ return ccode; switch (ccode) { @@ -414,7 +415,7 @@ cio_enable_subchannel (struct subchannel *sch, unsigned int isc) CIO_TRACE_EVENT (2, "ensch"); CIO_TRACE_EVENT (2, sch->dev.bus_id); - ccode = stsch (sch->irq, &sch->schib); + ccode = stsch (sch->schid, &sch->schib); if (ccode) return -ENODEV; @@ -432,13 +433,13 @@ cio_enable_subchannel (struct subchannel *sch, unsigned int isc) */ sch->schib.pmcw.csense = 0; if (ret == 0) { - stsch (sch->irq, &sch->schib); + stsch (sch->schid, &sch->schib); if (sch->schib.pmcw.ena) break; } if (ret == -EBUSY) { struct irb irb; - if (tsch(sch->irq, &irb) != 0) + if (tsch(sch->schid, &irb) != 0) break; } } @@ -461,7 +462,7 @@ cio_disable_subchannel (struct subchannel *sch) CIO_TRACE_EVENT (2, "dissch"); CIO_TRACE_EVENT (2, sch->dev.bus_id); - ccode = stsch (sch->irq, &sch->schib); + ccode = stsch (sch->schid, &sch->schib); if (ccode == 3) /* Not operational. */ return -ENODEV; @@ -485,7 +486,7 @@ cio_disable_subchannel (struct subchannel *sch) */ break; if (ret == 0) { - stsch (sch->irq, &sch->schib); + stsch (sch->schid, &sch->schib); if (!sch->schib.pmcw.ena) break; } @@ -508,12 +509,12 @@ cio_disable_subchannel (struct subchannel *sch) * -ENODEV for subchannels with invalid device number or blacklisted devices */ int -cio_validate_subchannel (struct subchannel *sch, unsigned int irq) +cio_validate_subchannel (struct subchannel *sch, struct subchannel_id schid) { char dbf_txt[15]; int ccode; - sprintf (dbf_txt, "valsch%x", irq); + sprintf (dbf_txt, "valsch%x", schid.sch_no); CIO_TRACE_EVENT (4, dbf_txt); /* Nuke all fields. */ @@ -522,17 +523,20 @@ cio_validate_subchannel (struct subchannel *sch, unsigned int irq) spin_lock_init(&sch->lock); /* Set a name for the subchannel */ - snprintf (sch->dev.bus_id, BUS_ID_SIZE, "0.0.%04x", irq); + snprintf (sch->dev.bus_id, BUS_ID_SIZE, "0.%x.%04x", schid.ssid, + schid.sch_no); /* * The first subchannel that is not-operational (ccode==3) * indicates that there aren't any more devices available. + * If stsch gets an exception, it means the current subchannel set + * is not valid. */ - sch->irq = irq; - ccode = stsch (irq, &sch->schib); + ccode = stsch_err (schid, &sch->schib); if (ccode) - return -ENXIO; + return (ccode == 3) ? -ENXIO : ccode; + sch->schid = schid; /* Copy subchannel type from path management control word. */ sch->st = sch->schib.pmcw.st; @@ -541,9 +545,9 @@ cio_validate_subchannel (struct subchannel *sch, unsigned int irq) */ if (sch->st != 0) { CIO_DEBUG(KERN_INFO, 0, - "Subchannel %04X reports " + "Subchannel 0.%x.%04x reports " "non-I/O subchannel type %04X\n", - sch->irq, sch->st); + sch->schid.ssid, sch->schid.sch_no, sch->st); /* We stop here for non-io subchannels. */ return sch->st; } @@ -554,26 +558,29 @@ cio_validate_subchannel (struct subchannel *sch, unsigned int irq) return -ENODEV; /* Devno is valid. */ - if (is_blacklisted (sch->schib.pmcw.dev)) { + if (is_blacklisted (sch->schid.ssid, sch->schib.pmcw.dev)) { /* * This device must not be known to Linux. So we simply * say that there is no device and return ENODEV. */ CIO_MSG_EVENT(0, "Blacklisted device detected " - "at devno %04X\n", sch->schib.pmcw.dev); + "at devno %04X, subchannel set %x\n", + sch->schib.pmcw.dev, sch->schid.ssid); return -ENODEV; } sch->opm = 0xff; - chsc_validate_chpids(sch); + if (!cio_is_console(sch->schid)) + chsc_validate_chpids(sch); sch->lpm = sch->schib.pmcw.pim & sch->schib.pmcw.pam & sch->schib.pmcw.pom & sch->opm; CIO_DEBUG(KERN_INFO, 0, - "Detected device %04X on subchannel %04X" + "Detected device %04x on subchannel 0.%x.%04X" " - PIM = %02X, PAM = %02X, POM = %02X\n", - sch->schib.pmcw.dev, sch->irq, sch->schib.pmcw.pim, + sch->schib.pmcw.dev, sch->schid.ssid, + sch->schid.sch_no, sch->schib.pmcw.pim, sch->schib.pmcw.pam, sch->schib.pmcw.pom); /* @@ -632,7 +639,7 @@ do_IRQ (struct pt_regs *regs) if (sch) spin_lock(&sch->lock); /* Store interrupt response block to lowcore. */ - if (tsch (tpi_info->irq, irb) == 0 && sch) { + if (tsch (tpi_info->schid, irb) == 0 && sch) { /* Keep subchannel information word up to date. */ memcpy (&sch->schib.scsw, &irb->scsw, sizeof (irb->scsw)); @@ -691,28 +698,36 @@ wait_cons_dev (void) } static int -cio_console_irq(void) +cio_test_for_console(struct subchannel_id schid, void *data) { - int irq; + if (stsch_err(schid, &console_subchannel.schib) != 0) + return -ENXIO; + if (console_subchannel.schib.pmcw.dnv && + console_subchannel.schib.pmcw.dev == + console_devno) { + console_irq = schid.sch_no; + return 1; /* found */ + } + return 0; +} + + +static int +cio_get_console_sch_no(void) +{ + struct subchannel_id schid; + init_subchannel_id(&schid); if (console_irq != -1) { /* VM provided us with the irq number of the console. */ - if (stsch(console_irq, &console_subchannel.schib) != 0 || + schid.sch_no = console_irq; + if (stsch(schid, &console_subchannel.schib) != 0 || !console_subchannel.schib.pmcw.dnv) return -1; console_devno = console_subchannel.schib.pmcw.dev; } else if (console_devno != -1) { /* At least the console device number is known. */ - for (irq = 0; irq < __MAX_SUBCHANNELS; irq++) { - if (stsch(irq, &console_subchannel.schib) != 0) - break; - if (console_subchannel.schib.pmcw.dnv && - console_subchannel.schib.pmcw.dev == - console_devno) { - console_irq = irq; - break; - } - } + for_each_subchannel(cio_test_for_console, NULL); if (console_irq == -1) return -1; } else { @@ -728,17 +743,20 @@ cio_console_irq(void) struct subchannel * cio_probe_console(void) { - int irq, ret; + int sch_no, ret; + struct subchannel_id schid; if (xchg(&console_subchannel_in_use, 1) != 0) return ERR_PTR(-EBUSY); - irq = cio_console_irq(); - if (irq == -1) { + sch_no = cio_get_console_sch_no(); + if (sch_no == -1) { console_subchannel_in_use = 0; return ERR_PTR(-ENODEV); } memset(&console_subchannel, 0, sizeof(struct subchannel)); - ret = cio_validate_subchannel(&console_subchannel, irq); + init_subchannel_id(&schid); + schid.sch_no = sch_no; + ret = cio_validate_subchannel(&console_subchannel, schid); if (ret) { console_subchannel_in_use = 0; return ERR_PTR(-ENODEV); @@ -770,11 +788,11 @@ cio_release_console(void) /* Bah... hack to catch console special sausages. */ int -cio_is_console(int irq) +cio_is_console(struct subchannel_id schid) { if (!console_subchannel_in_use) return 0; - return (irq == console_subchannel.irq); + return schid_equal(&schid, &console_subchannel.schid); } struct subchannel * @@ -787,7 +805,7 @@ cio_get_console_subchannel(void) #endif static inline int -__disable_subchannel_easy(unsigned int schid, struct schib *schib) +__disable_subchannel_easy(struct subchannel_id schid, struct schib *schib) { int retry, cc; @@ -805,7 +823,7 @@ __disable_subchannel_easy(unsigned int schid, struct schib *schib) } static inline int -__clear_subchannel_easy(unsigned int schid) +__clear_subchannel_easy(struct subchannel_id schid) { int retry; @@ -815,8 +833,8 @@ __clear_subchannel_easy(unsigned int schid) struct tpi_info ti; if (tpi(&ti)) { - tsch(ti.irq, (struct irb *)__LC_IRB); - if (ti.irq == schid) + tsch(ti.schid, (struct irb *)__LC_IRB); + if (schid_equal(&ti.schid, &schid)) return 0; } udelay(100); @@ -825,31 +843,33 @@ __clear_subchannel_easy(unsigned int schid) } extern void do_reipl(unsigned long devno); +static int +__shutdown_subchannel_easy(struct subchannel_id schid, void *data) +{ + struct schib schib; + + if (stsch_err(schid, &schib)) + return -ENXIO; + if (!schib.pmcw.ena) + return 0; + switch(__disable_subchannel_easy(schid, &schib)) { + case 0: + case -ENODEV: + break; + default: /* -EBUSY */ + if (__clear_subchannel_easy(schid)) + break; /* give up... */ + stsch(schid, &schib); + __disable_subchannel_easy(schid, &schib); + } + return 0; +} -/* Clear all subchannels. */ void clear_all_subchannels(void) { - unsigned int schid; - local_irq_disable(); - for (schid=0;schid<=highest_subchannel;schid++) { - struct schib schib; - if (stsch(schid, &schib)) - break; /* break out of the loop */ - if (!schib.pmcw.ena) - continue; - switch(__disable_subchannel_easy(schid, &schib)) { - case 0: - case -ENODEV: - break; - default: /* -EBUSY */ - if (__clear_subchannel_easy(schid)) - break; /* give up... jump out of switch */ - stsch(schid, &schib); - __disable_subchannel_easy(schid, &schib); - } - } + for_each_subchannel(__shutdown_subchannel_easy, NULL); } /* Make sure all subchannels are quiet before we re-ipl an lpar. */ diff --git a/drivers/s390/cio/cio.h b/drivers/s390/cio/cio.h index c50a9da420a..0ca987344e0 100644 --- a/drivers/s390/cio/cio.h +++ b/drivers/s390/cio/cio.h @@ -1,6 +1,8 @@ #ifndef S390_CIO_H #define S390_CIO_H +#include "schid.h" + /* * where we put the ssd info */ @@ -83,7 +85,7 @@ struct orb { /* subchannel data structure used by I/O subroutines */ struct subchannel { - unsigned int irq; /* aka. subchannel number */ + struct subchannel_id schid; spinlock_t lock; /* subchannel lock */ enum { @@ -114,7 +116,7 @@ struct subchannel { #define to_subchannel(n) container_of(n, struct subchannel, dev) -extern int cio_validate_subchannel (struct subchannel *, unsigned int); +extern int cio_validate_subchannel (struct subchannel *, struct subchannel_id); extern int cio_enable_subchannel (struct subchannel *, unsigned int); extern int cio_disable_subchannel (struct subchannel *); extern int cio_cancel (struct subchannel *); @@ -127,14 +129,15 @@ extern int cio_cancel (struct subchannel *); extern int cio_set_options (struct subchannel *, int); extern int cio_get_options (struct subchannel *); extern int cio_modify (struct subchannel *); + /* Use with care. */ #ifdef CONFIG_CCW_CONSOLE extern struct subchannel *cio_probe_console(void); extern void cio_release_console(void); -extern int cio_is_console(int irq); +extern int cio_is_console(struct subchannel_id); extern struct subchannel *cio_get_console_subchannel(void); #else -#define cio_is_console(irq) 0 +#define cio_is_console(schid) 0 #define cio_get_console_subchannel() NULL #endif diff --git a/drivers/s390/cio/cmf.c b/drivers/s390/cio/cmf.c index b978f7fe832..0b03714e696 100644 --- a/drivers/s390/cio/cmf.c +++ b/drivers/s390/cio/cmf.c @@ -1,5 +1,5 @@ /* - * linux/drivers/s390/cio/cmf.c ($Revision: 1.16 $) + * linux/drivers/s390/cio/cmf.c ($Revision: 1.19 $) * * Linux on zSeries Channel Measurement Facility support * @@ -178,7 +178,7 @@ set_schib(struct ccw_device *cdev, u32 mme, int mbfc, unsigned long address) /* msch can silently fail, so do it again if necessary */ for (retry = 0; retry < 3; retry++) { /* prepare schib */ - stsch(sch->irq, schib); + stsch(sch->schid, schib); schib->pmcw.mme = mme; schib->pmcw.mbfc = mbfc; /* address can be either a block address or a block index */ @@ -188,7 +188,7 @@ set_schib(struct ccw_device *cdev, u32 mme, int mbfc, unsigned long address) schib->pmcw.mbi = address; /* try to submit it */ - switch(ret = msch_err(sch->irq, schib)) { + switch(ret = msch_err(sch->schid, schib)) { case 0: break; case 1: @@ -202,7 +202,7 @@ set_schib(struct ccw_device *cdev, u32 mme, int mbfc, unsigned long address) ret = -EINVAL; break; } - stsch(sch->irq, schib); /* restore the schib */ + stsch(sch->schid, schib); /* restore the schib */ if (ret) break; diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index 555119cacc2..e565193650c 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -1,7 +1,7 @@ /* * drivers/s390/cio/css.c * driver for channel subsystem - * $Revision: 1.85 $ + * $Revision: 1.93 $ * * Copyright (C) 2002 IBM Deutschland Entwicklung GmbH, * IBM Corporation @@ -21,19 +21,35 @@ #include "ioasm.h" #include "chsc.h" -unsigned int highest_subchannel; int need_rescan = 0; int css_init_done = 0; +static int max_ssid = 0; + +struct channel_subsystem *css[__MAX_CSSID + 1]; -struct pgid global_pgid; int css_characteristics_avail = 0; -struct device css_bus_device = { - .bus_id = "css0", -}; +inline int +for_each_subchannel(int(*fn)(struct subchannel_id, void *), void *data) +{ + struct subchannel_id schid; + int ret; + + init_subchannel_id(&schid); + ret = -ENODEV; + do { + do { + ret = fn(schid, data); + if (ret) + break; + } while (schid.sch_no++ < __MAX_SUBCHANNEL); + schid.sch_no = 0; + } while (schid.ssid++ < max_ssid); + return ret; +} static struct subchannel * -css_alloc_subchannel(int irq) +css_alloc_subchannel(struct subchannel_id schid) { struct subchannel *sch; int ret; @@ -41,13 +57,11 @@ css_alloc_subchannel(int irq) sch = kmalloc (sizeof (*sch), GFP_KERNEL | GFP_DMA); if (sch == NULL) return ERR_PTR(-ENOMEM); - ret = cio_validate_subchannel (sch, irq); + ret = cio_validate_subchannel (sch, schid); if (ret < 0) { kfree(sch); return ERR_PTR(ret); } - if (irq > highest_subchannel) - highest_subchannel = irq; if (sch->st != SUBCHANNEL_TYPE_IO) { /* For now we ignore all non-io subchannels. */ @@ -87,7 +101,7 @@ css_subchannel_release(struct device *dev) struct subchannel *sch; sch = to_subchannel(dev); - if (!cio_is_console(sch->irq)) + if (!cio_is_console(sch->schid)) kfree(sch); } @@ -99,7 +113,7 @@ css_register_subchannel(struct subchannel *sch) int ret; /* Initialize the subchannel structure */ - sch->dev.parent = &css_bus_device; + sch->dev.parent = &css[0]->device; sch->dev.bus = &css_bus_type; sch->dev.release = &css_subchannel_release; @@ -114,12 +128,12 @@ css_register_subchannel(struct subchannel *sch) } int -css_probe_device(int irq) +css_probe_device(struct subchannel_id schid) { int ret; struct subchannel *sch; - sch = css_alloc_subchannel(irq); + sch = css_alloc_subchannel(schid); if (IS_ERR(sch)) return PTR_ERR(sch); ret = css_register_subchannel(sch); @@ -132,26 +146,26 @@ static int check_subchannel(struct device * dev, void * data) { struct subchannel *sch; - int irq = (unsigned long)data; + struct subchannel_id *schid = data; sch = to_subchannel(dev); - return (sch->irq == irq); + return schid_equal(&sch->schid, schid); } struct subchannel * -get_subchannel_by_schid(int irq) +get_subchannel_by_schid(struct subchannel_id schid) { struct device *dev; dev = bus_find_device(&css_bus_type, NULL, - (void *)(unsigned long)irq, check_subchannel); + (void *)&schid, check_subchannel); return dev ? to_subchannel(dev) : NULL; } static inline int -css_get_subchannel_status(struct subchannel *sch, int schid) +css_get_subchannel_status(struct subchannel *sch, struct subchannel_id schid) { struct schib schib; int cc; @@ -170,13 +184,13 @@ css_get_subchannel_status(struct subchannel *sch, int schid) } static int -css_evaluate_subchannel(int irq, int slow) +css_evaluate_subchannel(struct subchannel_id schid, int slow) { int event, ret, disc; struct subchannel *sch; unsigned long flags; - sch = get_subchannel_by_schid(irq); + sch = get_subchannel_by_schid(schid); disc = sch ? device_is_disconnected(sch) : 0; if (disc && slow) { if (sch) @@ -194,9 +208,10 @@ css_evaluate_subchannel(int irq, int slow) put_device(&sch->dev); return -EAGAIN; /* Will be done on the slow path. */ } - event = css_get_subchannel_status(sch, irq); - CIO_MSG_EVENT(4, "Evaluating schid %04x, event %d, %s, %s path.\n", - irq, event, sch?(disc?"disconnected":"normal"):"unknown", + event = css_get_subchannel_status(sch, schid); + CIO_MSG_EVENT(4, "Evaluating schid 0.%x.%04x, event %d, %s, %s path.\n", + schid.ssid, schid.sch_no, event, + sch?(disc?"disconnected":"normal"):"unknown", slow?"slow":"fast"); switch (event) { case CIO_NO_PATH: @@ -253,7 +268,7 @@ css_evaluate_subchannel(int irq, int slow) sch->schib.pmcw.intparm = 0; cio_modify(sch); put_device(&sch->dev); - ret = css_probe_device(irq); + ret = css_probe_device(schid); } else { /* * We can't immediately deregister the disconnected @@ -272,7 +287,7 @@ css_evaluate_subchannel(int irq, int slow) device_trigger_reprobe(sch); spin_unlock_irqrestore(&sch->lock, flags); } - ret = sch ? 0 : css_probe_device(irq); + ret = sch ? 0 : css_probe_device(schid); break; default: BUG(); @@ -281,28 +296,15 @@ css_evaluate_subchannel(int irq, int slow) return ret; } -static void -css_rescan_devices(void) +static int +css_rescan_devices(struct subchannel_id schid, void *data) { - int irq, ret; - - for (irq = 0; irq < __MAX_SUBCHANNELS; irq++) { - ret = css_evaluate_subchannel(irq, 1); - /* No more memory. It doesn't make sense to continue. No - * panic because this can happen in midflight and just - * because we can't use a new device is no reason to crash - * the system. */ - if (ret == -ENOMEM) - break; - /* -ENXIO indicates that there are no more subchannels. */ - if (ret == -ENXIO) - break; - } + return css_evaluate_subchannel(schid, 1); } struct slow_subchannel { struct list_head slow_list; - unsigned long schid; + struct subchannel_id schid; }; static LIST_HEAD(slow_subchannels_head); @@ -315,7 +317,7 @@ css_trigger_slow_path(void) if (need_rescan) { need_rescan = 0; - css_rescan_devices(); + for_each_subchannel(css_rescan_devices, NULL); return; } @@ -354,23 +356,31 @@ css_reiterate_subchannels(void) * Called from the machine check handler for subchannel report words. */ int -css_process_crw(int irq) +css_process_crw(int rsid1, int rsid2) { int ret; + struct subchannel_id mchk_schid; - CIO_CRW_EVENT(2, "source is subchannel %04X\n", irq); + CIO_CRW_EVENT(2, "source is subchannel %04X, subsystem id %x\n", + rsid1, rsid2); if (need_rescan) /* We need to iterate all subchannels anyway. */ return -EAGAIN; + + init_subchannel_id(&mchk_schid); + mchk_schid.sch_no = rsid1; + if (rsid2 != 0) + mchk_schid.ssid = (rsid2 >> 8) & 3; + /* * Since we are always presented with IPI in the CRW, we have to * use stsch() to find out if the subchannel in question has come * or gone. */ - ret = css_evaluate_subchannel(irq, 0); + ret = css_evaluate_subchannel(mchk_schid, 0); if (ret == -EAGAIN) { - if (css_enqueue_subchannel_slow(irq)) { + if (css_enqueue_subchannel_slow(mchk_schid)) { css_clear_subchannel_slow_list(); need_rescan = 1; } @@ -378,22 +388,83 @@ css_process_crw(int irq) return ret; } -static void __init -css_generate_pgid(void) +static int __init +__init_channel_subsystem(struct subchannel_id schid, void *data) { - /* Let's build our path group ID here. */ - if (css_characteristics_avail && css_general_characteristics.mcss) - global_pgid.cpu_addr = 0x8000; + struct subchannel *sch; + int ret; + + if (cio_is_console(schid)) + sch = cio_get_console_subchannel(); else { + sch = css_alloc_subchannel(schid); + if (IS_ERR(sch)) + ret = PTR_ERR(sch); + else + ret = 0; + switch (ret) { + case 0: + break; + case -ENOMEM: + panic("Out of memory in init_channel_subsystem\n"); + /* -ENXIO: no more subchannels. */ + case -ENXIO: + return ret; + default: + return 0; + } + } + /* + * We register ALL valid subchannels in ioinfo, even those + * that have been present before init_channel_subsystem. + * These subchannels can't have been registered yet (kmalloc + * not working) so we do it now. This is true e.g. for the + * console subchannel. + */ + css_register_subchannel(sch); + return 0; +} + +static void __init +css_generate_pgid(struct channel_subsystem *css, u32 tod_high) +{ + if (css_characteristics_avail && css_general_characteristics.mcss) { + css->global_pgid.pgid_high.ext_cssid.version = 0x80; + css->global_pgid.pgid_high.ext_cssid.cssid = css->cssid; + } else { #ifdef CONFIG_SMP - global_pgid.cpu_addr = hard_smp_processor_id(); + css->global_pgid.pgid_high.cpu_addr = hard_smp_processor_id(); #else - global_pgid.cpu_addr = 0; + css->global_pgid.pgid_high.cpu_addr = 0; #endif } - global_pgid.cpu_id = ((cpuid_t *) __LC_CPUID)->ident; - global_pgid.cpu_model = ((cpuid_t *) __LC_CPUID)->machine; - global_pgid.tod_high = (__u32) (get_clock() >> 32); + css->global_pgid.cpu_id = ((cpuid_t *) __LC_CPUID)->ident; + css->global_pgid.cpu_model = ((cpuid_t *) __LC_CPUID)->machine; + css->global_pgid.tod_high = tod_high; + +} + +static void +channel_subsystem_release(struct device *dev) +{ + struct channel_subsystem *css; + + css = to_css(dev); + kfree(css); +} + +static inline void __init +setup_css(int nr) +{ + u32 tod_high; + + memset(css[nr], 0, sizeof(struct channel_subsystem)); + css[nr]->valid = 1; + css[nr]->cssid = nr; + sprintf(css[nr]->device.bus_id, "css%x", nr); + css[nr]->device.release = channel_subsystem_release; + tod_high = (u32) (get_clock() >> 32); + css_generate_pgid(css[nr], tod_high); } /* @@ -404,53 +475,50 @@ css_generate_pgid(void) static int __init init_channel_subsystem (void) { - int ret, irq; + int ret, i; if (chsc_determine_css_characteristics() == 0) css_characteristics_avail = 1; - css_generate_pgid(); - if ((ret = bus_register(&css_bus_type))) goto out; - if ((ret = device_register (&css_bus_device))) - goto out_bus; + /* Try to enable MSS. */ + ret = chsc_enable_facility(CHSC_SDA_OC_MSS); + switch (ret) { + case 0: /* Success. */ + max_ssid = __MAX_SSID; + break; + case -ENOMEM: + goto out_bus; + default: + max_ssid = 0; + } + /* Setup css structure. */ + for (i = 0; i <= __MAX_CSSID; i++) { + css[i] = kmalloc(sizeof(struct channel_subsystem), GFP_KERNEL); + if (!css[i]) { + ret = -ENOMEM; + goto out_unregister; + } + setup_css(i); + ret = device_register(&css[i]->device); + if (ret) + goto out_free; + } css_init_done = 1; ctl_set_bit(6, 28); - for (irq = 0; irq < __MAX_SUBCHANNELS; irq++) { - struct subchannel *sch; - - if (cio_is_console(irq)) - sch = cio_get_console_subchannel(); - else { - sch = css_alloc_subchannel(irq); - if (IS_ERR(sch)) - ret = PTR_ERR(sch); - else - ret = 0; - if (ret == -ENOMEM) - panic("Out of memory in " - "init_channel_subsystem\n"); - /* -ENXIO: no more subchannels. */ - if (ret == -ENXIO) - break; - if (ret) - continue; - } - /* - * We register ALL valid subchannels in ioinfo, even those - * that have been present before init_channel_subsystem. - * These subchannels can't have been registered yet (kmalloc - * not working) so we do it now. This is true e.g. for the - * console subchannel. - */ - css_register_subchannel(sch); - } + for_each_subchannel(__init_channel_subsystem, NULL); return 0; - +out_free: + kfree(css[i]); +out_unregister: + while (i > 0) { + i--; + device_unregister(&css[i]->device); + } out_bus: bus_unregister(&css_bus_type); out: @@ -481,47 +549,8 @@ struct bus_type css_bus_type = { subsys_initcall(init_channel_subsystem); -/* - * Register root devices for some drivers. The release function must not be - * in the device drivers, so we do it here. - */ -static void -s390_root_dev_release(struct device *dev) -{ - kfree(dev); -} - -struct device * -s390_root_dev_register(const char *name) -{ - struct device *dev; - int ret; - - if (!strlen(name)) - return ERR_PTR(-EINVAL); - dev = kmalloc(sizeof(struct device), GFP_KERNEL); - if (!dev) - return ERR_PTR(-ENOMEM); - memset(dev, 0, sizeof(struct device)); - strncpy(dev->bus_id, name, min(strlen(name), (size_t)BUS_ID_SIZE)); - dev->release = s390_root_dev_release; - ret = device_register(dev); - if (ret) { - kfree(dev); - return ERR_PTR(ret); - } - return dev; -} - -void -s390_root_dev_unregister(struct device *dev) -{ - if (dev) - device_unregister(dev); -} - int -css_enqueue_subchannel_slow(unsigned long schid) +css_enqueue_subchannel_slow(struct subchannel_id schid) { struct slow_subchannel *new_slow_sch; unsigned long flags; @@ -564,6 +593,4 @@ css_slow_subchannels_exist(void) MODULE_LICENSE("GPL"); EXPORT_SYMBOL(css_bus_type); -EXPORT_SYMBOL(s390_root_dev_register); -EXPORT_SYMBOL(s390_root_dev_unregister); EXPORT_SYMBOL_GPL(css_characteristics_avail); diff --git a/drivers/s390/cio/css.h b/drivers/s390/cio/css.h index 2004a6c4938..251ebd7a7d3 100644 --- a/drivers/s390/cio/css.h +++ b/drivers/s390/cio/css.h @@ -6,6 +6,8 @@ #include <asm/cio.h> +#include "schid.h" + /* * path grouping stuff */ @@ -33,19 +35,25 @@ struct path_state { __u8 resvd : 3; /* reserved */ } __attribute__ ((packed)); +struct extended_cssid { + u8 version; + u8 cssid; +} __attribute__ ((packed)); + struct pgid { union { __u8 fc; /* SPID function code */ struct path_state ps; /* SNID path state */ } inf; - __u32 cpu_addr : 16; /* CPU address */ + union { + __u32 cpu_addr : 16; /* CPU address */ + struct extended_cssid ext_cssid; + } pgid_high; __u32 cpu_id : 24; /* CPU identification */ __u32 cpu_model : 16; /* CPU model */ __u32 tod_high; /* high word TOD clock */ } __attribute__ ((packed)); -extern struct pgid global_pgid; - #define MAX_CIWS 8 /* @@ -68,7 +76,8 @@ struct ccw_device_private { atomic_t onoff; unsigned long registered; __u16 devno; /* device number */ - __u16 irq; /* subchannel number */ + __u16 sch_no; /* subchannel number */ + __u8 ssid; /* subchannel set id */ __u8 imask; /* lpm mask for SNID/SID/SPGID */ int iretry; /* retry counter SNID/SID/SPGID */ struct { @@ -121,15 +130,27 @@ struct css_driver { extern struct bus_type css_bus_type; extern struct css_driver io_subchannel_driver; -int css_probe_device(int irq); -extern struct subchannel * get_subchannel_by_schid(int irq); -extern unsigned int highest_subchannel; +extern int css_probe_device(struct subchannel_id); +extern struct subchannel * get_subchannel_by_schid(struct subchannel_id); extern int css_init_done; - -#define __MAX_SUBCHANNELS 65536 +extern int for_each_subchannel(int(*fn)(struct subchannel_id, void *), void *); + +#define __MAX_SUBCHANNEL 65535 +#define __MAX_SSID 3 +#define __MAX_CHPID 255 +#define __MAX_CSSID 0 + +struct channel_subsystem { + u8 cssid; + int valid; + struct channel_path *chps[__MAX_CHPID]; + struct device device; + struct pgid global_pgid; +}; +#define to_css(dev) container_of(dev, struct channel_subsystem, device) extern struct bus_type css_bus_type; -extern struct device css_bus_device; +extern struct channel_subsystem *css[]; /* Some helper functions for disconnected state. */ int device_is_disconnected(struct subchannel *); @@ -144,7 +165,7 @@ void device_set_waiting(struct subchannel *); void device_kill_pending_timer(struct subchannel *); /* Helper functions to build lists for the slow path. */ -int css_enqueue_subchannel_slow(unsigned long schid); +extern int css_enqueue_subchannel_slow(struct subchannel_id schid); void css_walk_subchannel_slow_list(void (*fn)(unsigned long)); void css_clear_subchannel_slow_list(void); int css_slow_subchannels_exist(void); diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index 85908cacc3b..fa3e4c0a253 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -1,7 +1,7 @@ /* * drivers/s390/cio/device.c * bus driver for ccw devices - * $Revision: 1.131 $ + * $Revision: 1.137 $ * * Copyright (C) 2002 IBM Deutschland Entwicklung GmbH, * IBM Corporation @@ -374,7 +374,7 @@ online_store (struct device *dev, struct device_attribute *attr, const char *buf int i, force, ret; char *tmp; - if (atomic_compare_and_swap(0, 1, &cdev->private->onoff)) + if (atomic_cmpxchg(&cdev->private->onoff, 0, 1) != 0) return -EAGAIN; if (cdev->drv && !try_module_get(cdev->drv->owner)) { @@ -535,7 +535,8 @@ ccw_device_register(struct ccw_device *cdev) } struct match_data { - unsigned int devno; + unsigned int devno; + unsigned int ssid; struct ccw_device * sibling; }; @@ -548,6 +549,7 @@ match_devno(struct device * dev, void * data) cdev = to_ccwdev(dev); if ((cdev->private->state == DEV_STATE_DISCONNECTED) && (cdev->private->devno == d->devno) && + (cdev->private->ssid == d->ssid) && (cdev != d->sibling)) { cdev->private->state = DEV_STATE_NOT_OPER; return 1; @@ -556,11 +558,13 @@ match_devno(struct device * dev, void * data) } static struct ccw_device * -get_disc_ccwdev_by_devno(unsigned int devno, struct ccw_device *sibling) +get_disc_ccwdev_by_devno(unsigned int devno, unsigned int ssid, + struct ccw_device *sibling) { struct device *dev; struct match_data data = { - .devno = devno, + .devno = devno, + .ssid = ssid, .sibling = sibling, }; @@ -616,13 +620,13 @@ ccw_device_do_unreg_rereg(void *data) need_rename = 1; other_cdev = get_disc_ccwdev_by_devno(sch->schib.pmcw.dev, - cdev); + sch->schid.ssid, cdev); if (other_cdev) { struct subchannel *other_sch; other_sch = to_subchannel(other_cdev->dev.parent); if (get_device(&other_sch->dev)) { - stsch(other_sch->irq, &other_sch->schib); + stsch(other_sch->schid, &other_sch->schib); if (other_sch->schib.pmcw.dnv) { other_sch->schib.pmcw.intparm = 0; cio_modify(other_sch); @@ -639,8 +643,8 @@ ccw_device_do_unreg_rereg(void *data) if (test_and_clear_bit(1, &cdev->private->registered)) device_del(&cdev->dev); if (need_rename) - snprintf (cdev->dev.bus_id, BUS_ID_SIZE, "0.0.%04x", - sch->schib.pmcw.dev); + snprintf (cdev->dev.bus_id, BUS_ID_SIZE, "0.%x.%04x", + sch->schid.ssid, sch->schib.pmcw.dev); PREPARE_WORK(&cdev->private->kick_work, ccw_device_add_changed, (void *)cdev); queue_work(ccw_device_work, &cdev->private->kick_work); @@ -769,18 +773,20 @@ io_subchannel_recog(struct ccw_device *cdev, struct subchannel *sch) sch->dev.driver_data = cdev; sch->driver = &io_subchannel_driver; cdev->ccwlock = &sch->lock; + /* Init private data. */ priv = cdev->private; priv->devno = sch->schib.pmcw.dev; - priv->irq = sch->irq; + priv->ssid = sch->schid.ssid; + priv->sch_no = sch->schid.sch_no; priv->state = DEV_STATE_NOT_OPER; INIT_LIST_HEAD(&priv->cmb_list); init_waitqueue_head(&priv->wait_q); init_timer(&priv->timer); /* Set an initial name for the device. */ - snprintf (cdev->dev.bus_id, BUS_ID_SIZE, "0.0.%04x", - sch->schib.pmcw.dev); + snprintf (cdev->dev.bus_id, BUS_ID_SIZE, "0.%x.%04x", + sch->schid.ssid, sch->schib.pmcw.dev); /* Increase counter of devices currently in recognition. */ atomic_inc(&ccw_device_init_count); @@ -951,7 +957,7 @@ io_subchannel_shutdown(struct device *dev) sch = to_subchannel(dev); cdev = dev->driver_data; - if (cio_is_console(sch->irq)) + if (cio_is_console(sch->schid)) return; if (!sch->schib.pmcw.ena) /* Nothing to do. */ @@ -986,10 +992,6 @@ ccw_device_console_enable (struct ccw_device *cdev, struct subchannel *sch) cdev->dev = (struct device) { .parent = &sch->dev, }; - /* Initialize the subchannel structure */ - sch->dev.parent = &css_bus_device; - sch->dev.bus = &css_bus_type; - rc = io_subchannel_recog(cdev, sch); if (rc) return rc; @@ -1146,6 +1148,16 @@ ccw_driver_unregister (struct ccw_driver *cdriver) driver_unregister(&cdriver->driver); } +/* Helper func for qdio. */ +struct subchannel_id +ccw_device_get_subchannel_id(struct ccw_device *cdev) +{ + struct subchannel *sch; + + sch = to_subchannel(cdev->dev.parent); + return sch->schid; +} + MODULE_LICENSE("GPL"); EXPORT_SYMBOL(ccw_device_set_online); EXPORT_SYMBOL(ccw_device_set_offline); @@ -1155,3 +1167,4 @@ EXPORT_SYMBOL(get_ccwdev_by_busid); EXPORT_SYMBOL(ccw_bus_type); EXPORT_SYMBOL(ccw_device_work); EXPORT_SYMBOL(ccw_device_notify_work); +EXPORT_SYMBOL_GPL(ccw_device_get_subchannel_id); diff --git a/drivers/s390/cio/device.h b/drivers/s390/cio/device.h index a3aa056d724..11587ebb728 100644 --- a/drivers/s390/cio/device.h +++ b/drivers/s390/cio/device.h @@ -110,6 +110,7 @@ int ccw_device_stlck(struct ccw_device *); /* qdio needs this. */ void ccw_device_set_timeout(struct ccw_device *, int); +extern struct subchannel_id ccw_device_get_subchannel_id(struct ccw_device *); void retry_set_schib(struct ccw_device *cdev); #endif diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c index c1c89f4fd4e..23d12b65e5f 100644 --- a/drivers/s390/cio/device_fsm.c +++ b/drivers/s390/cio/device_fsm.c @@ -133,7 +133,7 @@ ccw_device_cancel_halt_clear(struct ccw_device *cdev) int ret; sch = to_subchannel(cdev->dev.parent); - ret = stsch(sch->irq, &sch->schib); + ret = stsch(sch->schid, &sch->schib); if (ret || !sch->schib.pmcw.dnv) return -ENODEV; if (!sch->schib.pmcw.ena || sch->schib.scsw.actl == 0) @@ -231,7 +231,7 @@ ccw_device_recog_done(struct ccw_device *cdev, int state) * through ssch() and the path information is up to date. */ old_lpm = sch->lpm; - stsch(sch->irq, &sch->schib); + stsch(sch->schid, &sch->schib); sch->lpm = sch->schib.pmcw.pim & sch->schib.pmcw.pam & sch->schib.pmcw.pom & @@ -257,8 +257,9 @@ ccw_device_recog_done(struct ccw_device *cdev, int state) switch (state) { case DEV_STATE_NOT_OPER: CIO_DEBUG(KERN_WARNING, 2, - "SenseID : unknown device %04x on subchannel %04x\n", - cdev->private->devno, sch->irq); + "SenseID : unknown device %04x on subchannel " + "0.%x.%04x\n", cdev->private->devno, + sch->schid.ssid, sch->schid.sch_no); break; case DEV_STATE_OFFLINE: if (cdev->private->state == DEV_STATE_DISCONNECTED_SENSE_ID) { @@ -282,16 +283,18 @@ ccw_device_recog_done(struct ccw_device *cdev, int state) return; } /* Issue device info message. */ - CIO_DEBUG(KERN_INFO, 2, "SenseID : device %04x reports: " + CIO_DEBUG(KERN_INFO, 2, "SenseID : device 0.%x.%04x reports: " "CU Type/Mod = %04X/%02X, Dev Type/Mod = " - "%04X/%02X\n", cdev->private->devno, + "%04X/%02X\n", + cdev->private->ssid, cdev->private->devno, cdev->id.cu_type, cdev->id.cu_model, cdev->id.dev_type, cdev->id.dev_model); break; case DEV_STATE_BOXED: CIO_DEBUG(KERN_WARNING, 2, - "SenseID : boxed device %04x on subchannel %04x\n", - cdev->private->devno, sch->irq); + "SenseID : boxed device %04x on subchannel " + "0.%x.%04x\n", cdev->private->devno, + sch->schid.ssid, sch->schid.sch_no); break; } cdev->private->state = state; @@ -359,7 +362,7 @@ ccw_device_done(struct ccw_device *cdev, int state) if (state == DEV_STATE_BOXED) CIO_DEBUG(KERN_WARNING, 2, "Boxed device %04x on subchannel %04x\n", - cdev->private->devno, sch->irq); + cdev->private->devno, sch->schid.sch_no); if (cdev->private->flags.donotify) { cdev->private->flags.donotify = 0; @@ -592,7 +595,7 @@ ccw_device_offline(struct ccw_device *cdev) struct subchannel *sch; sch = to_subchannel(cdev->dev.parent); - if (stsch(sch->irq, &sch->schib) || !sch->schib.pmcw.dnv) + if (stsch(sch->schid, &sch->schib) || !sch->schib.pmcw.dnv) return -ENODEV; if (cdev->private->state != DEV_STATE_ONLINE) { if (sch->schib.scsw.actl != 0) @@ -711,7 +714,7 @@ ccw_device_online_verify(struct ccw_device *cdev, enum dev_event dev_event) * Since we might not just be coming from an interrupt from the * subchannel we have to update the schib. */ - stsch(sch->irq, &sch->schib); + stsch(sch->schid, &sch->schib); if (sch->schib.scsw.actl != 0 || (cdev->private->irb.scsw.stctl & SCSW_STCTL_STATUS_PEND)) { @@ -923,7 +926,7 @@ ccw_device_wait4io_irq(struct ccw_device *cdev, enum dev_event dev_event) /* Iff device is idle, reset timeout. */ sch = to_subchannel(cdev->dev.parent); - if (!stsch(sch->irq, &sch->schib)) + if (!stsch(sch->schid, &sch->schib)) if (sch->schib.scsw.actl == 0) ccw_device_set_timeout(cdev, 0); /* Call the handler. */ @@ -1035,7 +1038,7 @@ device_trigger_reprobe(struct subchannel *sch) return; /* Update some values. */ - if (stsch(sch->irq, &sch->schib)) + if (stsch(sch->schid, &sch->schib)) return; /* diff --git a/drivers/s390/cio/device_id.c b/drivers/s390/cio/device_id.c index 0e68fb511dc..04ceba343db 100644 --- a/drivers/s390/cio/device_id.c +++ b/drivers/s390/cio/device_id.c @@ -27,7 +27,7 @@ /* * diag210 is used under VM to get information about a virtual device */ -#ifdef CONFIG_ARCH_S390X +#ifdef CONFIG_64BIT int diag210(struct diag210 * addr) { @@ -256,16 +256,17 @@ ccw_device_check_sense_id(struct ccw_device *cdev) * sense id information. So, for intervention required, * we use the "whack it until it talks" strategy... */ - CIO_MSG_EVENT(2, "SenseID : device %04x on Subchannel %04x " - "reports cmd reject\n", - cdev->private->devno, sch->irq); + CIO_MSG_EVENT(2, "SenseID : device %04x on Subchannel " + "0.%x.%04x reports cmd reject\n", + cdev->private->devno, sch->schid.ssid, + sch->schid.sch_no); return -EOPNOTSUPP; } if (irb->esw.esw0.erw.cons) { - CIO_MSG_EVENT(2, "SenseID : UC on dev %04x, " + CIO_MSG_EVENT(2, "SenseID : UC on dev 0.%x.%04x, " "lpum %02X, cnt %02d, sns :" " %02X%02X%02X%02X %02X%02X%02X%02X ...\n", - cdev->private->devno, + cdev->private->ssid, cdev->private->devno, irb->esw.esw0.sublog.lpum, irb->esw.esw0.erw.scnt, irb->ecw[0], irb->ecw[1], @@ -277,16 +278,17 @@ ccw_device_check_sense_id(struct ccw_device *cdev) if (irb->scsw.cc == 3) { if ((sch->orb.lpm & sch->schib.pmcw.pim & sch->schib.pmcw.pam) != 0) - CIO_MSG_EVENT(2, "SenseID : path %02X for device %04x on" - " subchannel %04x is 'not operational'\n", - sch->orb.lpm, cdev->private->devno, - sch->irq); + CIO_MSG_EVENT(2, "SenseID : path %02X for device %04x " + "on subchannel 0.%x.%04x is " + "'not operational'\n", sch->orb.lpm, + cdev->private->devno, sch->schid.ssid, + sch->schid.sch_no); return -EACCES; } /* Hmm, whatever happened, try again. */ CIO_MSG_EVENT(2, "SenseID : start_IO() for device %04x on " - "subchannel %04x returns status %02X%02X\n", - cdev->private->devno, sch->irq, + "subchannel 0.%x.%04x returns status %02X%02X\n", + cdev->private->devno, sch->schid.ssid, sch->schid.sch_no, irb->scsw.dstat, irb->scsw.cstat); return -EAGAIN; } diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c index 85a3026e690..143b6c25a4e 100644 --- a/drivers/s390/cio/device_ops.c +++ b/drivers/s390/cio/device_ops.c @@ -1,7 +1,7 @@ /* * drivers/s390/cio/device_ops.c * - * $Revision: 1.57 $ + * $Revision: 1.58 $ * * Copyright (C) 2002 IBM Deutschland Entwicklung GmbH, * IBM Corporation @@ -570,7 +570,7 @@ ccw_device_get_chp_desc(struct ccw_device *cdev, int chp_no) int _ccw_device_get_subchannel_number(struct ccw_device *cdev) { - return cdev->private->irq; + return cdev->private->sch_no; } int diff --git a/drivers/s390/cio/device_pgid.c b/drivers/s390/cio/device_pgid.c index 0adac8a6733..052832d03d3 100644 --- a/drivers/s390/cio/device_pgid.c +++ b/drivers/s390/cio/device_pgid.c @@ -22,6 +22,7 @@ #include "cio_debug.h" #include "css.h" #include "device.h" +#include "ioasm.h" /* * Start Sense Path Group ID helper function. Used in ccw_device_recog @@ -56,10 +57,10 @@ __ccw_device_sense_pgid_start(struct ccw_device *cdev) if (ret != -EACCES) return ret; CIO_MSG_EVENT(2, "SNID - Device %04x on Subchannel " - "%04x, lpm %02X, became 'not " + "0.%x.%04x, lpm %02X, became 'not " "operational'\n", - cdev->private->devno, sch->irq, - cdev->private->imask); + cdev->private->devno, sch->schid.ssid, + sch->schid.sch_no, cdev->private->imask); } cdev->private->imask >>= 1; @@ -105,10 +106,10 @@ __ccw_device_check_sense_pgid(struct ccw_device *cdev) return -EOPNOTSUPP; } if (irb->esw.esw0.erw.cons) { - CIO_MSG_EVENT(2, "SNID - device %04x, unit check, " + CIO_MSG_EVENT(2, "SNID - device 0.%x.%04x, unit check, " "lpum %02X, cnt %02d, sns : " "%02X%02X%02X%02X %02X%02X%02X%02X ...\n", - cdev->private->devno, + cdev->private->ssid, cdev->private->devno, irb->esw.esw0.sublog.lpum, irb->esw.esw0.erw.scnt, irb->ecw[0], irb->ecw[1], @@ -118,15 +119,17 @@ __ccw_device_check_sense_pgid(struct ccw_device *cdev) return -EAGAIN; } if (irb->scsw.cc == 3) { - CIO_MSG_EVENT(2, "SNID - Device %04x on Subchannel " - "%04x, lpm %02X, became 'not operational'\n", - cdev->private->devno, sch->irq, sch->orb.lpm); + CIO_MSG_EVENT(2, "SNID - Device %04x on Subchannel 0.%x.%04x," + " lpm %02X, became 'not operational'\n", + cdev->private->devno, sch->schid.ssid, + sch->schid.sch_no, sch->orb.lpm); return -EACCES; } if (cdev->private->pgid.inf.ps.state2 == SNID_STATE2_RESVD_ELSE) { - CIO_MSG_EVENT(2, "SNID - Device %04x on Subchannel %04x " + CIO_MSG_EVENT(2, "SNID - Device %04x on Subchannel 0.%x.%04x " "is reserved by someone else\n", - cdev->private->devno, sch->irq); + cdev->private->devno, sch->schid.ssid, + sch->schid.sch_no); return -EUSERS; } return 0; @@ -162,7 +165,7 @@ ccw_device_sense_pgid_irq(struct ccw_device *cdev, enum dev_event dev_event) /* 0, -ETIME, -EOPNOTSUPP, -EAGAIN, -EACCES or -EUSERS */ case 0: /* Sense Path Group ID successful. */ if (cdev->private->pgid.inf.ps.state1 == SNID_STATE1_RESET) - memcpy(&cdev->private->pgid, &global_pgid, + memcpy(&cdev->private->pgid, &css[0]->global_pgid, sizeof(struct pgid)); ccw_device_sense_pgid_done(cdev, 0); break; @@ -235,8 +238,9 @@ __ccw_device_do_pgid(struct ccw_device *cdev, __u8 func) sch->lpm &= ~cdev->private->imask; sch->vpm &= ~cdev->private->imask; CIO_MSG_EVENT(2, "SPID - Device %04x on Subchannel " - "%04x, lpm %02X, became 'not operational'\n", - cdev->private->devno, sch->irq, cdev->private->imask); + "0.%x.%04x, lpm %02X, became 'not operational'\n", + cdev->private->devno, sch->schid.ssid, + sch->schid.sch_no, cdev->private->imask); return ret; } @@ -258,8 +262,10 @@ __ccw_device_check_pgid(struct ccw_device *cdev) if (irb->ecw[0] & SNS0_CMD_REJECT) return -EOPNOTSUPP; /* Hmm, whatever happened, try again. */ - CIO_MSG_EVENT(2, "SPID - device %04x, unit check, cnt %02d, " + CIO_MSG_EVENT(2, "SPID - device 0.%x.%04x, unit check, " + "cnt %02d, " "sns : %02X%02X%02X%02X %02X%02X%02X%02X ...\n", + cdev->private->ssid, cdev->private->devno, irb->esw.esw0.erw.scnt, irb->ecw[0], irb->ecw[1], irb->ecw[2], irb->ecw[3], @@ -268,10 +274,10 @@ __ccw_device_check_pgid(struct ccw_device *cdev) return -EAGAIN; } if (irb->scsw.cc == 3) { - CIO_MSG_EVENT(2, "SPID - Device %04x on Subchannel " - "%04x, lpm %02X, became 'not operational'\n", - cdev->private->devno, sch->irq, - cdev->private->imask); + CIO_MSG_EVENT(2, "SPID - Device %04x on Subchannel 0.%x.%04x," + " lpm %02X, became 'not operational'\n", + cdev->private->devno, sch->schid.ssid, + sch->schid.sch_no, cdev->private->imask); return -EACCES; } return 0; @@ -364,8 +370,22 @@ ccw_device_verify_irq(struct ccw_device *cdev, enum dev_event dev_event) void ccw_device_verify_start(struct ccw_device *cdev) { + struct subchannel *sch = to_subchannel(cdev->dev.parent); + cdev->private->flags.pgid_single = 0; cdev->private->iretry = 5; + /* + * Update sch->lpm with current values to catch paths becoming + * available again. + */ + if (stsch(sch->schid, &sch->schib)) { + ccw_device_verify_done(cdev, -ENODEV); + return; + } + sch->lpm = sch->schib.pmcw.pim & + sch->schib.pmcw.pam & + sch->schib.pmcw.pom & + sch->opm; __ccw_device_verify_start(cdev); } diff --git a/drivers/s390/cio/device_status.c b/drivers/s390/cio/device_status.c index 12a24d4331a..db09c209098 100644 --- a/drivers/s390/cio/device_status.c +++ b/drivers/s390/cio/device_status.c @@ -36,15 +36,16 @@ ccw_device_msg_control_check(struct ccw_device *cdev, struct irb *irb) CIO_MSG_EVENT(0, "Channel-Check or Interface-Control-Check " "received" - " ... device %04X on subchannel %04X, dev_stat " + " ... device %04x on subchannel 0.%x.%04x, dev_stat " ": %02X sch_stat : %02X\n", - cdev->private->devno, cdev->private->irq, + cdev->private->devno, cdev->private->ssid, + cdev->private->sch_no, irb->scsw.dstat, irb->scsw.cstat); if (irb->scsw.cc != 3) { char dbf_text[15]; - sprintf(dbf_text, "chk%x", cdev->private->irq); + sprintf(dbf_text, "chk%x", cdev->private->sch_no); CIO_TRACE_EVENT(0, dbf_text); CIO_HEX_EVENT(0, irb, sizeof (struct irb)); } @@ -59,10 +60,11 @@ ccw_device_path_notoper(struct ccw_device *cdev) struct subchannel *sch; sch = to_subchannel(cdev->dev.parent); - stsch (sch->irq, &sch->schib); + stsch (sch->schid, &sch->schib); - CIO_MSG_EVENT(0, "%s(%04x) - path(s) %02x are " - "not operational \n", __FUNCTION__, sch->irq, + CIO_MSG_EVENT(0, "%s(0.%x.%04x) - path(s) %02x are " + "not operational \n", __FUNCTION__, + sch->schid.ssid, sch->schid.sch_no, sch->schib.pmcw.pnom); sch->lpm &= ~sch->schib.pmcw.pnom; diff --git a/drivers/s390/cio/ioasm.h b/drivers/s390/cio/ioasm.h index 45480a2bc4c..95a9462f9a9 100644 --- a/drivers/s390/cio/ioasm.h +++ b/drivers/s390/cio/ioasm.h @@ -1,12 +1,13 @@ #ifndef S390_CIO_IOASM_H #define S390_CIO_IOASM_H +#include "schid.h" + /* * TPI info structure */ struct tpi_info { - __u32 reserved1 : 16; /* reserved 0x00000001 */ - __u32 irq : 16; /* aka. subchannel number */ + struct subchannel_id schid; __u32 intparm; /* interruption parameter */ __u32 adapter_IO : 1; __u32 reserved2 : 1; @@ -21,7 +22,8 @@ struct tpi_info { * Some S390 specific IO instructions as inline */ -static inline int stsch(int irq, volatile struct schib *addr) +static inline int stsch(struct subchannel_id schid, + volatile struct schib *addr) { int ccode; @@ -31,12 +33,42 @@ static inline int stsch(int irq, volatile struct schib *addr) " ipm %0\n" " srl %0,28" : "=d" (ccode) - : "d" (irq | 0x10000), "a" (addr) + : "d" (schid), "a" (addr), "m" (*addr) + : "cc", "1" ); + return ccode; +} + +static inline int stsch_err(struct subchannel_id schid, + volatile struct schib *addr) +{ + int ccode; + + __asm__ __volatile__( + " lhi %0,%3\n" + " lr 1,%1\n" + " stsch 0(%2)\n" + "0: ipm %0\n" + " srl %0,28\n" + "1:\n" +#ifdef CONFIG_64BIT + ".section __ex_table,\"a\"\n" + " .align 8\n" + " .quad 0b,1b\n" + ".previous" +#else + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 0b,1b\n" + ".previous" +#endif + : "=&d" (ccode) + : "d" (schid), "a" (addr), "K" (-EIO), "m" (*addr) : "cc", "1" ); return ccode; } -static inline int msch(int irq, volatile struct schib *addr) +static inline int msch(struct subchannel_id schid, + volatile struct schib *addr) { int ccode; @@ -46,12 +78,13 @@ static inline int msch(int irq, volatile struct schib *addr) " ipm %0\n" " srl %0,28" : "=d" (ccode) - : "d" (irq | 0x10000L), "a" (addr) + : "d" (schid), "a" (addr), "m" (*addr) : "cc", "1" ); return ccode; } -static inline int msch_err(int irq, volatile struct schib *addr) +static inline int msch_err(struct subchannel_id schid, + volatile struct schib *addr) { int ccode; @@ -62,7 +95,7 @@ static inline int msch_err(int irq, volatile struct schib *addr) "0: ipm %0\n" " srl %0,28\n" "1:\n" -#ifdef CONFIG_ARCH_S390X +#ifdef CONFIG_64BIT ".section __ex_table,\"a\"\n" " .align 8\n" " .quad 0b,1b\n" @@ -74,12 +107,13 @@ static inline int msch_err(int irq, volatile struct schib *addr) ".previous" #endif : "=&d" (ccode) - : "d" (irq | 0x10000L), "a" (addr), "K" (-EIO) + : "d" (schid), "a" (addr), "K" (-EIO), "m" (*addr) : "cc", "1" ); return ccode; } -static inline int tsch(int irq, volatile struct irb *addr) +static inline int tsch(struct subchannel_id schid, + volatile struct irb *addr) { int ccode; @@ -89,7 +123,7 @@ static inline int tsch(int irq, volatile struct irb *addr) " ipm %0\n" " srl %0,28" : "=d" (ccode) - : "d" (irq | 0x10000L), "a" (addr) + : "d" (schid), "a" (addr), "m" (*addr) : "cc", "1" ); return ccode; } @@ -103,12 +137,13 @@ static inline int tpi( volatile struct tpi_info *addr) " ipm %0\n" " srl %0,28" : "=d" (ccode) - : "a" (addr) + : "a" (addr), "m" (*addr) : "cc", "1" ); return ccode; } -static inline int ssch(int irq, volatile struct orb *addr) +static inline int ssch(struct subchannel_id schid, + volatile struct orb *addr) { int ccode; @@ -118,12 +153,12 @@ static inline int ssch(int irq, volatile struct orb *addr) " ipm %0\n" " srl %0,28" : "=d" (ccode) - : "d" (irq | 0x10000L), "a" (addr) + : "d" (schid), "a" (addr), "m" (*addr) : "cc", "1" ); return ccode; } -static inline int rsch(int irq) +static inline int rsch(struct subchannel_id schid) { int ccode; @@ -133,12 +168,12 @@ static inline int rsch(int irq) " ipm %0\n" " srl %0,28" : "=d" (ccode) - : "d" (irq | 0x10000L) + : "d" (schid) : "cc", "1" ); return ccode; } -static inline int csch(int irq) +static inline int csch(struct subchannel_id schid) { int ccode; @@ -148,12 +183,12 @@ static inline int csch(int irq) " ipm %0\n" " srl %0,28" : "=d" (ccode) - : "d" (irq | 0x10000L) + : "d" (schid) : "cc", "1" ); return ccode; } -static inline int hsch(int irq) +static inline int hsch(struct subchannel_id schid) { int ccode; @@ -163,12 +198,12 @@ static inline int hsch(int irq) " ipm %0\n" " srl %0,28" : "=d" (ccode) - : "d" (irq | 0x10000L) + : "d" (schid) : "cc", "1" ); return ccode; } -static inline int xsch(int irq) +static inline int xsch(struct subchannel_id schid) { int ccode; @@ -178,21 +213,22 @@ static inline int xsch(int irq) " ipm %0\n" " srl %0,28" : "=d" (ccode) - : "d" (irq | 0x10000L) + : "d" (schid) : "cc", "1" ); return ccode; } static inline int chsc(void *chsc_area) { + typedef struct { char _[4096]; } addr_type; int cc; __asm__ __volatile__ ( - ".insn rre,0xb25f0000,%1,0 \n\t" + ".insn rre,0xb25f0000,%2,0 \n\t" "ipm %0 \n\t" "srl %0,28 \n\t" - : "=d" (cc) - : "d" (chsc_area) + : "=d" (cc), "=m" (*(addr_type *) chsc_area) + : "d" (chsc_area), "m" (*(addr_type *) chsc_area) : "cc" ); return cc; diff --git a/drivers/s390/cio/qdio.c b/drivers/s390/cio/qdio.c index eb39218b925..30a836ffc31 100644 --- a/drivers/s390/cio/qdio.c +++ b/drivers/s390/cio/qdio.c @@ -56,7 +56,7 @@ #include "ioasm.h" #include "chsc.h" -#define VERSION_QDIO_C "$Revision: 1.108 $" +#define VERSION_QDIO_C "$Revision: 1.114 $" /****************** MODULE PARAMETER VARIABLES ********************/ MODULE_AUTHOR("Utz Bacher <utz.bacher@de.ibm.com>"); @@ -76,6 +76,7 @@ static struct qdio_perf_stats perf_stats; #endif /* QDIO_PERFORMANCE_STATS */ static int hydra_thinints; +static int is_passthrough = 0; static int omit_svs; static int indicator_used[INDICATORS_PER_CACHELINE]; @@ -136,12 +137,126 @@ qdio_release_q(struct qdio_q *q) atomic_dec(&q->use_count); } -static volatile inline void -qdio_set_slsb(volatile char *slsb, unsigned char value) +/*check ccq */ +static inline int +qdio_check_ccq(struct qdio_q *q, unsigned int ccq) +{ + char dbf_text[15]; + + if (ccq == 0 || ccq == 32 || ccq == 96) + return 0; + if (ccq == 97) + return 1; + /*notify devices immediately*/ + sprintf(dbf_text,"%d", ccq); + QDIO_DBF_TEXT2(1,trace,dbf_text); + return -EIO; +} +/* EQBS: extract buffer states */ +static inline int +qdio_do_eqbs(struct qdio_q *q, unsigned char *state, + unsigned int *start, unsigned int *cnt) +{ + struct qdio_irq *irq; + unsigned int tmp_cnt, q_no, ccq; + int rc ; + char dbf_text[15]; + + ccq = 0; + tmp_cnt = *cnt; + irq = (struct qdio_irq*)q->irq_ptr; + q_no = q->q_no; + if(!q->is_input_q) + q_no += irq->no_input_qs; + ccq = do_eqbs(irq->sch_token, state, q_no, start, cnt); + rc = qdio_check_ccq(q, ccq); + if (rc < 0) { + QDIO_DBF_TEXT2(1,trace,"eqberr"); + sprintf(dbf_text,"%2x,%2x,%d,%d",tmp_cnt, *cnt, ccq, q_no); + QDIO_DBF_TEXT2(1,trace,dbf_text); + q->handler(q->cdev,QDIO_STATUS_ACTIVATE_CHECK_CONDITION| + QDIO_STATUS_LOOK_FOR_ERROR, + 0, 0, 0, -1, -1, q->int_parm); + return 0; + } + return (tmp_cnt - *cnt); +} + +/* SQBS: set buffer states */ +static inline int +qdio_do_sqbs(struct qdio_q *q, unsigned char state, + unsigned int *start, unsigned int *cnt) { - xchg((char*)slsb,value); + struct qdio_irq *irq; + unsigned int tmp_cnt, q_no, ccq; + int rc; + char dbf_text[15]; + + ccq = 0; + tmp_cnt = *cnt; + irq = (struct qdio_irq*)q->irq_ptr; + q_no = q->q_no; + if(!q->is_input_q) + q_no += irq->no_input_qs; + ccq = do_sqbs(irq->sch_token, state, q_no, start, cnt); + rc = qdio_check_ccq(q, ccq); + if (rc < 0) { + QDIO_DBF_TEXT3(1,trace,"sqberr"); + sprintf(dbf_text,"%2x,%2x,%d,%d",tmp_cnt,*cnt,ccq,q_no); + QDIO_DBF_TEXT3(1,trace,dbf_text); + q->handler(q->cdev,QDIO_STATUS_ACTIVATE_CHECK_CONDITION| + QDIO_STATUS_LOOK_FOR_ERROR, + 0, 0, 0, -1, -1, q->int_parm); + return 0; + } + return (tmp_cnt - *cnt); } +static inline int +qdio_set_slsb(struct qdio_q *q, unsigned int *bufno, + unsigned char state, unsigned int *count) +{ + volatile char *slsb; + struct qdio_irq *irq; + + irq = (struct qdio_irq*)q->irq_ptr; + if (!irq->is_qebsm) { + slsb = (char *)&q->slsb.acc.val[(*bufno)]; + xchg(slsb, state); + return 1; + } + return qdio_do_sqbs(q, state, bufno, count); +} + +#ifdef CONFIG_QDIO_DEBUG +static inline void +qdio_trace_slsb(struct qdio_q *q) +{ + if (q->queue_type==QDIO_TRACE_QTYPE) { + if (q->is_input_q) + QDIO_DBF_HEX2(0,slsb_in,&q->slsb, + QDIO_MAX_BUFFERS_PER_Q); + else + QDIO_DBF_HEX2(0,slsb_out,&q->slsb, + QDIO_MAX_BUFFERS_PER_Q); + } +} +#endif + +static inline int +set_slsb(struct qdio_q *q, unsigned int *bufno, + unsigned char state, unsigned int *count) +{ + int rc; +#ifdef CONFIG_QDIO_DEBUG + qdio_trace_slsb(q); +#endif + rc = qdio_set_slsb(q, bufno, state, count); +#ifdef CONFIG_QDIO_DEBUG + qdio_trace_slsb(q); +#endif + return rc; +} static inline int qdio_siga_sync(struct qdio_q *q, unsigned int gpr2, unsigned int gpr3) @@ -155,7 +270,7 @@ qdio_siga_sync(struct qdio_q *q, unsigned int gpr2, perf_stats.siga_syncs++; #endif /* QDIO_PERFORMANCE_STATS */ - cc = do_siga_sync(q->irq, gpr2, gpr3); + cc = do_siga_sync(q->schid, gpr2, gpr3); if (cc) QDIO_DBF_HEX3(0,trace,&cc,sizeof(int*)); @@ -170,6 +285,23 @@ qdio_siga_sync_q(struct qdio_q *q) return qdio_siga_sync(q, q->mask, 0); } +static int +__do_siga_output(struct qdio_q *q, unsigned int *busy_bit) +{ + struct qdio_irq *irq; + unsigned int fc = 0; + unsigned long schid; + + irq = (struct qdio_irq *) q->irq_ptr; + if (!irq->is_qebsm) + schid = *((u32 *)&q->schid); + else { + schid = irq->sch_token; + fc |= 0x80; + } + return do_siga_output(schid, q->mask, busy_bit, fc); +} + /* * returns QDIO_SIGA_ERROR_ACCESS_EXCEPTION as cc, when SIGA returns * an access exception @@ -189,7 +321,7 @@ qdio_siga_output(struct qdio_q *q) QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); for (;;) { - cc = do_siga_output(q->irq, q->mask, &busy_bit); + cc = __do_siga_output(q, &busy_bit); //QDIO_PRINT_ERR("cc=%x, busy=%x\n",cc,busy_bit); if ((cc==2) && (busy_bit) && (q->is_iqdio_q)) { if (!start_time) @@ -221,7 +353,7 @@ qdio_siga_input(struct qdio_q *q) perf_stats.siga_ins++; #endif /* QDIO_PERFORMANCE_STATS */ - cc = do_siga_input(q->irq, q->mask); + cc = do_siga_input(q->schid, q->mask); if (cc) QDIO_DBF_HEX3(0,trace,&cc,sizeof(int*)); @@ -230,7 +362,7 @@ qdio_siga_input(struct qdio_q *q) } /* locked by the locks in qdio_activate and qdio_cleanup */ -static __u32 volatile * +static __u32 * qdio_get_indicator(void) { int i; @@ -258,7 +390,7 @@ qdio_put_indicator(__u32 *addr) atomic_dec(&spare_indicator_usecount); } -static inline volatile void +static inline void tiqdio_clear_summary_bit(__u32 *location) { QDIO_DBF_TEXT5(0,trace,"clrsummb"); @@ -267,7 +399,7 @@ tiqdio_clear_summary_bit(__u32 *location) xchg(location,0); } -static inline volatile void +static inline void tiqdio_set_summary_bit(__u32 *location) { QDIO_DBF_TEXT5(0,trace,"setsummb"); @@ -336,7 +468,9 @@ static inline int qdio_stop_polling(struct qdio_q *q) { #ifdef QDIO_USE_PROCESSING_STATE - int gsf; + unsigned int tmp, gsf, count = 1; + unsigned char state = 0; + struct qdio_irq *irq = (struct qdio_irq *) q->irq_ptr; if (!atomic_swap(&q->polling,0)) return 1; @@ -348,17 +482,22 @@ qdio_stop_polling(struct qdio_q *q) if (!q->is_input_q) return 1; - gsf=GET_SAVED_FRONTIER(q); - set_slsb(&q->slsb.acc.val[(gsf+QDIO_MAX_BUFFERS_PER_Q-1)& - (QDIO_MAX_BUFFERS_PER_Q-1)], - SLSB_P_INPUT_NOT_INIT); + tmp = gsf = GET_SAVED_FRONTIER(q); + tmp = ((tmp + QDIO_MAX_BUFFERS_PER_Q-1) & (QDIO_MAX_BUFFERS_PER_Q-1) ); + set_slsb(q, &tmp, SLSB_P_INPUT_NOT_INIT, &count); + /* * we don't issue this SYNC_MEMORY, as we trust Rick T and * moreover will not use the PROCESSING state under VM, so * q->polling was 0 anyway */ /*SYNC_MEMORY;*/ - if (q->slsb.acc.val[gsf]!=SLSB_P_INPUT_PRIMED) + if (irq->is_qebsm) { + count = 1; + qdio_do_eqbs(q, &state, &gsf, &count); + } else + state = q->slsb.acc.val[gsf]; + if (state != SLSB_P_INPUT_PRIMED) return 1; /* * set our summary bit again, as otherwise there is a @@ -431,18 +570,136 @@ tiqdio_clear_global_summary(void) /************************* OUTBOUND ROUTINES *******************************/ +static int +qdio_qebsm_get_outbound_buffer_frontier(struct qdio_q *q) +{ + struct qdio_irq *irq; + unsigned char state; + unsigned int cnt, count, ftc; + + irq = (struct qdio_irq *) q->irq_ptr; + if ((!q->is_iqdio_q) && (!q->hydra_gives_outbound_pcis)) + SYNC_MEMORY; + + ftc = q->first_to_check; + count = qdio_min(atomic_read(&q->number_of_buffers_used), + (QDIO_MAX_BUFFERS_PER_Q-1)); + if (count == 0) + return q->first_to_check; + cnt = qdio_do_eqbs(q, &state, &ftc, &count); + if (cnt == 0) + return q->first_to_check; + switch (state) { + case SLSB_P_OUTPUT_ERROR: + QDIO_DBF_TEXT3(0,trace,"outperr"); + atomic_sub(cnt , &q->number_of_buffers_used); + if (q->qdio_error) + q->error_status_flags |= + QDIO_STATUS_MORE_THAN_ONE_QDIO_ERROR; + q->qdio_error = SLSB_P_OUTPUT_ERROR; + q->error_status_flags |= QDIO_STATUS_LOOK_FOR_ERROR; + q->first_to_check = ftc; + break; + case SLSB_P_OUTPUT_EMPTY: + QDIO_DBF_TEXT5(0,trace,"outpempt"); + atomic_sub(cnt, &q->number_of_buffers_used); + q->first_to_check = ftc; + break; + case SLSB_CU_OUTPUT_PRIMED: + /* all buffers primed */ + QDIO_DBF_TEXT5(0,trace,"outpprim"); + break; + default: + break; + } + QDIO_DBF_HEX4(0,trace,&q->first_to_check,sizeof(int)); + return q->first_to_check; +} + +static int +qdio_qebsm_get_inbound_buffer_frontier(struct qdio_q *q) +{ + struct qdio_irq *irq; + unsigned char state; + int tmp, ftc, count, cnt; + char dbf_text[15]; + + + irq = (struct qdio_irq *) q->irq_ptr; + ftc = q->first_to_check; + count = qdio_min(atomic_read(&q->number_of_buffers_used), + (QDIO_MAX_BUFFERS_PER_Q-1)); + if (count == 0) + return q->first_to_check; + cnt = qdio_do_eqbs(q, &state, &ftc, &count); + if (cnt == 0) + return q->first_to_check; + switch (state) { + case SLSB_P_INPUT_ERROR : +#ifdef CONFIG_QDIO_DEBUG + QDIO_DBF_TEXT3(1,trace,"inperr"); + sprintf(dbf_text,"%2x,%2x",ftc,count); + QDIO_DBF_TEXT3(1,trace,dbf_text); +#endif /* CONFIG_QDIO_DEBUG */ + if (q->qdio_error) + q->error_status_flags |= + QDIO_STATUS_MORE_THAN_ONE_QDIO_ERROR; + q->qdio_error = SLSB_P_INPUT_ERROR; + q->error_status_flags |= QDIO_STATUS_LOOK_FOR_ERROR; + atomic_sub(cnt, &q->number_of_buffers_used); + q->first_to_check = ftc; + break; + case SLSB_P_INPUT_PRIMED : + QDIO_DBF_TEXT3(0,trace,"inptprim"); + sprintf(dbf_text,"%2x,%2x",ftc,count); + QDIO_DBF_TEXT3(1,trace,dbf_text); + tmp = 0; + ftc = q->first_to_check; +#ifdef QDIO_USE_PROCESSING_STATE + if (cnt > 1) { + cnt -= 1; + tmp = set_slsb(q, &ftc, SLSB_P_INPUT_NOT_INIT, &cnt); + if (!tmp) + break; + } + cnt = 1; + tmp += set_slsb(q, &ftc, + SLSB_P_INPUT_PROCESSING, &cnt); + atomic_set(&q->polling, 1); +#else + tmp = set_slsb(q, &ftc, SLSB_P_INPUT_NOT_INIT, &cnt); +#endif + atomic_sub(tmp, &q->number_of_buffers_used); + q->first_to_check = ftc; + break; + case SLSB_CU_INPUT_EMPTY: + case SLSB_P_INPUT_NOT_INIT: + case SLSB_P_INPUT_PROCESSING: + QDIO_DBF_TEXT5(0,trace,"inpnipro"); + break; + default: + break; + } + QDIO_DBF_HEX4(0,trace,&q->first_to_check,sizeof(int)); + return q->first_to_check; +} static inline int qdio_get_outbound_buffer_frontier(struct qdio_q *q) { - int f,f_mod_no; - volatile char *slsb; - int first_not_to_check; + struct qdio_irq *irq; + volatile char *slsb; + unsigned int count = 1; + int first_not_to_check, f, f_mod_no; char dbf_text[15]; QDIO_DBF_TEXT4(0,trace,"getobfro"); QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); + irq = (struct qdio_irq *) q->irq_ptr; + if (irq->is_qebsm) + return qdio_qebsm_get_outbound_buffer_frontier(q); + slsb=&q->slsb.acc.val[0]; f_mod_no=f=q->first_to_check; /* @@ -484,7 +741,7 @@ check_next: QDIO_DBF_HEX2(1,sbal,q->sbal[f_mod_no],256); /* kind of process the buffer */ - set_slsb(&q->slsb.acc.val[f_mod_no], SLSB_P_OUTPUT_NOT_INIT); + set_slsb(q, &f_mod_no, SLSB_P_OUTPUT_NOT_INIT, &count); /* * we increment the frontier, as this buffer @@ -597,48 +854,48 @@ qdio_kick_outbound_q(struct qdio_q *q) result=qdio_siga_output(q); - switch (result) { - case 0: - /* went smooth this time, reset timestamp */ + switch (result) { + case 0: + /* went smooth this time, reset timestamp */ #ifdef CONFIG_QDIO_DEBUG - QDIO_DBF_TEXT3(0,trace,"cc2reslv"); - sprintf(dbf_text,"%4x%2x%2x",q->irq,q->q_no, - atomic_read(&q->busy_siga_counter)); - QDIO_DBF_TEXT3(0,trace,dbf_text); + QDIO_DBF_TEXT3(0,trace,"cc2reslv"); + sprintf(dbf_text,"%4x%2x%2x",q->schid.sch_no,q->q_no, + atomic_read(&q->busy_siga_counter)); + QDIO_DBF_TEXT3(0,trace,dbf_text); #endif /* CONFIG_QDIO_DEBUG */ - q->timing.busy_start=0; + q->timing.busy_start=0; + break; + case (2|QDIO_SIGA_ERROR_B_BIT_SET): + /* cc=2 and busy bit: */ + atomic_inc(&q->busy_siga_counter); + + /* if the last siga was successful, save + * timestamp here */ + if (!q->timing.busy_start) + q->timing.busy_start=NOW; + + /* if we're in time, don't touch error_status_flags + * and siga_error */ + if (NOW-q->timing.busy_start<QDIO_BUSY_BIT_GIVE_UP) { + qdio_mark_q(q); break; - case (2|QDIO_SIGA_ERROR_B_BIT_SET): - /* cc=2 and busy bit: */ - atomic_inc(&q->busy_siga_counter); - - /* if the last siga was successful, save - * timestamp here */ - if (!q->timing.busy_start) - q->timing.busy_start=NOW; - - /* if we're in time, don't touch error_status_flags - * and siga_error */ - if (NOW-q->timing.busy_start<QDIO_BUSY_BIT_GIVE_UP) { - qdio_mark_q(q); - break; - } - QDIO_DBF_TEXT2(0,trace,"cc2REPRT"); + } + QDIO_DBF_TEXT2(0,trace,"cc2REPRT"); #ifdef CONFIG_QDIO_DEBUG - sprintf(dbf_text,"%4x%2x%2x",q->irq,q->q_no, - atomic_read(&q->busy_siga_counter)); - QDIO_DBF_TEXT3(0,trace,dbf_text); + sprintf(dbf_text,"%4x%2x%2x",q->schid.sch_no,q->q_no, + atomic_read(&q->busy_siga_counter)); + QDIO_DBF_TEXT3(0,trace,dbf_text); #endif /* CONFIG_QDIO_DEBUG */ - /* else fallthrough and report error */ - default: - /* for plain cc=1, 2 or 3: */ - if (q->siga_error) - q->error_status_flags|= - QDIO_STATUS_MORE_THAN_ONE_SIGA_ERROR; + /* else fallthrough and report error */ + default: + /* for plain cc=1, 2 or 3: */ + if (q->siga_error) q->error_status_flags|= - QDIO_STATUS_LOOK_FOR_ERROR; - q->siga_error=result; - } + QDIO_STATUS_MORE_THAN_ONE_SIGA_ERROR; + q->error_status_flags|= + QDIO_STATUS_LOOK_FOR_ERROR; + q->siga_error=result; + } } static inline void @@ -743,8 +1000,10 @@ qdio_outbound_processing(struct qdio_q *q) static inline int qdio_get_inbound_buffer_frontier(struct qdio_q *q) { + struct qdio_irq *irq; int f,f_mod_no; volatile char *slsb; + unsigned int count = 1; int first_not_to_check; #ifdef CONFIG_QDIO_DEBUG char dbf_text[15]; @@ -756,6 +1015,10 @@ qdio_get_inbound_buffer_frontier(struct qdio_q *q) QDIO_DBF_TEXT4(0,trace,"getibfro"); QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); + irq = (struct qdio_irq *) q->irq_ptr; + if (irq->is_qebsm) + return qdio_qebsm_get_inbound_buffer_frontier(q); + slsb=&q->slsb.acc.val[0]; f_mod_no=f=q->first_to_check; /* @@ -792,19 +1055,19 @@ check_next: * kill VM in terms of CP overhead */ if (q->siga_sync) { - set_slsb(&slsb[f_mod_no],SLSB_P_INPUT_NOT_INIT); + set_slsb(q, &f_mod_no, SLSB_P_INPUT_NOT_INIT, &count); } else { /* set the previous buffer to NOT_INIT. The current * buffer will be set to PROCESSING at the end of * this function to avoid further interrupts. */ if (last_position>=0) - set_slsb(&slsb[last_position], - SLSB_P_INPUT_NOT_INIT); + set_slsb(q, &last_position, + SLSB_P_INPUT_NOT_INIT, &count); atomic_set(&q->polling,1); last_position=f_mod_no; } #else /* QDIO_USE_PROCESSING_STATE */ - set_slsb(&slsb[f_mod_no],SLSB_P_INPUT_NOT_INIT); + set_slsb(q, &f_mod_no, SLSB_P_INPUT_NOT_INIT, &count); #endif /* QDIO_USE_PROCESSING_STATE */ /* * not needed, as the inbound queue will be synced on the next @@ -829,7 +1092,7 @@ check_next: QDIO_DBF_HEX2(1,sbal,q->sbal[f_mod_no],256); /* kind of process the buffer */ - set_slsb(&slsb[f_mod_no],SLSB_P_INPUT_NOT_INIT); + set_slsb(q, &f_mod_no, SLSB_P_INPUT_NOT_INIT, &count); if (q->qdio_error) q->error_status_flags|= @@ -857,7 +1120,7 @@ out: #ifdef QDIO_USE_PROCESSING_STATE if (last_position>=0) - set_slsb(&slsb[last_position],SLSB_P_INPUT_PROCESSING); + set_slsb(q, &last_position, SLSB_P_INPUT_NOT_INIT, &count); #endif /* QDIO_USE_PROCESSING_STATE */ QDIO_DBF_HEX4(0,trace,&q->first_to_check,sizeof(int)); @@ -902,6 +1165,10 @@ static inline int tiqdio_is_inbound_q_done(struct qdio_q *q) { int no_used; + unsigned int start_buf, count; + unsigned char state = 0; + struct qdio_irq *irq = (struct qdio_irq *) q->irq_ptr; + #ifdef CONFIG_QDIO_DEBUG char dbf_text[15]; #endif @@ -927,8 +1194,13 @@ tiqdio_is_inbound_q_done(struct qdio_q *q) if (!q->siga_sync) /* we'll check for more primed buffers in qeth_stop_polling */ return 0; - - if (q->slsb.acc.val[q->first_to_check]!=SLSB_P_INPUT_PRIMED) + if (irq->is_qebsm) { + count = 1; + start_buf = q->first_to_check; + qdio_do_eqbs(q, &state, &start_buf, &count); + } else + state = q->slsb.acc.val[q->first_to_check]; + if (state != SLSB_P_INPUT_PRIMED) /* * nothing more to do, if next buffer is not PRIMED. * note that we did a SYNC_MEMORY before, that there @@ -955,6 +1227,10 @@ static inline int qdio_is_inbound_q_done(struct qdio_q *q) { int no_used; + unsigned int start_buf, count; + unsigned char state = 0; + struct qdio_irq *irq = (struct qdio_irq *) q->irq_ptr; + #ifdef CONFIG_QDIO_DEBUG char dbf_text[15]; #endif @@ -973,8 +1249,13 @@ qdio_is_inbound_q_done(struct qdio_q *q) QDIO_DBF_TEXT4(0,trace,dbf_text); return 1; } - - if (q->slsb.acc.val[q->first_to_check]==SLSB_P_INPUT_PRIMED) { + if (irq->is_qebsm) { + count = 1; + start_buf = q->first_to_check; + qdio_do_eqbs(q, &state, &start_buf, &count); + } else + state = q->slsb.acc.val[q->first_to_check]; + if (state == SLSB_P_INPUT_PRIMED) { /* we got something to do */ QDIO_DBF_TEXT4(0,trace,"inqisntA"); QDIO_DBF_HEX4(0,trace,&q,sizeof(void*)); @@ -1456,7 +1737,7 @@ qdio_fill_qs(struct qdio_irq *irq_ptr, struct ccw_device *cdev, void *ptr; int available; - sprintf(dbf_text,"qfqs%4x",cdev->private->irq); + sprintf(dbf_text,"qfqs%4x",cdev->private->sch_no); QDIO_DBF_TEXT0(0,setup,dbf_text); for (i=0;i<no_input_qs;i++) { q=irq_ptr->input_qs[i]; @@ -1476,7 +1757,7 @@ qdio_fill_qs(struct qdio_irq *irq_ptr, struct ccw_device *cdev, q->queue_type=q_format; q->int_parm=int_parm; - q->irq=irq_ptr->irq; + q->schid = irq_ptr->schid; q->irq_ptr = irq_ptr; q->cdev = cdev; q->mask=1<<(31-i); @@ -1523,11 +1804,11 @@ qdio_fill_qs(struct qdio_irq *irq_ptr, struct ccw_device *cdev, QDIO_DBF_HEX2(0,setup,&ptr,sizeof(void*)); /* fill in slsb */ - for (j=0;j<QDIO_MAX_BUFFERS_PER_Q;j++) { - set_slsb(&q->slsb.acc.val[j], - SLSB_P_INPUT_NOT_INIT); -/* q->sbal[j]->element[1].sbalf.i1.key=QDIO_STORAGE_KEY;*/ - } + if (!irq_ptr->is_qebsm) { + unsigned int count = 1; + for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; j++) + set_slsb(q, &j, SLSB_P_INPUT_NOT_INIT, &count); + } } for (i=0;i<no_output_qs;i++) { @@ -1549,7 +1830,7 @@ qdio_fill_qs(struct qdio_irq *irq_ptr, struct ccw_device *cdev, q->queue_type=q_format; q->int_parm=int_parm; q->is_input_q=0; - q->irq=irq_ptr->irq; + q->schid = irq_ptr->schid; q->cdev = cdev; q->irq_ptr = irq_ptr; q->mask=1<<(31-i); @@ -1584,11 +1865,11 @@ qdio_fill_qs(struct qdio_irq *irq_ptr, struct ccw_device *cdev, QDIO_DBF_HEX2(0,setup,&ptr,sizeof(void*)); /* fill in slsb */ - for (j=0;j<QDIO_MAX_BUFFERS_PER_Q;j++) { - set_slsb(&q->slsb.acc.val[j], - SLSB_P_OUTPUT_NOT_INIT); -/* q->sbal[j]->element[1].sbalf.i1.key=QDIO_STORAGE_KEY;*/ - } + if (!irq_ptr->is_qebsm) { + unsigned int count = 1; + for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; j++) + set_slsb(q, &j, SLSB_P_OUTPUT_NOT_INIT, &count); + } } } @@ -1656,7 +1937,7 @@ qdio_set_state(struct qdio_irq *irq_ptr, enum qdio_irq_states state) char dbf_text[15]; QDIO_DBF_TEXT5(0,trace,"newstate"); - sprintf(dbf_text,"%4x%4x",irq_ptr->irq,state); + sprintf(dbf_text,"%4x%4x",irq_ptr->schid.sch_no,state); QDIO_DBF_TEXT5(0,trace,dbf_text); #endif /* CONFIG_QDIO_DEBUG */ @@ -1669,12 +1950,12 @@ qdio_set_state(struct qdio_irq *irq_ptr, enum qdio_irq_states state) } static inline void -qdio_irq_check_sense(int irq, struct irb *irb) +qdio_irq_check_sense(struct subchannel_id schid, struct irb *irb) { char dbf_text[15]; if (irb->esw.esw0.erw.cons) { - sprintf(dbf_text,"sens%4x",irq); + sprintf(dbf_text,"sens%4x",schid.sch_no); QDIO_DBF_TEXT2(1,trace,dbf_text); QDIO_DBF_HEX0(0,sense,irb,QDIO_DBF_SENSE_LEN); @@ -1785,21 +2066,22 @@ qdio_timeout_handler(struct ccw_device *cdev) switch (irq_ptr->state) { case QDIO_IRQ_STATE_INACTIVE: - QDIO_PRINT_ERR("establish queues on irq %04x: timed out\n", - irq_ptr->irq); + QDIO_PRINT_ERR("establish queues on irq 0.%x.%04x: timed out\n", + irq_ptr->schid.ssid, irq_ptr->schid.sch_no); QDIO_DBF_TEXT2(1,setup,"eq:timeo"); qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ERR); break; case QDIO_IRQ_STATE_CLEANUP: - QDIO_PRINT_INFO("Did not get interrupt on cleanup, irq=0x%x.\n", - irq_ptr->irq); + QDIO_PRINT_INFO("Did not get interrupt on cleanup, " + "irq=0.%x.%x.\n", + irq_ptr->schid.ssid, irq_ptr->schid.sch_no); qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ERR); break; case QDIO_IRQ_STATE_ESTABLISHED: case QDIO_IRQ_STATE_ACTIVE: /* I/O has been terminated by common I/O layer. */ - QDIO_PRINT_INFO("Queues on irq %04x killed by cio.\n", - irq_ptr->irq); + QDIO_PRINT_INFO("Queues on irq 0.%x.%04x killed by cio.\n", + irq_ptr->schid.ssid, irq_ptr->schid.sch_no); QDIO_DBF_TEXT2(1, trace, "cio:term"); qdio_set_state(irq_ptr, QDIO_IRQ_STATE_STOPPED); if (get_device(&cdev->dev)) { @@ -1862,7 +2144,7 @@ qdio_handler(struct ccw_device *cdev, unsigned long intparm, struct irb *irb) } } - qdio_irq_check_sense(irq_ptr->irq, irb); + qdio_irq_check_sense(irq_ptr->schid, irb); #ifdef CONFIG_QDIO_DEBUG sprintf(dbf_text, "state:%d", irq_ptr->state); @@ -1905,7 +2187,7 @@ int qdio_synchronize(struct ccw_device *cdev, unsigned int flags, unsigned int queue_number) { - int cc; + int cc = 0; struct qdio_q *q; struct qdio_irq *irq_ptr; void *ptr; @@ -1918,7 +2200,7 @@ qdio_synchronize(struct ccw_device *cdev, unsigned int flags, return -ENODEV; #ifdef CONFIG_QDIO_DEBUG - *((int*)(&dbf_text[4])) = irq_ptr->irq; + *((int*)(&dbf_text[4])) = irq_ptr->schid.sch_no; QDIO_DBF_HEX4(0,trace,dbf_text,QDIO_DBF_TRACE_LEN); *((int*)(&dbf_text[0]))=flags; *((int*)(&dbf_text[4]))=queue_number; @@ -1929,12 +2211,14 @@ qdio_synchronize(struct ccw_device *cdev, unsigned int flags, q=irq_ptr->input_qs[queue_number]; if (!q) return -EINVAL; - cc = do_siga_sync(q->irq, 0, q->mask); + if (!(irq_ptr->is_qebsm)) + cc = do_siga_sync(q->schid, 0, q->mask); } else if (flags&QDIO_FLAG_SYNC_OUTPUT) { q=irq_ptr->output_qs[queue_number]; if (!q) return -EINVAL; - cc = do_siga_sync(q->irq, q->mask, 0); + if (!(irq_ptr->is_qebsm)) + cc = do_siga_sync(q->schid, q->mask, 0); } else return -EINVAL; @@ -1945,15 +2229,54 @@ qdio_synchronize(struct ccw_device *cdev, unsigned int flags, return cc; } -static unsigned char -qdio_check_siga_needs(int sch) +static inline void +qdio_check_subchannel_qebsm(struct qdio_irq *irq_ptr, unsigned char qdioac, + unsigned long token) +{ + struct qdio_q *q; + int i; + unsigned int count, start_buf; + char dbf_text[15]; + + /*check if QEBSM is disabled */ + if (!(irq_ptr->is_qebsm) || !(qdioac & 0x01)) { + irq_ptr->is_qebsm = 0; + irq_ptr->sch_token = 0; + irq_ptr->qib.rflags &= ~QIB_RFLAGS_ENABLE_QEBSM; + QDIO_DBF_TEXT0(0,setup,"noV=V"); + return; + } + irq_ptr->sch_token = token; + /*input queue*/ + for (i = 0; i < irq_ptr->no_input_qs;i++) { + q = irq_ptr->input_qs[i]; + count = QDIO_MAX_BUFFERS_PER_Q; + start_buf = 0; + set_slsb(q, &start_buf, SLSB_P_INPUT_NOT_INIT, &count); + } + sprintf(dbf_text,"V=V:%2x",irq_ptr->is_qebsm); + QDIO_DBF_TEXT0(0,setup,dbf_text); + sprintf(dbf_text,"%8lx",irq_ptr->sch_token); + QDIO_DBF_TEXT0(0,setup,dbf_text); + /*output queue*/ + for (i = 0; i < irq_ptr->no_output_qs; i++) { + q = irq_ptr->output_qs[i]; + count = QDIO_MAX_BUFFERS_PER_Q; + start_buf = 0; + set_slsb(q, &start_buf, SLSB_P_OUTPUT_NOT_INIT, &count); + } +} + +static void +qdio_get_ssqd_information(struct qdio_irq *irq_ptr) { int result; unsigned char qdioac; - struct { struct chsc_header request; - u16 reserved1; + u16 reserved1:10; + u16 ssid:2; + u16 fmt:4; u16 first_sch; u16 reserved2; u16 last_sch; @@ -1964,67 +2287,83 @@ qdio_check_siga_needs(int sch) u8 reserved5; u16 sch; u8 qfmt; - u8 reserved6; - u8 qdioac; + u8 parm; + u8 qdioac1; u8 sch_class; u8 reserved7; u8 icnt; u8 reserved8; u8 ocnt; + u8 reserved9; + u8 mbccnt; + u16 qdioac2; + u64 sch_token; } *ssqd_area; + QDIO_DBF_TEXT0(0,setup,"getssqd"); + qdioac = 0; ssqd_area = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA); if (!ssqd_area) { QDIO_PRINT_WARN("Could not get memory for chsc. Using all " \ - "SIGAs for sch x%x.\n", sch); - return CHSC_FLAG_SIGA_INPUT_NECESSARY || - CHSC_FLAG_SIGA_OUTPUT_NECESSARY || - CHSC_FLAG_SIGA_SYNC_NECESSARY; /* all flags set */ + "SIGAs for sch x%x.\n", irq_ptr->schid.sch_no); + irq_ptr->qdioac = CHSC_FLAG_SIGA_INPUT_NECESSARY || + CHSC_FLAG_SIGA_OUTPUT_NECESSARY || + CHSC_FLAG_SIGA_SYNC_NECESSARY; /* all flags set */ + irq_ptr->is_qebsm = 0; + irq_ptr->sch_token = 0; + irq_ptr->qib.rflags &= ~QIB_RFLAGS_ENABLE_QEBSM; + return; } + ssqd_area->request = (struct chsc_header) { .length = 0x0010, .code = 0x0024, }; - - ssqd_area->first_sch = sch; - ssqd_area->last_sch = sch; - - result=chsc(ssqd_area); + ssqd_area->first_sch = irq_ptr->schid.sch_no; + ssqd_area->last_sch = irq_ptr->schid.sch_no; + ssqd_area->ssid = irq_ptr->schid.ssid; + result = chsc(ssqd_area); if (result) { QDIO_PRINT_WARN("CHSC returned cc %i. Using all " \ - "SIGAs for sch x%x.\n", - result,sch); + "SIGAs for sch 0.%x.%x.\n", result, + irq_ptr->schid.ssid, irq_ptr->schid.sch_no); qdioac = CHSC_FLAG_SIGA_INPUT_NECESSARY || CHSC_FLAG_SIGA_OUTPUT_NECESSARY || CHSC_FLAG_SIGA_SYNC_NECESSARY; /* all flags set */ + irq_ptr->is_qebsm = 0; goto out; } if (ssqd_area->response.code != QDIO_CHSC_RESPONSE_CODE_OK) { QDIO_PRINT_WARN("response upon checking SIGA needs " \ - "is 0x%x. Using all SIGAs for sch x%x.\n", - ssqd_area->response.code, sch); + "is 0x%x. Using all SIGAs for sch 0.%x.%x.\n", + ssqd_area->response.code, + irq_ptr->schid.ssid, irq_ptr->schid.sch_no); qdioac = CHSC_FLAG_SIGA_INPUT_NECESSARY || CHSC_FLAG_SIGA_OUTPUT_NECESSARY || CHSC_FLAG_SIGA_SYNC_NECESSARY; /* all flags set */ + irq_ptr->is_qebsm = 0; goto out; } if (!(ssqd_area->flags & CHSC_FLAG_QDIO_CAPABILITY) || !(ssqd_area->flags & CHSC_FLAG_VALIDITY) || - (ssqd_area->sch != sch)) { - QDIO_PRINT_WARN("huh? problems checking out sch x%x... " \ - "using all SIGAs.\n",sch); + (ssqd_area->sch != irq_ptr->schid.sch_no)) { + QDIO_PRINT_WARN("huh? problems checking out sch 0.%x.%x... " \ + "using all SIGAs.\n", + irq_ptr->schid.ssid, irq_ptr->schid.sch_no); qdioac = CHSC_FLAG_SIGA_INPUT_NECESSARY | CHSC_FLAG_SIGA_OUTPUT_NECESSARY | CHSC_FLAG_SIGA_SYNC_NECESSARY; /* worst case */ + irq_ptr->is_qebsm = 0; goto out; } - - qdioac = ssqd_area->qdioac; + qdioac = ssqd_area->qdioac1; out: + qdio_check_subchannel_qebsm(irq_ptr, qdioac, + ssqd_area->sch_token); free_page ((unsigned long) ssqd_area); - return qdioac; + irq_ptr->qdioac = qdioac; } static unsigned int @@ -2055,6 +2394,13 @@ tiqdio_check_chsc_availability(void) sprintf(dbf_text,"hydrati%1x", hydra_thinints); QDIO_DBF_TEXT0(0,setup,dbf_text); +#ifdef CONFIG_64BIT + /* Check for QEBSM support in general (bit 58). */ + is_passthrough = css_general_characteristics.qebsm; +#endif + sprintf(dbf_text,"cssQBS:%1x", is_passthrough); + QDIO_DBF_TEXT0(0,setup,dbf_text); + /* Check for aif time delay disablement fac (bit 56). If installed, * omit svs even under lpar (good point by rick again) */ omit_svs = css_general_characteristics.aif_tdd; @@ -2091,7 +2437,7 @@ tiqdio_set_subchannel_ind(struct qdio_irq *irq_ptr, int reset_to_zero) /* set to 0x10000000 to enable * time delay disablement facility */ u32 reserved5; - u32 subsystem_id; + struct subchannel_id schid; u32 reserved6[1004]; struct chsc_header response; u32 reserved7; @@ -2113,7 +2459,8 @@ tiqdio_set_subchannel_ind(struct qdio_irq *irq_ptr, int reset_to_zero) scssc_area = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA); if (!scssc_area) { QDIO_PRINT_WARN("No memory for setting indicators on " \ - "subchannel x%x.\n", irq_ptr->irq); + "subchannel 0.%x.%x.\n", + irq_ptr->schid.ssid, irq_ptr->schid.sch_no); return -ENOMEM; } scssc_area->request = (struct chsc_header) { @@ -2127,7 +2474,7 @@ tiqdio_set_subchannel_ind(struct qdio_irq *irq_ptr, int reset_to_zero) scssc_area->ks = QDIO_STORAGE_KEY; scssc_area->kc = QDIO_STORAGE_KEY; scssc_area->isc = TIQDIO_THININT_ISC; - scssc_area->subsystem_id = (1<<16) + irq_ptr->irq; + scssc_area->schid = irq_ptr->schid; /* enables the time delay disablement facility. Don't care * whether it is really there (i.e. we haven't checked for * it) */ @@ -2137,12 +2484,11 @@ tiqdio_set_subchannel_ind(struct qdio_irq *irq_ptr, int reset_to_zero) QDIO_PRINT_WARN("Time delay disablement facility " \ "not available\n"); - - result = chsc(scssc_area); if (result) { - QDIO_PRINT_WARN("could not set indicators on irq x%x, " \ - "cc=%i.\n",irq_ptr->irq,result); + QDIO_PRINT_WARN("could not set indicators on irq 0.%x.%x, " \ + "cc=%i.\n", + irq_ptr->schid.ssid, irq_ptr->schid.sch_no,result); result = -EIO; goto out; } @@ -2198,7 +2544,8 @@ tiqdio_set_delay_target(struct qdio_irq *irq_ptr, unsigned long delay_target) scsscf_area = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA); if (!scsscf_area) { QDIO_PRINT_WARN("No memory for setting delay target on " \ - "subchannel x%x.\n", irq_ptr->irq); + "subchannel 0.%x.%x.\n", + irq_ptr->schid.ssid, irq_ptr->schid.sch_no); return -ENOMEM; } scsscf_area->request = (struct chsc_header) { @@ -2210,8 +2557,10 @@ tiqdio_set_delay_target(struct qdio_irq *irq_ptr, unsigned long delay_target) result=chsc(scsscf_area); if (result) { - QDIO_PRINT_WARN("could not set delay target on irq x%x, " \ - "cc=%i. Continuing.\n",irq_ptr->irq,result); + QDIO_PRINT_WARN("could not set delay target on irq 0.%x.%x, " \ + "cc=%i. Continuing.\n", + irq_ptr->schid.ssid, irq_ptr->schid.sch_no, + result); result = -EIO; goto out; } @@ -2245,7 +2594,7 @@ qdio_cleanup(struct ccw_device *cdev, int how) if (!irq_ptr) return -ENODEV; - sprintf(dbf_text,"qcln%4x",irq_ptr->irq); + sprintf(dbf_text,"qcln%4x",irq_ptr->schid.sch_no); QDIO_DBF_TEXT1(0,trace,dbf_text); QDIO_DBF_TEXT0(0,setup,dbf_text); @@ -2272,7 +2621,7 @@ qdio_shutdown(struct ccw_device *cdev, int how) down(&irq_ptr->setting_up_sema); - sprintf(dbf_text,"qsqs%4x",irq_ptr->irq); + sprintf(dbf_text,"qsqs%4x",irq_ptr->schid.sch_no); QDIO_DBF_TEXT1(0,trace,dbf_text); QDIO_DBF_TEXT0(0,setup,dbf_text); @@ -2378,7 +2727,7 @@ qdio_free(struct ccw_device *cdev) down(&irq_ptr->setting_up_sema); - sprintf(dbf_text,"qfqs%4x",irq_ptr->irq); + sprintf(dbf_text,"qfqs%4x",irq_ptr->schid.sch_no); QDIO_DBF_TEXT1(0,trace,dbf_text); QDIO_DBF_TEXT0(0,setup,dbf_text); @@ -2526,13 +2875,14 @@ qdio_establish_irq_check_for_errors(struct ccw_device *cdev, int cstat, irq_ptr = cdev->private->qdio_data; if (cstat || (dstat & ~(DEV_STAT_CHN_END|DEV_STAT_DEV_END))) { - sprintf(dbf_text,"ick1%4x",irq_ptr->irq); + sprintf(dbf_text,"ick1%4x",irq_ptr->schid.sch_no); QDIO_DBF_TEXT2(1,trace,dbf_text); QDIO_DBF_HEX2(0,trace,&dstat,sizeof(int)); QDIO_DBF_HEX2(0,trace,&cstat,sizeof(int)); QDIO_PRINT_ERR("received check condition on establish " \ - "queues on irq 0x%x (cs=x%x, ds=x%x).\n", - irq_ptr->irq,cstat,dstat); + "queues on irq 0.%x.%x (cs=x%x, ds=x%x).\n", + irq_ptr->schid.ssid, irq_ptr->schid.sch_no, + cstat,dstat); qdio_set_state(irq_ptr,QDIO_IRQ_STATE_ERR); } @@ -2540,9 +2890,10 @@ qdio_establish_irq_check_for_errors(struct ccw_device *cdev, int cstat, QDIO_DBF_TEXT2(1,setup,"eq:no de"); QDIO_DBF_HEX2(0,setup,&dstat, sizeof(dstat)); QDIO_DBF_HEX2(0,setup,&cstat, sizeof(cstat)); - QDIO_PRINT_ERR("establish queues on irq %04x: didn't get " + QDIO_PRINT_ERR("establish queues on irq 0.%x.%04x: didn't get " "device end: dstat=%02x, cstat=%02x\n", - irq_ptr->irq, dstat, cstat); + irq_ptr->schid.ssid, irq_ptr->schid.sch_no, + dstat, cstat); qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ERR); return 1; } @@ -2551,10 +2902,10 @@ qdio_establish_irq_check_for_errors(struct ccw_device *cdev, int cstat, QDIO_DBF_TEXT2(1,setup,"eq:badio"); QDIO_DBF_HEX2(0,setup,&dstat, sizeof(dstat)); QDIO_DBF_HEX2(0,setup,&cstat, sizeof(cstat)); - QDIO_PRINT_ERR("establish queues on irq %04x: got " + QDIO_PRINT_ERR("establish queues on irq 0.%x.%04x: got " "the following devstat: dstat=%02x, " - "cstat=%02x\n", - irq_ptr->irq, dstat, cstat); + "cstat=%02x\n", irq_ptr->schid.ssid, + irq_ptr->schid.sch_no, dstat, cstat); qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ERR); return 1; } @@ -2569,7 +2920,7 @@ qdio_establish_handle_irq(struct ccw_device *cdev, int cstat, int dstat) irq_ptr = cdev->private->qdio_data; - sprintf(dbf_text,"qehi%4x",cdev->private->irq); + sprintf(dbf_text,"qehi%4x",cdev->private->sch_no); QDIO_DBF_TEXT0(0,setup,dbf_text); QDIO_DBF_TEXT0(0,trace,dbf_text); @@ -2588,7 +2939,7 @@ qdio_initialize(struct qdio_initialize *init_data) int rc; char dbf_text[15]; - sprintf(dbf_text,"qini%4x",init_data->cdev->private->irq); + sprintf(dbf_text,"qini%4x",init_data->cdev->private->sch_no); QDIO_DBF_TEXT0(0,setup,dbf_text); QDIO_DBF_TEXT0(0,trace,dbf_text); @@ -2609,7 +2960,7 @@ qdio_allocate(struct qdio_initialize *init_data) struct qdio_irq *irq_ptr; char dbf_text[15]; - sprintf(dbf_text,"qalc%4x",init_data->cdev->private->irq); + sprintf(dbf_text,"qalc%4x",init_data->cdev->private->sch_no); QDIO_DBF_TEXT0(0,setup,dbf_text); QDIO_DBF_TEXT0(0,trace,dbf_text); if ( (init_data->no_input_qs>QDIO_MAX_QUEUES_PER_IRQ) || @@ -2682,7 +3033,7 @@ int qdio_fill_irq(struct qdio_initialize *init_data) irq_ptr->int_parm=init_data->int_parm; - irq_ptr->irq = init_data->cdev->private->irq; + irq_ptr->schid = ccw_device_get_subchannel_id(init_data->cdev); irq_ptr->no_input_qs=init_data->no_input_qs; irq_ptr->no_output_qs=init_data->no_output_qs; @@ -2698,11 +3049,12 @@ int qdio_fill_irq(struct qdio_initialize *init_data) QDIO_DBF_TEXT2(0,setup,dbf_text); if (irq_ptr->is_thinint_irq) { - irq_ptr->dev_st_chg_ind=qdio_get_indicator(); + irq_ptr->dev_st_chg_ind = qdio_get_indicator(); QDIO_DBF_HEX1(0,setup,&irq_ptr->dev_st_chg_ind,sizeof(void*)); if (!irq_ptr->dev_st_chg_ind) { QDIO_PRINT_WARN("no indicator location available " \ - "for irq 0x%x\n",irq_ptr->irq); + "for irq 0.%x.%x\n", + irq_ptr->schid.ssid, irq_ptr->schid.sch_no); qdio_release_irq_memory(irq_ptr); return -ENOBUFS; } @@ -2747,6 +3099,10 @@ int qdio_fill_irq(struct qdio_initialize *init_data) irq_ptr->qdr->qkey=QDIO_STORAGE_KEY; /* fill in qib */ + irq_ptr->is_qebsm = is_passthrough; + if (irq_ptr->is_qebsm) + irq_ptr->qib.rflags |= QIB_RFLAGS_ENABLE_QEBSM; + irq_ptr->qib.qfmt=init_data->q_format; if (init_data->no_input_qs) irq_ptr->qib.isliba=(unsigned long)(irq_ptr->input_qs[0]->slib); @@ -2829,7 +3185,7 @@ qdio_establish(struct qdio_initialize *init_data) tiqdio_set_delay_target(irq_ptr,TIQDIO_DELAY_TARGET); } - sprintf(dbf_text,"qest%4x",cdev->private->irq); + sprintf(dbf_text,"qest%4x",cdev->private->sch_no); QDIO_DBF_TEXT0(0,setup,dbf_text); QDIO_DBF_TEXT0(0,trace,dbf_text); @@ -2855,9 +3211,10 @@ qdio_establish(struct qdio_initialize *init_data) sprintf(dbf_text,"eq:io%4x",result); QDIO_DBF_TEXT2(1,setup,dbf_text); } - QDIO_PRINT_WARN("establish queues on irq %04x: do_IO " \ - "returned %i, next try returned %i\n", - irq_ptr->irq,result,result2); + QDIO_PRINT_WARN("establish queues on irq 0.%x.%04x: do_IO " \ + "returned %i, next try returned %i\n", + irq_ptr->schid.ssid, irq_ptr->schid.sch_no, + result, result2); result=result2; if (result) ccw_device_set_timeout(cdev, 0); @@ -2884,7 +3241,7 @@ qdio_establish(struct qdio_initialize *init_data) return -EIO; } - irq_ptr->qdioac=qdio_check_siga_needs(irq_ptr->irq); + qdio_get_ssqd_information(irq_ptr); /* if this gets set once, we're running under VM and can omit SVSes */ if (irq_ptr->qdioac&CHSC_FLAG_SIGA_SYNC_NECESSARY) omit_svs=1; @@ -2930,7 +3287,7 @@ qdio_activate(struct ccw_device *cdev, int flags) goto out; } - sprintf(dbf_text,"qact%4x", irq_ptr->irq); + sprintf(dbf_text,"qact%4x", irq_ptr->schid.sch_no); QDIO_DBF_TEXT2(0,setup,dbf_text); QDIO_DBF_TEXT2(0,trace,dbf_text); @@ -2955,9 +3312,10 @@ qdio_activate(struct ccw_device *cdev, int flags) sprintf(dbf_text,"aq:io%4x",result); QDIO_DBF_TEXT2(1,setup,dbf_text); } - QDIO_PRINT_WARN("activate queues on irq %04x: do_IO " \ - "returned %i, next try returned %i\n", - irq_ptr->irq,result,result2); + QDIO_PRINT_WARN("activate queues on irq 0.%x.%04x: do_IO " \ + "returned %i, next try returned %i\n", + irq_ptr->schid.ssid, irq_ptr->schid.sch_no, + result, result2); result=result2; } @@ -3015,30 +3373,40 @@ static inline void qdio_do_qdio_fill_input(struct qdio_q *q, unsigned int qidx, unsigned int count, struct qdio_buffer *buffers) { + struct qdio_irq *irq = (struct qdio_irq *) q->irq_ptr; + qidx &= (QDIO_MAX_BUFFERS_PER_Q - 1); + if (irq->is_qebsm) { + while (count) + set_slsb(q, &qidx, SLSB_CU_INPUT_EMPTY, &count); + return; + } for (;;) { - set_slsb(&q->slsb.acc.val[qidx],SLSB_CU_INPUT_EMPTY); + set_slsb(q, &qidx, SLSB_CU_INPUT_EMPTY, &count); count--; if (!count) break; - qidx=(qidx+1)&(QDIO_MAX_BUFFERS_PER_Q-1); + qidx = (qidx + 1) & (QDIO_MAX_BUFFERS_PER_Q - 1); } - - /* not necessary, as the queues are synced during the SIGA read */ - /*SYNC_MEMORY;*/ } static inline void qdio_do_qdio_fill_output(struct qdio_q *q, unsigned int qidx, unsigned int count, struct qdio_buffer *buffers) { + struct qdio_irq *irq = (struct qdio_irq *) q->irq_ptr; + + qidx &= (QDIO_MAX_BUFFERS_PER_Q - 1); + if (irq->is_qebsm) { + while (count) + set_slsb(q, &qidx, SLSB_CU_OUTPUT_PRIMED, &count); + return; + } + for (;;) { - set_slsb(&q->slsb.acc.val[qidx],SLSB_CU_OUTPUT_PRIMED); + set_slsb(q, &qidx, SLSB_CU_OUTPUT_PRIMED, &count); count--; if (!count) break; - qidx=(qidx+1)&(QDIO_MAX_BUFFERS_PER_Q-1); + qidx = (qidx + 1) & (QDIO_MAX_BUFFERS_PER_Q - 1); } - - /* SIGA write will sync the queues */ - /*SYNC_MEMORY;*/ } static inline void @@ -3083,6 +3451,9 @@ do_qdio_handle_outbound(struct qdio_q *q, unsigned int callflags, struct qdio_buffer *buffers) { int used_elements; + unsigned int cnt, start_buf; + unsigned char state = 0; + struct qdio_irq *irq = (struct qdio_irq *) q->irq_ptr; /* This is the outbound handling of queues */ #ifdef QDIO_PERFORMANCE_STATS @@ -3115,9 +3486,15 @@ do_qdio_handle_outbound(struct qdio_q *q, unsigned int callflags, * SYNC_MEMORY :-/ ), we try to * fast-requeue buffers */ - if (q->slsb.acc.val[(qidx+QDIO_MAX_BUFFERS_PER_Q-1) - &(QDIO_MAX_BUFFERS_PER_Q-1)]!= - SLSB_CU_OUTPUT_PRIMED) { + if (irq->is_qebsm) { + cnt = 1; + start_buf = ((qidx+QDIO_MAX_BUFFERS_PER_Q-1) & + (QDIO_MAX_BUFFERS_PER_Q-1)); + qdio_do_eqbs(q, &state, &start_buf, &cnt); + } else + state = q->slsb.acc.val[(qidx+QDIO_MAX_BUFFERS_PER_Q-1) + &(QDIO_MAX_BUFFERS_PER_Q-1) ]; + if (state != SLSB_CU_OUTPUT_PRIMED) { qdio_kick_outbound_q(q); } else { QDIO_DBF_TEXT3(0,trace, "fast-req"); @@ -3150,7 +3527,7 @@ do_QDIO(struct ccw_device *cdev,unsigned int callflags, #ifdef CONFIG_QDIO_DEBUG char dbf_text[20]; - sprintf(dbf_text,"doQD%04x",cdev->private->irq); + sprintf(dbf_text,"doQD%04x",cdev->private->sch_no); QDIO_DBF_TEXT3(0,trace,dbf_text); #endif /* CONFIG_QDIO_DEBUG */ diff --git a/drivers/s390/cio/qdio.h b/drivers/s390/cio/qdio.h index 328e31cc685..fa385e761fe 100644 --- a/drivers/s390/cio/qdio.h +++ b/drivers/s390/cio/qdio.h @@ -3,14 +3,15 @@ #include <asm/page.h> -#define VERSION_CIO_QDIO_H "$Revision: 1.33 $" +#include "schid.h" + +#define VERSION_CIO_QDIO_H "$Revision: 1.40 $" #ifdef CONFIG_QDIO_DEBUG #define QDIO_VERBOSE_LEVEL 9 #else /* CONFIG_QDIO_DEBUG */ #define QDIO_VERBOSE_LEVEL 5 #endif /* CONFIG_QDIO_DEBUG */ - #define QDIO_USE_PROCESSING_STATE #ifdef CONFIG_QDIO_PERF_STATS @@ -265,12 +266,64 @@ QDIO_PRINT_##importance(header "%02x %02x %02x %02x %02x %02x %02x %02x " \ /* * Some instructions as assembly */ + +static inline int +do_sqbs(unsigned long sch, unsigned char state, int queue, + unsigned int *start, unsigned int *count) +{ +#ifdef CONFIG_64BIT + register unsigned long _ccq asm ("0") = *count; + register unsigned long _sch asm ("1") = sch; + unsigned long _queuestart = ((unsigned long)queue << 32) | *start; + + asm volatile ( + " .insn rsy,0xeb000000008A,%1,0,0(%2)\n\t" + : "+d" (_ccq), "+d" (_queuestart) + : "d" ((unsigned long)state), "d" (_sch) + : "memory", "cc" + ); + *count = _ccq & 0xff; + *start = _queuestart & 0xff; + + return (_ccq >> 32) & 0xff; +#else + return 0; +#endif +} + +static inline int +do_eqbs(unsigned long sch, unsigned char *state, int queue, + unsigned int *start, unsigned int *count) +{ +#ifdef CONFIG_64BIT + register unsigned long _ccq asm ("0") = *count; + register unsigned long _sch asm ("1") = sch; + unsigned long _queuestart = ((unsigned long)queue << 32) | *start; + unsigned long _state = 0; + + asm volatile ( + " .insn rrf,0xB99c0000,%1,%2,0,0 \n\t" + : "+d" (_ccq), "+d" (_queuestart), "+d" (_state) + : "d" (_sch) + : "memory", "cc" + ); + *count = _ccq & 0xff; + *start = _queuestart & 0xff; + *state = _state & 0xff; + + return (_ccq >> 32) & 0xff; +#else + return 0; +#endif +} + + static inline int -do_siga_sync(unsigned int irq, unsigned int mask1, unsigned int mask2) +do_siga_sync(struct subchannel_id schid, unsigned int mask1, unsigned int mask2) { int cc; -#ifndef CONFIG_ARCH_S390X +#ifndef CONFIG_64BIT asm volatile ( "lhi 0,2 \n\t" "lr 1,%1 \n\t" @@ -280,10 +333,10 @@ do_siga_sync(unsigned int irq, unsigned int mask1, unsigned int mask2) "ipm %0 \n\t" "srl %0,28 \n\t" : "=d" (cc) - : "d" (0x10000|irq), "d" (mask1), "d" (mask2) + : "d" (schid), "d" (mask1), "d" (mask2) : "cc", "0", "1", "2", "3" ); -#else /* CONFIG_ARCH_S390X */ +#else /* CONFIG_64BIT */ asm volatile ( "lghi 0,2 \n\t" "llgfr 1,%1 \n\t" @@ -293,19 +346,19 @@ do_siga_sync(unsigned int irq, unsigned int mask1, unsigned int mask2) "ipm %0 \n\t" "srl %0,28 \n\t" : "=d" (cc) - : "d" (0x10000|irq), "d" (mask1), "d" (mask2) + : "d" (schid), "d" (mask1), "d" (mask2) : "cc", "0", "1", "2", "3" ); -#endif /* CONFIG_ARCH_S390X */ +#endif /* CONFIG_64BIT */ return cc; } static inline int -do_siga_input(unsigned int irq, unsigned int mask) +do_siga_input(struct subchannel_id schid, unsigned int mask) { int cc; -#ifndef CONFIG_ARCH_S390X +#ifndef CONFIG_64BIT asm volatile ( "lhi 0,1 \n\t" "lr 1,%1 \n\t" @@ -314,10 +367,10 @@ do_siga_input(unsigned int irq, unsigned int mask) "ipm %0 \n\t" "srl %0,28 \n\t" : "=d" (cc) - : "d" (0x10000|irq), "d" (mask) + : "d" (schid), "d" (mask) : "cc", "0", "1", "2", "memory" ); -#else /* CONFIG_ARCH_S390X */ +#else /* CONFIG_64BIT */ asm volatile ( "lghi 0,1 \n\t" "llgfr 1,%1 \n\t" @@ -326,21 +379,22 @@ do_siga_input(unsigned int irq, unsigned int mask) "ipm %0 \n\t" "srl %0,28 \n\t" : "=d" (cc) - : "d" (0x10000|irq), "d" (mask) + : "d" (schid), "d" (mask) : "cc", "0", "1", "2", "memory" ); -#endif /* CONFIG_ARCH_S390X */ +#endif /* CONFIG_64BIT */ return cc; } static inline int -do_siga_output(unsigned long irq, unsigned long mask, __u32 *bb) +do_siga_output(unsigned long schid, unsigned long mask, __u32 *bb, + unsigned int fc) { int cc; __u32 busy_bit; -#ifndef CONFIG_ARCH_S390X +#ifndef CONFIG_64BIT asm volatile ( "lhi 0,0 \n\t" "lr 1,%2 \n\t" @@ -366,14 +420,14 @@ do_siga_output(unsigned long irq, unsigned long mask, __u32 *bb) ".long 0b,2b \n\t" ".previous \n\t" : "=d" (cc), "=d" (busy_bit) - : "d" (0x10000|irq), "d" (mask), + : "d" (schid), "d" (mask), "i" (QDIO_SIGA_ERROR_ACCESS_EXCEPTION) : "cc", "0", "1", "2", "memory" ); -#else /* CONFIG_ARCH_S390X */ +#else /* CONFIG_64BIT */ asm volatile ( - "lghi 0,0 \n\t" - "llgfr 1,%2 \n\t" + "llgfr 0,%5 \n\t" + "lgr 1,%2 \n\t" "llgfr 2,%3 \n\t" "siga 0 \n\t" "0:" @@ -391,11 +445,11 @@ do_siga_output(unsigned long irq, unsigned long mask, __u32 *bb) ".quad 0b,1b \n\t" ".previous \n\t" : "=d" (cc), "=d" (busy_bit) - : "d" (0x10000|irq), "d" (mask), - "i" (QDIO_SIGA_ERROR_ACCESS_EXCEPTION) + : "d" (schid), "d" (mask), + "i" (QDIO_SIGA_ERROR_ACCESS_EXCEPTION), "d" (fc) : "cc", "0", "1", "2", "memory" ); -#endif /* CONFIG_ARCH_S390X */ +#endif /* CONFIG_64BIT */ (*bb) = busy_bit; return cc; @@ -407,21 +461,21 @@ do_clear_global_summary(void) unsigned long time; -#ifndef CONFIG_ARCH_S390X +#ifndef CONFIG_64BIT asm volatile ( "lhi 1,3 \n\t" ".insn rre,0xb2650000,2,0 \n\t" "lr %0,3 \n\t" : "=d" (time) : : "cc", "1", "2", "3" ); -#else /* CONFIG_ARCH_S390X */ +#else /* CONFIG_64BIT */ asm volatile ( "lghi 1,3 \n\t" ".insn rre,0xb2650000,2,0 \n\t" "lgr %0,3 \n\t" : "=d" (time) : : "cc", "1", "2", "3" ); -#endif /* CONFIG_ARCH_S390X */ +#endif /* CONFIG_64BIT */ return time; } @@ -488,42 +542,21 @@ struct qdio_perf_stats { #define MY_MODULE_STRING(x) #x -#ifdef CONFIG_ARCH_S390X +#ifdef CONFIG_64BIT #define QDIO_GET_ADDR(x) ((__u32)(unsigned long)x) -#else /* CONFIG_ARCH_S390X */ +#else /* CONFIG_64BIT */ #define QDIO_GET_ADDR(x) ((__u32)(long)x) -#endif /* CONFIG_ARCH_S390X */ - -#ifdef CONFIG_QDIO_DEBUG -#define set_slsb(x,y) \ - if(q->queue_type==QDIO_TRACE_QTYPE) { \ - if(q->is_input_q) { \ - QDIO_DBF_HEX2(0,slsb_in,&q->slsb,QDIO_MAX_BUFFERS_PER_Q); \ - } else { \ - QDIO_DBF_HEX2(0,slsb_out,&q->slsb,QDIO_MAX_BUFFERS_PER_Q); \ - } \ - } \ - qdio_set_slsb(x,y); \ - if(q->queue_type==QDIO_TRACE_QTYPE) { \ - if(q->is_input_q) { \ - QDIO_DBF_HEX2(0,slsb_in,&q->slsb,QDIO_MAX_BUFFERS_PER_Q); \ - } else { \ - QDIO_DBF_HEX2(0,slsb_out,&q->slsb,QDIO_MAX_BUFFERS_PER_Q); \ - } \ - } -#else /* CONFIG_QDIO_DEBUG */ -#define set_slsb(x,y) qdio_set_slsb(x,y) -#endif /* CONFIG_QDIO_DEBUG */ +#endif /* CONFIG_64BIT */ struct qdio_q { volatile struct slsb slsb; char unused[QDIO_MAX_BUFFERS_PER_Q]; - __u32 * volatile dev_st_chg_ind; + __u32 * dev_st_chg_ind; int is_input_q; - int irq; + struct subchannel_id schid; struct ccw_device *cdev; unsigned int is_iqdio_q; @@ -568,6 +601,7 @@ struct qdio_q { struct tasklet_struct tasklet; #endif /* QDIO_USE_TIMERS_FOR_POLLING */ + enum qdio_irq_states state; /* used to store the error condition during a data transfer */ @@ -617,13 +651,17 @@ struct qdio_irq { __u32 * volatile dev_st_chg_ind; unsigned long int_parm; - int irq; + struct subchannel_id schid; unsigned int is_iqdio_irq; unsigned int is_thinint_irq; unsigned int hydra_gives_outbound_pcis; unsigned int sync_done_on_outb_pcis; + /* QEBSM facility */ + unsigned int is_qebsm; + unsigned long sch_token; + enum qdio_irq_states state; unsigned int no_input_qs; diff --git a/drivers/s390/cio/schid.h b/drivers/s390/cio/schid.h new file mode 100644 index 00000000000..54328fec5ad --- /dev/null +++ b/drivers/s390/cio/schid.h @@ -0,0 +1,26 @@ +#ifndef S390_SCHID_H +#define S390_SCHID_H + +struct subchannel_id { + __u32 reserved:13; + __u32 ssid:2; + __u32 one:1; + __u32 sch_no:16; +} __attribute__ ((packed,aligned(4))); + + +/* Helper function for sane state of pre-allocated subchannel_id. */ +static inline void +init_subchannel_id(struct subchannel_id *schid) +{ + memset(schid, 0, sizeof(struct subchannel_id)); + schid->one = 1; +} + +static inline int +schid_equal(struct subchannel_id *schid1, struct subchannel_id *schid2) +{ + return !memcmp(schid1, schid2, sizeof(struct subchannel_id)); +} + +#endif /* S390_SCHID_H */ diff --git a/drivers/s390/crypto/z90common.h b/drivers/s390/crypto/z90common.h index e319e78b5ea..f87c785f203 100644 --- a/drivers/s390/crypto/z90common.h +++ b/drivers/s390/crypto/z90common.h @@ -1,9 +1,9 @@ /* * linux/drivers/s390/crypto/z90common.h * - * z90crypt 1.3.2 + * z90crypt 1.3.3 * - * Copyright (C) 2001, 2004 IBM Corporation + * Copyright (C) 2001, 2005 IBM Corporation * Author(s): Robert Burroughs (burrough@us.ibm.com) * Eric Rossman (edrossma@us.ibm.com) * @@ -91,12 +91,13 @@ enum hdstat { #define TSQ_FATAL_ERROR 34 #define RSQ_FATAL_ERROR 35 -#define Z90CRYPT_NUM_TYPES 5 +#define Z90CRYPT_NUM_TYPES 6 #define PCICA 0 #define PCICC 1 #define PCIXCC_MCL2 2 #define PCIXCC_MCL3 3 #define CEX2C 4 +#define CEX2A 5 #define NILDEV -1 #define ANYDEV -1 #define PCIXCC_UNK -2 @@ -105,7 +106,7 @@ enum hdevice_type { PCICC_HW = 3, PCICA_HW = 4, PCIXCC_HW = 5, - OTHER_HW = 6, + CEX2A_HW = 6, CEX2C_HW = 7 }; diff --git a/drivers/s390/crypto/z90crypt.h b/drivers/s390/crypto/z90crypt.h index 0a3bb5a10dd..3a18443fdfa 100644 --- a/drivers/s390/crypto/z90crypt.h +++ b/drivers/s390/crypto/z90crypt.h @@ -1,9 +1,9 @@ /* * linux/drivers/s390/crypto/z90crypt.h * - * z90crypt 1.3.2 + * z90crypt 1.3.3 * - * Copyright (C) 2001, 2004 IBM Corporation + * Copyright (C) 2001, 2005 IBM Corporation * Author(s): Robert Burroughs (burrough@us.ibm.com) * Eric Rossman (edrossma@us.ibm.com) * @@ -29,11 +29,11 @@ #include <linux/ioctl.h> -#define VERSION_Z90CRYPT_H "$Revision: 1.11 $" +#define VERSION_Z90CRYPT_H "$Revision: 1.2.2.4 $" #define z90crypt_VERSION 1 #define z90crypt_RELEASE 3 // 2 = PCIXCC, 3 = rewrite for coding standards -#define z90crypt_VARIANT 2 // 2 = added PCIXCC MCL3 and CEX2C support +#define z90crypt_VARIANT 3 // 3 = CEX2A support /** * struct ica_rsa_modexpo @@ -122,6 +122,9 @@ struct ica_rsa_modexpo_crt { * Z90STAT_CEX2CCOUNT * Return an integer count of all CEX2Cs. * + * Z90STAT_CEX2ACOUNT + * Return an integer count of all CEX2As. + * * Z90STAT_REQUESTQ_COUNT * Return an integer count of the number of entries waiting to be * sent to a device. @@ -144,6 +147,7 @@ struct ica_rsa_modexpo_crt { * 0x03: PCIXCC_MCL2 * 0x04: PCIXCC_MCL3 * 0x05: CEX2C + * 0x06: CEX2A * 0x0d: device is disabled via the proc filesystem * * Z90STAT_QDEPTH_MASK @@ -199,6 +203,7 @@ struct ica_rsa_modexpo_crt { #define Z90STAT_PCIXCCMCL2COUNT _IOR(Z90_IOCTL_MAGIC, 0x4b, int) #define Z90STAT_PCIXCCMCL3COUNT _IOR(Z90_IOCTL_MAGIC, 0x4c, int) #define Z90STAT_CEX2CCOUNT _IOR(Z90_IOCTL_MAGIC, 0x4d, int) +#define Z90STAT_CEX2ACOUNT _IOR(Z90_IOCTL_MAGIC, 0x4e, int) #define Z90STAT_REQUESTQ_COUNT _IOR(Z90_IOCTL_MAGIC, 0x44, int) #define Z90STAT_PENDINGQ_COUNT _IOR(Z90_IOCTL_MAGIC, 0x45, int) #define Z90STAT_TOTALOPEN_COUNT _IOR(Z90_IOCTL_MAGIC, 0x46, int) diff --git a/drivers/s390/crypto/z90hardware.c b/drivers/s390/crypto/z90hardware.c index c215e088973..d7f7494a0cb 100644 --- a/drivers/s390/crypto/z90hardware.c +++ b/drivers/s390/crypto/z90hardware.c @@ -1,9 +1,9 @@ /* * linux/drivers/s390/crypto/z90hardware.c * - * z90crypt 1.3.2 + * z90crypt 1.3.3 * - * Copyright (C) 2001, 2004 IBM Corporation + * Copyright (C) 2001, 2005 IBM Corporation * Author(s): Robert Burroughs (burrough@us.ibm.com) * Eric Rossman (edrossma@us.ibm.com) * @@ -648,6 +648,87 @@ static struct cca_public_sec static_cca_pub_sec = { #define RESPONSE_CPRB_SIZE 0x000006B8 #define RESPONSE_CPRBX_SIZE 0x00000724 +struct type50_hdr { + u8 reserved1; + u8 msg_type_code; + u16 msg_len; + u8 reserved2; + u8 ignored; + u16 reserved3; +}; + +#define TYPE50_TYPE_CODE 0x50 + +#define TYPE50_MEB1_LEN (sizeof(struct type50_meb1_msg)) +#define TYPE50_MEB2_LEN (sizeof(struct type50_meb2_msg)) +#define TYPE50_CRB1_LEN (sizeof(struct type50_crb1_msg)) +#define TYPE50_CRB2_LEN (sizeof(struct type50_crb2_msg)) + +#define TYPE50_MEB1_FMT 0x0001 +#define TYPE50_MEB2_FMT 0x0002 +#define TYPE50_CRB1_FMT 0x0011 +#define TYPE50_CRB2_FMT 0x0012 + +struct type50_meb1_msg { + struct type50_hdr header; + u16 keyblock_type; + u8 reserved[6]; + u8 exponent[128]; + u8 modulus[128]; + u8 message[128]; +}; + +struct type50_meb2_msg { + struct type50_hdr header; + u16 keyblock_type; + u8 reserved[6]; + u8 exponent[256]; + u8 modulus[256]; + u8 message[256]; +}; + +struct type50_crb1_msg { + struct type50_hdr header; + u16 keyblock_type; + u8 reserved[6]; + u8 p[64]; + u8 q[64]; + u8 dp[64]; + u8 dq[64]; + u8 u[64]; + u8 message[128]; +}; + +struct type50_crb2_msg { + struct type50_hdr header; + u16 keyblock_type; + u8 reserved[6]; + u8 p[128]; + u8 q[128]; + u8 dp[128]; + u8 dq[128]; + u8 u[128]; + u8 message[256]; +}; + +union type50_msg { + struct type50_meb1_msg meb1; + struct type50_meb2_msg meb2; + struct type50_crb1_msg crb1; + struct type50_crb2_msg crb2; +}; + +struct type80_hdr { + u8 reserved1; + u8 type; + u16 len; + u8 code; + u8 reserved2[3]; + u8 reserved3[8]; +}; + +#define TYPE80_RSP_CODE 0x80 + struct error_hdr { unsigned char reserved1; unsigned char type; @@ -657,6 +738,7 @@ struct error_hdr { }; #define TYPE82_RSP_CODE 0x82 +#define TYPE88_RSP_CODE 0x88 #define REP82_ERROR_MACHINE_FAILURE 0x10 #define REP82_ERROR_PREEMPT_FAILURE 0x12 @@ -679,6 +761,22 @@ struct error_hdr { #define REP82_ERROR_PACKET_TRUNCATED 0xA0 #define REP82_ERROR_ZERO_BUFFER_LEN 0xB0 +#define REP88_ERROR_MODULE_FAILURE 0x10 +#define REP88_ERROR_MODULE_TIMEOUT 0x11 +#define REP88_ERROR_MODULE_NOTINIT 0x13 +#define REP88_ERROR_MODULE_NOTAVAIL 0x14 +#define REP88_ERROR_MODULE_DISABLED 0x15 +#define REP88_ERROR_MODULE_IN_DIAGN 0x17 +#define REP88_ERROR_FASTPATH_DISABLD 0x19 +#define REP88_ERROR_MESSAGE_TYPE 0x20 +#define REP88_ERROR_MESSAGE_MALFORMD 0x22 +#define REP88_ERROR_MESSAGE_LENGTH 0x23 +#define REP88_ERROR_RESERVED_FIELD 0x24 +#define REP88_ERROR_KEY_TYPE 0x34 +#define REP88_ERROR_INVALID_KEY 0x82 +#define REP88_ERROR_OPERAND 0x84 +#define REP88_ERROR_OPERAND_EVEN_MOD 0x85 + #define CALLER_HEADER 12 static inline int @@ -687,7 +785,7 @@ testq(int q_nr, int *q_depth, int *dev_type, struct ap_status_word *stat) int ccode; asm volatile -#ifdef __s390x__ +#ifdef CONFIG_64BIT (" llgfr 0,%4 \n" " slgr 1,1 \n" " lgr 2,1 \n" @@ -757,7 +855,7 @@ resetq(int q_nr, struct ap_status_word *stat_p) int ccode; asm volatile -#ifdef __s390x__ +#ifdef CONFIG_64BIT (" llgfr 0,%2 \n" " lghi 1,1 \n" " sll 1,24 \n" @@ -823,7 +921,7 @@ sen(int msg_len, unsigned char *msg_ext, struct ap_status_word *stat) int ccode; asm volatile -#ifdef __s390x__ +#ifdef CONFIG_64BIT (" lgr 6,%3 \n" " llgfr 7,%2 \n" " llgt 0,0(6) \n" @@ -902,7 +1000,7 @@ rec(int q_nr, int buff_l, unsigned char *rsp, unsigned char *id, int ccode; asm volatile -#ifdef __s390x__ +#ifdef CONFIG_64BIT (" llgfr 0,%2 \n" " lgr 3,%4 \n" " lgr 6,%3 \n" @@ -1029,10 +1127,6 @@ query_online(int deviceNr, int cdx, int resetNr, int *q_depth, int *dev_type) stat = HD_ONLINE; *q_depth = t_depth + 1; switch (t_dev_type) { - case OTHER_HW: - stat = HD_NOT_THERE; - *dev_type = NILDEV; - break; case PCICA_HW: *dev_type = PCICA; break; @@ -1045,6 +1139,9 @@ query_online(int deviceNr, int cdx, int resetNr, int *q_depth, int *dev_type) case CEX2C_HW: *dev_type = CEX2C; break; + case CEX2A_HW: + *dev_type = CEX2A; + break; default: *dev_type = NILDEV; break; @@ -2029,6 +2126,177 @@ ICACRT_msg_to_type6CRT_msgX(struct ica_rsa_modexpo_crt *icaMsg_p, int cdx, return 0; } +static int +ICAMEX_msg_to_type50MEX_msg(struct ica_rsa_modexpo *icaMex_p, int *z90cMsg_l_p, + union type50_msg *z90cMsg_p) +{ + int mod_len, msg_size, mod_tgt_len, exp_tgt_len, inp_tgt_len; + unsigned char *mod_tgt, *exp_tgt, *inp_tgt; + union type50_msg *tmp_type50_msg; + + mod_len = icaMex_p->inputdatalength; + + msg_size = ((mod_len <= 128) ? TYPE50_MEB1_LEN : TYPE50_MEB2_LEN) + + CALLER_HEADER; + + memset(z90cMsg_p, 0, msg_size); + + tmp_type50_msg = (union type50_msg *) + ((unsigned char *) z90cMsg_p + CALLER_HEADER); + + tmp_type50_msg->meb1.header.msg_type_code = TYPE50_TYPE_CODE; + + if (mod_len <= 128) { + tmp_type50_msg->meb1.header.msg_len = TYPE50_MEB1_LEN; + tmp_type50_msg->meb1.keyblock_type = TYPE50_MEB1_FMT; + mod_tgt = tmp_type50_msg->meb1.modulus; + mod_tgt_len = sizeof(tmp_type50_msg->meb1.modulus); + exp_tgt = tmp_type50_msg->meb1.exponent; + exp_tgt_len = sizeof(tmp_type50_msg->meb1.exponent); + inp_tgt = tmp_type50_msg->meb1.message; + inp_tgt_len = sizeof(tmp_type50_msg->meb1.message); + } else { + tmp_type50_msg->meb2.header.msg_len = TYPE50_MEB2_LEN; + tmp_type50_msg->meb2.keyblock_type = TYPE50_MEB2_FMT; + mod_tgt = tmp_type50_msg->meb2.modulus; + mod_tgt_len = sizeof(tmp_type50_msg->meb2.modulus); + exp_tgt = tmp_type50_msg->meb2.exponent; + exp_tgt_len = sizeof(tmp_type50_msg->meb2.exponent); + inp_tgt = tmp_type50_msg->meb2.message; + inp_tgt_len = sizeof(tmp_type50_msg->meb2.message); + } + + mod_tgt += (mod_tgt_len - mod_len); + if (copy_from_user(mod_tgt, icaMex_p->n_modulus, mod_len)) + return SEN_RELEASED; + if (is_empty(mod_tgt, mod_len)) + return SEN_USER_ERROR; + exp_tgt += (exp_tgt_len - mod_len); + if (copy_from_user(exp_tgt, icaMex_p->b_key, mod_len)) + return SEN_RELEASED; + if (is_empty(exp_tgt, mod_len)) + return SEN_USER_ERROR; + inp_tgt += (inp_tgt_len - mod_len); + if (copy_from_user(inp_tgt, icaMex_p->inputdata, mod_len)) + return SEN_RELEASED; + if (is_empty(inp_tgt, mod_len)) + return SEN_USER_ERROR; + + *z90cMsg_l_p = msg_size - CALLER_HEADER; + + return 0; +} + +static int +ICACRT_msg_to_type50CRT_msg(struct ica_rsa_modexpo_crt *icaMsg_p, + int *z90cMsg_l_p, union type50_msg *z90cMsg_p) +{ + int mod_len, short_len, long_len, tmp_size, p_tgt_len, q_tgt_len, + dp_tgt_len, dq_tgt_len, u_tgt_len, inp_tgt_len, long_offset; + unsigned char *p_tgt, *q_tgt, *dp_tgt, *dq_tgt, *u_tgt, *inp_tgt, + temp[8]; + union type50_msg *tmp_type50_msg; + + mod_len = icaMsg_p->inputdatalength; + short_len = mod_len / 2; + long_len = mod_len / 2 + 8; + long_offset = 0; + + if (long_len > 128) { + memset(temp, 0x00, sizeof(temp)); + if (copy_from_user(temp, icaMsg_p->np_prime, long_len-128)) + return SEN_RELEASED; + if (!is_empty(temp, 8)) + return SEN_NOT_AVAIL; + if (copy_from_user(temp, icaMsg_p->bp_key, long_len-128)) + return SEN_RELEASED; + if (!is_empty(temp, 8)) + return SEN_NOT_AVAIL; + if (copy_from_user(temp, icaMsg_p->u_mult_inv, long_len-128)) + return SEN_RELEASED; + if (!is_empty(temp, 8)) + return SEN_NOT_AVAIL; + long_offset = long_len - 128; + long_len = 128; + } + + tmp_size = ((mod_len <= 128) ? TYPE50_CRB1_LEN : TYPE50_CRB2_LEN) + + CALLER_HEADER; + + memset(z90cMsg_p, 0, tmp_size); + + tmp_type50_msg = (union type50_msg *) + ((unsigned char *) z90cMsg_p + CALLER_HEADER); + + tmp_type50_msg->crb1.header.msg_type_code = TYPE50_TYPE_CODE; + if (long_len <= 64) { + tmp_type50_msg->crb1.header.msg_len = TYPE50_CRB1_LEN; + tmp_type50_msg->crb1.keyblock_type = TYPE50_CRB1_FMT; + p_tgt = tmp_type50_msg->crb1.p; + p_tgt_len = sizeof(tmp_type50_msg->crb1.p); + q_tgt = tmp_type50_msg->crb1.q; + q_tgt_len = sizeof(tmp_type50_msg->crb1.q); + dp_tgt = tmp_type50_msg->crb1.dp; + dp_tgt_len = sizeof(tmp_type50_msg->crb1.dp); + dq_tgt = tmp_type50_msg->crb1.dq; + dq_tgt_len = sizeof(tmp_type50_msg->crb1.dq); + u_tgt = tmp_type50_msg->crb1.u; + u_tgt_len = sizeof(tmp_type50_msg->crb1.u); + inp_tgt = tmp_type50_msg->crb1.message; + inp_tgt_len = sizeof(tmp_type50_msg->crb1.message); + } else { + tmp_type50_msg->crb2.header.msg_len = TYPE50_CRB2_LEN; + tmp_type50_msg->crb2.keyblock_type = TYPE50_CRB2_FMT; + p_tgt = tmp_type50_msg->crb2.p; + p_tgt_len = sizeof(tmp_type50_msg->crb2.p); + q_tgt = tmp_type50_msg->crb2.q; + q_tgt_len = sizeof(tmp_type50_msg->crb2.q); + dp_tgt = tmp_type50_msg->crb2.dp; + dp_tgt_len = sizeof(tmp_type50_msg->crb2.dp); + dq_tgt = tmp_type50_msg->crb2.dq; + dq_tgt_len = sizeof(tmp_type50_msg->crb2.dq); + u_tgt = tmp_type50_msg->crb2.u; + u_tgt_len = sizeof(tmp_type50_msg->crb2.u); + inp_tgt = tmp_type50_msg->crb2.message; + inp_tgt_len = sizeof(tmp_type50_msg->crb2.message); + } + + p_tgt += (p_tgt_len - long_len); + if (copy_from_user(p_tgt, icaMsg_p->np_prime + long_offset, long_len)) + return SEN_RELEASED; + if (is_empty(p_tgt, long_len)) + return SEN_USER_ERROR; + q_tgt += (q_tgt_len - short_len); + if (copy_from_user(q_tgt, icaMsg_p->nq_prime, short_len)) + return SEN_RELEASED; + if (is_empty(q_tgt, short_len)) + return SEN_USER_ERROR; + dp_tgt += (dp_tgt_len - long_len); + if (copy_from_user(dp_tgt, icaMsg_p->bp_key + long_offset, long_len)) + return SEN_RELEASED; + if (is_empty(dp_tgt, long_len)) + return SEN_USER_ERROR; + dq_tgt += (dq_tgt_len - short_len); + if (copy_from_user(dq_tgt, icaMsg_p->bq_key, short_len)) + return SEN_RELEASED; + if (is_empty(dq_tgt, short_len)) + return SEN_USER_ERROR; + u_tgt += (u_tgt_len - long_len); + if (copy_from_user(u_tgt, icaMsg_p->u_mult_inv + long_offset, long_len)) + return SEN_RELEASED; + if (is_empty(u_tgt, long_len)) + return SEN_USER_ERROR; + inp_tgt += (inp_tgt_len - mod_len); + if (copy_from_user(inp_tgt, icaMsg_p->inputdata, mod_len)) + return SEN_RELEASED; + if (is_empty(inp_tgt, mod_len)) + return SEN_USER_ERROR; + + *z90cMsg_l_p = tmp_size - CALLER_HEADER; + + return 0; +} + int convert_request(unsigned char *buffer, int func, unsigned short function, int cdx, int dev_type, int *msg_l_p, unsigned char *msg_p) @@ -2071,6 +2339,16 @@ convert_request(unsigned char *buffer, int func, unsigned short function, cdx, msg_l_p, (struct type6_msg *) msg_p, dev_type); } + if (dev_type == CEX2A) { + if (func == ICARSACRT) + return ICACRT_msg_to_type50CRT_msg( + (struct ica_rsa_modexpo_crt *) buffer, + msg_l_p, (union type50_msg *) msg_p); + else + return ICAMEX_msg_to_type50MEX_msg( + (struct ica_rsa_modexpo *) buffer, + msg_l_p, (union type50_msg *) msg_p); + } return 0; } @@ -2081,8 +2359,8 @@ unset_ext_bitlens(void) { if (!ext_bitlens_msg_count) { PRINTK("Unable to use coprocessors for extended bitlengths. " - "Using PCICAs (if present) for extended bitlengths. " - "This is not an error.\n"); + "Using PCICAs/CEX2As (if present) for extended " + "bitlengths. This is not an error.\n"); ext_bitlens_msg_count++; } ext_bitlens = 0; @@ -2094,6 +2372,7 @@ convert_response(unsigned char *response, unsigned char *buffer, { struct ica_rsa_modexpo *icaMsg_p = (struct ica_rsa_modexpo *) buffer; struct error_hdr *errh_p = (struct error_hdr *) response; + struct type80_hdr *t80h_p = (struct type80_hdr *) response; struct type84_hdr *t84h_p = (struct type84_hdr *) response; struct type86_fmt2_msg *t86m_p = (struct type86_fmt2_msg *) response; int reply_code, service_rc, service_rs, src_l; @@ -2108,6 +2387,7 @@ convert_response(unsigned char *response, unsigned char *buffer, src_l = 0; switch (errh_p->type) { case TYPE82_RSP_CODE: + case TYPE88_RSP_CODE: reply_code = errh_p->reply_code; src_p = (unsigned char *)errh_p; PRINTK("Hardware error: Type %02X Message Header: " @@ -2116,6 +2396,10 @@ convert_response(unsigned char *response, unsigned char *buffer, src_p[0], src_p[1], src_p[2], src_p[3], src_p[4], src_p[5], src_p[6], src_p[7]); break; + case TYPE80_RSP_CODE: + src_l = icaMsg_p->outputdatalength; + src_p = response + (int)t80h_p->len - src_l; + break; case TYPE84_RSP_CODE: src_l = icaMsg_p->outputdatalength; src_p = response + (int)t84h_p->len - src_l; @@ -2202,6 +2486,7 @@ convert_response(unsigned char *response, unsigned char *buffer, if (reply_code) switch (reply_code) { case REP82_ERROR_OPERAND_INVALID: + case REP88_ERROR_MESSAGE_MALFORMD: return REC_OPERAND_INV; case REP82_ERROR_OPERAND_SIZE: return REC_OPERAND_SIZE; diff --git a/drivers/s390/crypto/z90main.c b/drivers/s390/crypto/z90main.c index 790fcbb74b4..135ae04e6e7 100644 --- a/drivers/s390/crypto/z90main.c +++ b/drivers/s390/crypto/z90main.c @@ -228,7 +228,7 @@ struct device_x { */ struct device { int dev_type; // PCICA, PCICC, PCIXCC_MCL2, - // PCIXCC_MCL3, CEX2C + // PCIXCC_MCL3, CEX2C, CEX2A enum devstat dev_stat; // current device status int dev_self_x; // Index in array int disabled; // Set when device is in error @@ -295,26 +295,30 @@ struct caller { /** * Function prototypes from z90hardware.c */ -enum hdstat query_online(int, int, int, int *, int *); -enum devstat reset_device(int, int, int); -enum devstat send_to_AP(int, int, int, unsigned char *); -enum devstat receive_from_AP(int, int, int, unsigned char *, unsigned char *); -int convert_request(unsigned char *, int, short, int, int, int *, - unsigned char *); -int convert_response(unsigned char *, unsigned char *, int *, unsigned char *); +enum hdstat query_online(int deviceNr, int cdx, int resetNr, int *q_depth, + int *dev_type); +enum devstat reset_device(int deviceNr, int cdx, int resetNr); +enum devstat send_to_AP(int dev_nr, int cdx, int msg_len, unsigned char *msg_ext); +enum devstat receive_from_AP(int dev_nr, int cdx, int resplen, + unsigned char *resp, unsigned char *psmid); +int convert_request(unsigned char *buffer, int func, unsigned short function, + int cdx, int dev_type, int *msg_l_p, unsigned char *msg_p); +int convert_response(unsigned char *response, unsigned char *buffer, + int *respbufflen_p, unsigned char *resp_buff); /** * Low level function prototypes */ -static int create_z90crypt(int *); -static int refresh_z90crypt(int *); -static int find_crypto_devices(struct status *); -static int create_crypto_device(int); -static int destroy_crypto_device(int); +static int create_z90crypt(int *cdx_p); +static int refresh_z90crypt(int *cdx_p); +static int find_crypto_devices(struct status *deviceMask); +static int create_crypto_device(int index); +static int destroy_crypto_device(int index); static void destroy_z90crypt(void); -static int refresh_index_array(struct status *, struct device_x *); -static int probe_device_type(struct device *); -static int probe_PCIXCC_type(struct device *); +static int refresh_index_array(struct status *status_str, + struct device_x *index_array); +static int probe_device_type(struct device *devPtr); +static int probe_PCIXCC_type(struct device *devPtr); /** * proc fs definitions @@ -425,7 +429,7 @@ static struct miscdevice z90crypt_misc_device = { MODULE_AUTHOR("zSeries Linux Crypto Team: Robert H. Burroughs, Eric D. Rossman" "and Jochen Roehrig"); MODULE_DESCRIPTION("zSeries Linux Cryptographic Coprocessor device driver, " - "Copyright 2001, 2004 IBM Corporation"); + "Copyright 2001, 2005 IBM Corporation"); MODULE_LICENSE("GPL"); module_param(domain, int, 0); MODULE_PARM_DESC(domain, "domain index for device"); @@ -860,6 +864,12 @@ get_status_CEX2Ccount(void) } static inline int +get_status_CEX2Acount(void) +{ + return z90crypt.hdware_info->type_mask[CEX2A].st_count; +} + +static inline int get_status_requestq_count(void) { return requestq_count; @@ -1008,11 +1018,13 @@ static inline int select_device_type(int *dev_type_p, int bytelength) { static int count = 0; - int PCICA_avail, PCIXCC_MCL3_avail, CEX2C_avail, index_to_use; + int PCICA_avail, PCIXCC_MCL3_avail, CEX2C_avail, CEX2A_avail, + index_to_use; struct status *stat; if ((*dev_type_p != PCICC) && (*dev_type_p != PCICA) && (*dev_type_p != PCIXCC_MCL2) && (*dev_type_p != PCIXCC_MCL3) && - (*dev_type_p != CEX2C) && (*dev_type_p != ANYDEV)) + (*dev_type_p != CEX2C) && (*dev_type_p != CEX2A) && + (*dev_type_p != ANYDEV)) return -1; if (*dev_type_p != ANYDEV) { stat = &z90crypt.hdware_info->type_mask[*dev_type_p]; @@ -1022,7 +1034,13 @@ select_device_type(int *dev_type_p, int bytelength) return -1; } - /* Assumption: PCICA, PCIXCC_MCL3, and CEX2C are all similar in speed */ + /** + * Assumption: PCICA, PCIXCC_MCL3, CEX2C, and CEX2A are all similar in + * speed. + * + * PCICA and CEX2A do NOT co-exist, so it would be either one or the + * other present. + */ stat = &z90crypt.hdware_info->type_mask[PCICA]; PCICA_avail = stat->st_count - (stat->disabled_count + stat->user_disabled_count); @@ -1032,29 +1050,38 @@ select_device_type(int *dev_type_p, int bytelength) stat = &z90crypt.hdware_info->type_mask[CEX2C]; CEX2C_avail = stat->st_count - (stat->disabled_count + stat->user_disabled_count); - if (PCICA_avail || PCIXCC_MCL3_avail || CEX2C_avail) { + stat = &z90crypt.hdware_info->type_mask[CEX2A]; + CEX2A_avail = stat->st_count - + (stat->disabled_count + stat->user_disabled_count); + if (PCICA_avail || PCIXCC_MCL3_avail || CEX2C_avail || CEX2A_avail) { /** - * bitlength is a factor, PCICA is the most capable, even with - * the new MCL for PCIXCC. + * bitlength is a factor, PCICA or CEX2A are the most capable, + * even with the new MCL for PCIXCC. */ if ((bytelength < PCIXCC_MIN_MOD_SIZE) || (!ext_bitlens && (bytelength < OLD_PCIXCC_MIN_MOD_SIZE))) { - if (!PCICA_avail) - return -1; - else { + if (PCICA_avail) { *dev_type_p = PCICA; return 0; } + if (CEX2A_avail) { + *dev_type_p = CEX2A; + return 0; + } + return -1; } index_to_use = count % (PCICA_avail + PCIXCC_MCL3_avail + - CEX2C_avail); + CEX2C_avail + CEX2A_avail); if (index_to_use < PCICA_avail) *dev_type_p = PCICA; else if (index_to_use < (PCICA_avail + PCIXCC_MCL3_avail)) *dev_type_p = PCIXCC_MCL3; - else + else if (index_to_use < (PCICA_avail + PCIXCC_MCL3_avail + + CEX2C_avail)) *dev_type_p = CEX2C; + else + *dev_type_p = CEX2A; count++; return 0; } @@ -1359,7 +1386,7 @@ build_caller(struct work_element *we_p, short function) if ((we_p->devtype != PCICC) && (we_p->devtype != PCICA) && (we_p->devtype != PCIXCC_MCL2) && (we_p->devtype != PCIXCC_MCL3) && - (we_p->devtype != CEX2C)) + (we_p->devtype != CEX2C) && (we_p->devtype != CEX2A)) return SEN_NOT_AVAIL; memcpy(caller_p->caller_id, we_p->caller_id, @@ -1428,7 +1455,8 @@ get_crypto_request_buffer(struct work_element *we_p) if ((we_p->devtype != PCICA) && (we_p->devtype != PCICC) && (we_p->devtype != PCIXCC_MCL2) && (we_p->devtype != PCIXCC_MCL3) && - (we_p->devtype != CEX2C) && (we_p->devtype != ANYDEV)) { + (we_p->devtype != CEX2C) && (we_p->devtype != CEX2A) && + (we_p->devtype != ANYDEV)) { PRINTK("invalid device type\n"); return SEN_USER_ERROR; } @@ -1503,8 +1531,9 @@ get_crypto_request_buffer(struct work_element *we_p) function = PCI_FUNC_KEY_ENCRYPT; switch (we_p->devtype) { - /* PCICA does everything with a simple RSA mod-expo operation */ + /* PCICA and CEX2A do everything with a simple RSA mod-expo operation */ case PCICA: + case CEX2A: function = PCI_FUNC_KEY_ENCRYPT; break; /** @@ -1662,7 +1691,8 @@ z90crypt_rsa(struct priv_data *private_data_p, pid_t pid, * trigger a fallback to software. */ case -EINVAL: - if (we_p->devtype != PCICA) + if ((we_p->devtype != PCICA) && + (we_p->devtype != CEX2A)) rv = -EGETBUFF; break; case -ETIMEOUT: @@ -1779,6 +1809,12 @@ z90crypt_unlocked_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) ret = -EFAULT; break; + case Z90STAT_CEX2ACOUNT: + tempstat = get_status_CEX2Acount(); + if (copy_to_user((int __user *)arg, &tempstat, sizeof(int)) != 0) + ret = -EFAULT; + break; + case Z90STAT_REQUESTQ_COUNT: tempstat = get_status_requestq_count(); if (copy_to_user((int __user *)arg, &tempstat, sizeof(int)) != 0) @@ -2019,6 +2055,8 @@ z90crypt_status(char *resp_buff, char **start, off_t offset, get_status_PCIXCCMCL3count()); len += sprintf(resp_buff+len, "CEX2C count: %d\n", get_status_CEX2Ccount()); + len += sprintf(resp_buff+len, "CEX2A count: %d\n", + get_status_CEX2Acount()); len += sprintf(resp_buff+len, "requestq count: %d\n", get_status_requestq_count()); len += sprintf(resp_buff+len, "pendingq count: %d\n", @@ -2026,8 +2064,8 @@ z90crypt_status(char *resp_buff, char **start, off_t offset, len += sprintf(resp_buff+len, "Total open handles: %d\n\n", get_status_totalopen_count()); len += sprinthx( - "Online devices: 1: PCICA, 2: PCICC, 3: PCIXCC (MCL2), " - "4: PCIXCC (MCL3), 5: CEX2C", + "Online devices: 1=PCICA 2=PCICC 3=PCIXCC(MCL2) " + "4=PCIXCC(MCL3) 5=CEX2C 6=CEX2A", resp_buff+len, get_status_status_mask(workarea), Z90CRYPT_NUM_APS); @@ -2140,6 +2178,7 @@ z90crypt_status_write(struct file *file, const char __user *buffer, case '3': // PCIXCC_MCL2 case '4': // PCIXCC_MCL3 case '5': // CEX2C + case '6': // CEX2A j++; break; case 'd': @@ -3007,7 +3046,9 @@ create_crypto_device(int index) z90crypt.hdware_info->device_type_array[index] = 4; else if (deviceType == CEX2C) z90crypt.hdware_info->device_type_array[index] = 5; - else + else if (deviceType == CEX2A) + z90crypt.hdware_info->device_type_array[index] = 6; + else // No idea how this would happen. z90crypt.hdware_info->device_type_array[index] = -1; } diff --git a/drivers/s390/net/Kconfig b/drivers/s390/net/Kconfig index a7efc394515..54885475492 100644 --- a/drivers/s390/net/Kconfig +++ b/drivers/s390/net/Kconfig @@ -1,5 +1,5 @@ menu "S/390 network device drivers" - depends on NETDEVICES && ARCH_S390 + depends on NETDEVICES && S390 config LCS tristate "Lan Channel Station Interface" diff --git a/drivers/s390/net/claw.c b/drivers/s390/net/claw.c index 6b63d21612e..e70af7f3994 100644 --- a/drivers/s390/net/claw.c +++ b/drivers/s390/net/claw.c @@ -1603,7 +1603,7 @@ dumpit(char* buf, int len) __u32 ct, sw, rm, dup; char *ptr, *rptr; char tbuf[82], tdup[82]; -#if (CONFIG_ARCH_S390X) +#if (CONFIG_64BIT) char addr[22]; #else char addr[12]; @@ -1619,7 +1619,7 @@ dumpit(char* buf, int len) dup = 0; for ( ct=0; ct < len; ct++, ptr++, rptr++ ) { if (sw == 0) { -#if (CONFIG_ARCH_S390X) +#if (CONFIG_64BIT) sprintf(addr, "%16.16lX",(unsigned long)rptr); #else sprintf(addr, "%8.8X",(__u32)rptr); @@ -1634,7 +1634,7 @@ dumpit(char* buf, int len) if (sw == 8) { strcat(bhex, " "); } -#if (CONFIG_ARCH_S390X) +#if (CONFIG_64BIT) sprintf(tbuf,"%2.2lX", (unsigned long)*ptr); #else sprintf(tbuf,"%2.2X", (__u32)*ptr); diff --git a/drivers/s390/net/cu3088.c b/drivers/s390/net/cu3088.c index 0075894c71d..77dacb46573 100644 --- a/drivers/s390/net/cu3088.c +++ b/drivers/s390/net/cu3088.c @@ -1,5 +1,5 @@ /* - * $Id: cu3088.c,v 1.35 2005/03/30 19:28:52 richtera Exp $ + * $Id: cu3088.c,v 1.36 2005/10/25 14:37:17 cohuck Exp $ * * CTC / LCS ccw_device driver * @@ -27,6 +27,7 @@ #include <linux/module.h> #include <linux/err.h> +#include <asm/s390_rdev.h> #include <asm/ccwdev.h> #include <asm/ccwgroup.h> diff --git a/drivers/s390/net/iucv.c b/drivers/s390/net/iucv.c index df7647c3c10..ea817739256 100644 --- a/drivers/s390/net/iucv.c +++ b/drivers/s390/net/iucv.c @@ -1,5 +1,5 @@ /* - * $Id: iucv.c,v 1.45 2005/04/26 22:59:06 braunu Exp $ + * $Id: iucv.c,v 1.47 2005/11/21 11:35:22 mschwide Exp $ * * IUCV network driver * @@ -29,7 +29,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * - * RELEASE-TAG: IUCV lowlevel driver $Revision: 1.45 $ + * RELEASE-TAG: IUCV lowlevel driver $Revision: 1.47 $ * */ @@ -54,7 +54,7 @@ #include <asm/s390_ext.h> #include <asm/ebcdic.h> #include <asm/smp.h> -#include <asm/ccwdev.h> //for root device stuff +#include <asm/s390_rdev.h> /* FLAGS: * All flags are defined in the field IPFLAGS1 of each function @@ -355,7 +355,7 @@ do { \ static void iucv_banner(void) { - char vbuf[] = "$Revision: 1.45 $"; + char vbuf[] = "$Revision: 1.47 $"; char *version = vbuf; if ((version = strchr(version, ':'))) { @@ -477,7 +477,7 @@ grab_param(void) ptr++; if (ptr >= iucv_param_pool + PARAM_POOL_SIZE) ptr = iucv_param_pool; - } while (atomic_compare_and_swap(0, 1, &ptr->in_use)); + } while (atomic_cmpxchg(&ptr->in_use, 0, 1) != 0); hint = ptr - iucv_param_pool; memset(&ptr->param, 0, sizeof(ptr->param)); diff --git a/drivers/s390/net/qeth_main.c b/drivers/s390/net/qeth_main.c index f8f55cc468b..97f927c01a8 100644 --- a/drivers/s390/net/qeth_main.c +++ b/drivers/s390/net/qeth_main.c @@ -65,6 +65,7 @@ #include <asm/timex.h> #include <asm/semaphore.h> #include <asm/uaccess.h> +#include <asm/s390_rdev.h> #include "qeth.h" #include "qeth_mpc.h" @@ -1396,7 +1397,7 @@ qeth_idx_activate_get_answer(struct qeth_channel *channel, channel->ccw.cda = (__u32) __pa(iob->data); wait_event(card->wait_q, - atomic_compare_and_swap(0,1,&channel->irq_pending) == 0); + atomic_cmpxchg(&channel->irq_pending, 0, 1) == 0); QETH_DBF_TEXT(setup, 6, "noirqpnd"); spin_lock_irqsave(get_ccwdev_lock(channel->ccwdev), flags); rc = ccw_device_start(channel->ccwdev, @@ -1463,7 +1464,7 @@ qeth_idx_activate_channel(struct qeth_channel *channel, memcpy(QETH_IDX_ACT_QDIO_DEV_REALADDR(iob->data), &temp, 2); wait_event(card->wait_q, - atomic_compare_and_swap(0,1,&channel->irq_pending) == 0); + atomic_cmpxchg(&channel->irq_pending, 0, 1) == 0); QETH_DBF_TEXT(setup, 6, "noirqpnd"); spin_lock_irqsave(get_ccwdev_lock(channel->ccwdev), flags); rc = ccw_device_start(channel->ccwdev, @@ -1616,7 +1617,7 @@ qeth_issue_next_read(struct qeth_card *card) } qeth_setup_ccw(&card->read, iob->data, QETH_BUFSIZE); wait_event(card->wait_q, - atomic_compare_and_swap(0,1,&card->read.irq_pending) == 0); + atomic_cmpxchg(&card->read.irq_pending, 0, 1) == 0); QETH_DBF_TEXT(trace, 6, "noirqpnd"); rc = ccw_device_start(card->read.ccwdev, &card->read.ccw, (addr_t) iob, 0, 0); @@ -1882,7 +1883,7 @@ qeth_send_control_data(struct qeth_card *card, int len, spin_unlock_irqrestore(&card->lock, flags); QETH_DBF_HEX(control, 2, iob->data, QETH_DBF_CONTROL_LEN); wait_event(card->wait_q, - atomic_compare_and_swap(0,1,&card->write.irq_pending) == 0); + atomic_cmpxchg(&card->write.irq_pending, 0, 1) == 0); qeth_prepare_control_data(card, len, iob); if (IS_IPA(iob->data)) timer.expires = jiffies + QETH_IPA_TIMEOUT; @@ -1924,7 +1925,7 @@ qeth_osn_send_control_data(struct qeth_card *card, int len, QETH_DBF_TEXT(trace, 5, "osndctrd"); wait_event(card->wait_q, - atomic_compare_and_swap(0,1,&card->write.irq_pending) == 0); + atomic_cmpxchg(&card->write.irq_pending, 0, 1) == 0); qeth_prepare_control_data(card, len, iob); QETH_DBF_TEXT(trace, 6, "osnoirqp"); spin_lock_irqsave(get_ccwdev_lock(card->write.ccwdev), flags); @@ -4236,9 +4237,8 @@ qeth_do_send_packet_fast(struct qeth_card *card, struct qeth_qdio_out_q *queue, QETH_DBF_TEXT(trace, 6, "dosndpfa"); /* spin until we get the queue ... */ - while (atomic_compare_and_swap(QETH_OUT_Q_UNLOCKED, - QETH_OUT_Q_LOCKED, - &queue->state)); + while (atomic_cmpxchg(&queue->state, QETH_OUT_Q_UNLOCKED, + QETH_OUT_Q_LOCKED) != QETH_OUT_Q_UNLOCKED); /* ... now we've got the queue */ index = queue->next_buf_to_fill; buffer = &queue->bufs[queue->next_buf_to_fill]; @@ -4292,9 +4292,8 @@ qeth_do_send_packet(struct qeth_card *card, struct qeth_qdio_out_q *queue, QETH_DBF_TEXT(trace, 6, "dosndpkt"); /* spin until we get the queue ... */ - while (atomic_compare_and_swap(QETH_OUT_Q_UNLOCKED, - QETH_OUT_Q_LOCKED, - &queue->state)); + while (atomic_cmpxchg(&queue->state, QETH_OUT_Q_UNLOCKED, + QETH_OUT_Q_LOCKED) != QETH_OUT_Q_UNLOCKED); start_index = queue->next_buf_to_fill; buffer = &queue->bufs[queue->next_buf_to_fill]; /* diff --git a/drivers/s390/s390_rdev.c b/drivers/s390/s390_rdev.c new file mode 100644 index 00000000000..566cc3d185b --- /dev/null +++ b/drivers/s390/s390_rdev.c @@ -0,0 +1,53 @@ +/* + * drivers/s390/s390_rdev.c + * s390 root device + * $Revision: 1.2 $ + * + * Copyright (C) 2002, 2005 IBM Deutschland Entwicklung GmbH, + * IBM Corporation + * Author(s): Cornelia Huck (cohuck@de.ibm.com) + * Carsten Otte (cotte@de.ibm.com) + */ + +#include <linux/slab.h> +#include <linux/err.h> +#include <linux/device.h> +#include <asm/s390_rdev.h> + +static void +s390_root_dev_release(struct device *dev) +{ + kfree(dev); +} + +struct device * +s390_root_dev_register(const char *name) +{ + struct device *dev; + int ret; + + if (!strlen(name)) + return ERR_PTR(-EINVAL); + dev = kmalloc(sizeof(struct device), GFP_KERNEL); + if (!dev) + return ERR_PTR(-ENOMEM); + memset(dev, 0, sizeof(struct device)); + strncpy(dev->bus_id, name, min(strlen(name), (size_t)BUS_ID_SIZE)); + dev->release = s390_root_dev_release; + ret = device_register(dev); + if (ret) { + kfree(dev); + return ERR_PTR(ret); + } + return dev; +} + +void +s390_root_dev_unregister(struct device *dev) +{ + if (dev) + device_unregister(dev); +} + +EXPORT_SYMBOL(s390_root_dev_register); +EXPORT_SYMBOL(s390_root_dev_unregister); diff --git a/drivers/s390/s390mach.c b/drivers/s390/s390mach.c index 4191fd9d4d1..3bf46660351 100644 --- a/drivers/s390/s390mach.c +++ b/drivers/s390/s390mach.c @@ -23,7 +23,7 @@ static struct semaphore m_sem; -extern int css_process_crw(int); +extern int css_process_crw(int, int); extern int chsc_process_crw(void); extern int chp_process_crw(int, int); extern void css_reiterate_subchannels(void); @@ -49,9 +49,10 @@ s390_handle_damage(char *msg) static int s390_collect_crw_info(void *param) { - struct crw crw; + struct crw crw[2]; int ccode, ret, slow; struct semaphore *sem; + unsigned int chain; sem = (struct semaphore *)param; /* Set a nice name. */ @@ -59,25 +60,50 @@ s390_collect_crw_info(void *param) repeat: down_interruptible(sem); slow = 0; + chain = 0; while (1) { - ccode = stcrw(&crw); + if (unlikely(chain > 1)) { + struct crw tmp_crw; + + printk(KERN_WARNING"%s: Code does not support more " + "than two chained crws; please report to " + "linux390@de.ibm.com!\n", __FUNCTION__); + ccode = stcrw(&tmp_crw); + printk(KERN_WARNING"%s: crw reports slct=%d, oflw=%d, " + "chn=%d, rsc=%X, anc=%d, erc=%X, rsid=%X\n", + __FUNCTION__, tmp_crw.slct, tmp_crw.oflw, + tmp_crw.chn, tmp_crw.rsc, tmp_crw.anc, + tmp_crw.erc, tmp_crw.rsid); + printk(KERN_WARNING"%s: This was crw number %x in the " + "chain\n", __FUNCTION__, chain); + if (ccode != 0) + break; + chain = tmp_crw.chn ? chain + 1 : 0; + continue; + } + ccode = stcrw(&crw[chain]); if (ccode != 0) break; DBG(KERN_DEBUG "crw_info : CRW reports slct=%d, oflw=%d, " "chn=%d, rsc=%X, anc=%d, erc=%X, rsid=%X\n", - crw.slct, crw.oflw, crw.chn, crw.rsc, crw.anc, - crw.erc, crw.rsid); + crw[chain].slct, crw[chain].oflw, crw[chain].chn, + crw[chain].rsc, crw[chain].anc, crw[chain].erc, + crw[chain].rsid); /* Check for overflows. */ - if (crw.oflw) { + if (crw[chain].oflw) { pr_debug("%s: crw overflow detected!\n", __FUNCTION__); css_reiterate_subchannels(); + chain = 0; slow = 1; continue; } - switch (crw.rsc) { + switch (crw[chain].rsc) { case CRW_RSC_SCH: - pr_debug("source is subchannel %04X\n", crw.rsid); - ret = css_process_crw (crw.rsid); + if (crw[0].chn && !chain) + break; + pr_debug("source is subchannel %04X\n", crw[0].rsid); + ret = css_process_crw (crw[0].rsid, + chain ? crw[1].rsid : 0); if (ret == -EAGAIN) slow = 1; break; @@ -85,18 +111,18 @@ repeat: pr_debug("source is monitoring facility\n"); break; case CRW_RSC_CPATH: - pr_debug("source is channel path %02X\n", crw.rsid); - switch (crw.erc) { + pr_debug("source is channel path %02X\n", crw[0].rsid); + switch (crw[0].erc) { case CRW_ERC_IPARM: /* Path has come. */ - ret = chp_process_crw(crw.rsid, 1); + ret = chp_process_crw(crw[0].rsid, 1); break; case CRW_ERC_PERRI: /* Path has gone. */ case CRW_ERC_PERRN: - ret = chp_process_crw(crw.rsid, 0); + ret = chp_process_crw(crw[0].rsid, 0); break; default: pr_debug("Don't know how to handle erc=%x\n", - crw.erc); + crw[0].erc); ret = 0; } if (ret == -EAGAIN) @@ -115,6 +141,8 @@ repeat: pr_debug("unknown source\n"); break; } + /* chain is always 0 or 1 here. */ + chain = crw[chain].chn ? chain + 1 : 0; } if (slow) queue_work(slow_path_wq, &slow_path_work); @@ -218,7 +246,7 @@ s390_revalidate_registers(struct mci *mci) */ kill_task = 1; -#ifndef __s390x__ +#ifndef CONFIG_64BIT asm volatile("ld 0,0(%0)\n" "ld 2,8(%0)\n" "ld 4,16(%0)\n" @@ -227,7 +255,7 @@ s390_revalidate_registers(struct mci *mci) #endif if (MACHINE_HAS_IEEE) { -#ifdef __s390x__ +#ifdef CONFIG_64BIT fpt_save_area = &S390_lowcore.floating_pt_save_area; fpt_creg_save_area = &S390_lowcore.fpt_creg_save_area; #else @@ -286,7 +314,7 @@ s390_revalidate_registers(struct mci *mci) */ s390_handle_damage("invalid control registers."); else -#ifdef __s390x__ +#ifdef CONFIG_64BIT asm volatile("lctlg 0,15,0(%0)" : : "a" (&S390_lowcore.cregs_save_area)); #else @@ -299,7 +327,7 @@ s390_revalidate_registers(struct mci *mci) * can't write something sensible into that register. */ -#ifdef __s390x__ +#ifdef CONFIG_64BIT /* * See if we can revalidate the TOD programmable register with its * old contents (should be zero) otherwise set it to zero. @@ -356,7 +384,7 @@ s390_do_machine_check(struct pt_regs *regs) if (mci->b) { /* Processing backup -> verify if we can survive this */ u64 z_mcic, o_mcic, t_mcic; -#ifdef __s390x__ +#ifdef CONFIG_64BIT z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<29); o_mcic = (1ULL<<43 | 1ULL<<42 | 1ULL<<41 | 1ULL<<40 | 1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 | diff --git a/drivers/s390/sysinfo.c b/drivers/s390/sysinfo.c index 87c2db1bd4f..66da840c931 100644 --- a/drivers/s390/sysinfo.c +++ b/drivers/s390/sysinfo.c @@ -106,7 +106,7 @@ static inline int stsi (void *sysinfo, { int cc, retv; -#ifndef CONFIG_ARCH_S390X +#ifndef CONFIG_64BIT __asm__ __volatile__ ( "lr\t0,%2\n" "\tlr\t1,%3\n" "\tstsi\t0(%4)\n" diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 4c42065dea8..3c606cf8c8c 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -914,7 +914,7 @@ config SCSI_INIA100 config SCSI_PPA tristate "IOMEGA parallel port (ppa - older drives)" - depends on SCSI && PARPORT + depends on SCSI && PARPORT_PC ---help--- This driver supports older versions of IOMEGA's parallel port ZIP drive (a 100 MB removable media device). @@ -941,7 +941,7 @@ config SCSI_PPA config SCSI_IMM tristate "IOMEGA parallel port (imm - newer drives)" - depends on SCSI && PARPORT + depends on SCSI && PARPORT_PC ---help--- This driver supports newer versions of IOMEGA's parallel port ZIP drive (a 100 MB removable media device). @@ -968,7 +968,7 @@ config SCSI_IMM config SCSI_IZIP_EPP16 bool "ppa/imm option - Use slow (but safe) EPP-16" - depends on PARPORT && (SCSI_PPA || SCSI_IMM) + depends on SCSI_PPA || SCSI_IMM ---help--- EPP (Enhanced Parallel Port) is a standard for parallel ports which allows them to act as expansion buses that can handle up to 64 @@ -983,7 +983,7 @@ config SCSI_IZIP_EPP16 config SCSI_IZIP_SLOW_CTR bool "ppa/imm option - Assume slow parport control register" - depends on PARPORT && (SCSI_PPA || SCSI_IMM) + depends on SCSI_PPA || SCSI_IMM help Some parallel ports are known to have excessive delays between changing the parallel port control register and good data being @@ -1815,7 +1815,7 @@ config SCSI_SUNESP config ZFCP tristate "FCP host bus adapter driver for IBM eServer zSeries" - depends on ARCH_S390 && QDIO && SCSI + depends on S390 && QDIO && SCSI select SCSI_FC_ATTRS help If you want to access SCSI devices attached to your IBM eServer diff --git a/drivers/scsi/ata_piix.c b/drivers/scsi/ata_piix.c index 4b647eefc9a..557788ec4ee 100644 --- a/drivers/scsi/ata_piix.c +++ b/drivers/scsi/ata_piix.c @@ -166,6 +166,8 @@ static struct pci_driver piix_pci_driver = { .id_table = piix_pci_tbl, .probe = piix_init_one, .remove = ata_pci_remove_one, + .suspend = ata_pci_device_suspend, + .resume = ata_pci_device_resume, }; static struct scsi_host_template piix_sht = { @@ -185,6 +187,8 @@ static struct scsi_host_template piix_sht = { .dma_boundary = ATA_DMA_BOUNDARY, .slave_configure = ata_scsi_slave_config, .bios_param = ata_std_bios_param, + .resume = ata_scsi_device_resume, + .suspend = ata_scsi_device_suspend, }; static const struct ata_port_operations piix_pata_ops = { diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index bdfb0a88cd6..f55b9b3f7b3 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c @@ -4173,6 +4173,96 @@ err_out: * Inherited from caller. */ +/* + * Execute a 'simple' command, that only consists of the opcode 'cmd' itself, + * without filling any other registers + */ +static int ata_do_simple_cmd(struct ata_port *ap, struct ata_device *dev, + u8 cmd) +{ + struct ata_taskfile tf; + int err; + + ata_tf_init(ap, &tf, dev->devno); + + tf.command = cmd; + tf.flags |= ATA_TFLAG_DEVICE; + tf.protocol = ATA_PROT_NODATA; + + err = ata_exec_internal(ap, dev, &tf, DMA_NONE, NULL, 0); + if (err) + printk(KERN_ERR "%s: ata command failed: %d\n", + __FUNCTION__, err); + + return err; +} + +static int ata_flush_cache(struct ata_port *ap, struct ata_device *dev) +{ + u8 cmd; + + if (!ata_try_flush_cache(dev)) + return 0; + + if (ata_id_has_flush_ext(dev->id)) + cmd = ATA_CMD_FLUSH_EXT; + else + cmd = ATA_CMD_FLUSH; + + return ata_do_simple_cmd(ap, dev, cmd); +} + +static int ata_standby_drive(struct ata_port *ap, struct ata_device *dev) +{ + return ata_do_simple_cmd(ap, dev, ATA_CMD_STANDBYNOW1); +} + +static int ata_start_drive(struct ata_port *ap, struct ata_device *dev) +{ + return ata_do_simple_cmd(ap, dev, ATA_CMD_IDLEIMMEDIATE); +} + +/** + * ata_device_resume - wakeup a previously suspended devices + * + * Kick the drive back into action, by sending it an idle immediate + * command and making sure its transfer mode matches between drive + * and host. + * + */ +int ata_device_resume(struct ata_port *ap, struct ata_device *dev) +{ + if (ap->flags & ATA_FLAG_SUSPENDED) { + ap->flags &= ~ATA_FLAG_SUSPENDED; + ata_set_mode(ap); + } + if (!ata_dev_present(dev)) + return 0; + if (dev->class == ATA_DEV_ATA) + ata_start_drive(ap, dev); + + return 0; +} + +/** + * ata_device_suspend - prepare a device for suspend + * + * Flush the cache on the drive, if appropriate, then issue a + * standbynow command. + * + */ +int ata_device_suspend(struct ata_port *ap, struct ata_device *dev) +{ + if (!ata_dev_present(dev)) + return 0; + if (dev->class == ATA_DEV_ATA) + ata_flush_cache(ap, dev); + + ata_standby_drive(ap, dev); + ap->flags |= ATA_FLAG_SUSPENDED; + return 0; +} + int ata_port_start (struct ata_port *ap) { struct device *dev = ap->host_set->dev; @@ -4921,6 +5011,23 @@ int pci_test_config_bits(struct pci_dev *pdev, const struct pci_bits *bits) return (tmp == bits->val) ? 1 : 0; } + +int ata_pci_device_suspend(struct pci_dev *pdev, pm_message_t state) +{ + pci_save_state(pdev); + pci_disable_device(pdev); + pci_set_power_state(pdev, PCI_D3hot); + return 0; +} + +int ata_pci_device_resume(struct pci_dev *pdev) +{ + pci_set_power_state(pdev, PCI_D0); + pci_restore_state(pdev); + pci_enable_device(pdev); + pci_set_master(pdev); + return 0; +} #endif /* CONFIG_PCI */ @@ -5024,4 +5131,11 @@ EXPORT_SYMBOL_GPL(ata_pci_host_stop); EXPORT_SYMBOL_GPL(ata_pci_init_native_mode); EXPORT_SYMBOL_GPL(ata_pci_init_one); EXPORT_SYMBOL_GPL(ata_pci_remove_one); +EXPORT_SYMBOL_GPL(ata_pci_device_suspend); +EXPORT_SYMBOL_GPL(ata_pci_device_resume); #endif /* CONFIG_PCI */ + +EXPORT_SYMBOL_GPL(ata_device_suspend); +EXPORT_SYMBOL_GPL(ata_device_resume); +EXPORT_SYMBOL_GPL(ata_scsi_device_suspend); +EXPORT_SYMBOL_GPL(ata_scsi_device_resume); diff --git a/drivers/scsi/libata-scsi.c b/drivers/scsi/libata-scsi.c index 2c644cbb6e9..cfbceb50471 100644 --- a/drivers/scsi/libata-scsi.c +++ b/drivers/scsi/libata-scsi.c @@ -396,6 +396,22 @@ void ata_dump_status(unsigned id, struct ata_taskfile *tf) } } +int ata_scsi_device_resume(struct scsi_device *sdev) +{ + struct ata_port *ap = (struct ata_port *) &sdev->host->hostdata[0]; + struct ata_device *dev = &ap->device[sdev->id]; + + return ata_device_resume(ap, dev); +} + +int ata_scsi_device_suspend(struct scsi_device *sdev) +{ + struct ata_port *ap = (struct ata_port *) &sdev->host->hostdata[0]; + struct ata_device *dev = &ap->device[sdev->id]; + + return ata_device_suspend(ap, dev); +} + /** * ata_to_sense_error - convert ATA error to SCSI error * @id: ATA device number diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 15842b1f0f4..ea7f3a43357 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -263,9 +263,40 @@ static int scsi_bus_match(struct device *dev, struct device_driver *gendrv) return (sdp->inq_periph_qual == SCSI_INQ_PQ_CON)? 1: 0; } +static int scsi_bus_suspend(struct device * dev, pm_message_t state) +{ + struct scsi_device *sdev = to_scsi_device(dev); + struct scsi_host_template *sht = sdev->host->hostt; + int err; + + err = scsi_device_quiesce(sdev); + if (err) + return err; + + if (sht->suspend) + err = sht->suspend(sdev); + + return err; +} + +static int scsi_bus_resume(struct device * dev) +{ + struct scsi_device *sdev = to_scsi_device(dev); + struct scsi_host_template *sht = sdev->host->hostt; + int err = 0; + + if (sht->resume) + err = sht->resume(sdev); + + scsi_device_resume(sdev); + return err; +} + struct bus_type scsi_bus_type = { .name = "scsi", .match = scsi_bus_match, + .suspend = scsi_bus_suspend, + .resume = scsi_bus_resume, }; int scsi_sysfs_register(void) diff --git a/drivers/serial/mpc52xx_uart.c b/drivers/serial/mpc52xx_uart.c index b8727d9bf69..1288d6203e9 100644 --- a/drivers/serial/mpc52xx_uart.c +++ b/drivers/serial/mpc52xx_uart.c @@ -37,11 +37,11 @@ * by the bootloader or in the platform init code. * * The idx field must be equal to the PSC index ( e.g. 0 for PSC1, 1 for PSC2, - * and so on). So the PSC1 is mapped to /dev/ttyS0, PSC2 to /dev/ttyS1 and so - * on. But be warned, it's an ABSOLUTE REQUIREMENT ! This is needed mainly for - * the console code : without this 1:1 mapping, at early boot time, when we are - * parsing the kernel args console=ttyS?, we wouldn't know wich PSC it will be - * mapped to. + * and so on). So the PSC1 is mapped to /dev/ttyPSC0, PSC2 to /dev/ttyPSC1 and + * so on. But be warned, it's an ABSOLUTE REQUIREMENT ! This is needed mainly + * fpr the console code : without this 1:1 mapping, at early boot time, when we + * are parsing the kernel args console=ttyPSC?, we wouldn't know wich PSC it + * will be mapped to. */ #include <linux/config.h> @@ -65,6 +65,10 @@ #include <linux/serial_core.h> +/* We've been assigned a range on the "Low-density serial ports" major */ +#define SERIAL_PSC_MAJOR 204 +#define SERIAL_PSC_MINOR 148 + #define ISR_PASS_LIMIT 256 /* Max number of iteration in the interrupt */ @@ -668,15 +672,15 @@ mpc52xx_console_setup(struct console *co, char *options) } -extern struct uart_driver mpc52xx_uart_driver; +static struct uart_driver mpc52xx_uart_driver; static struct console mpc52xx_console = { - .name = "ttyS", + .name = "ttyPSC", .write = mpc52xx_console_write, .device = uart_console_device, .setup = mpc52xx_console_setup, .flags = CON_PRINTBUFFER, - .index = -1, /* Specified on the cmdline (e.g. console=ttyS0 ) */ + .index = -1, /* Specified on the cmdline (e.g. console=ttyPSC0 ) */ .data = &mpc52xx_uart_driver, }; @@ -703,10 +707,10 @@ console_initcall(mpc52xx_console_init); static struct uart_driver mpc52xx_uart_driver = { .owner = THIS_MODULE, .driver_name = "mpc52xx_psc_uart", - .dev_name = "ttyS", - .devfs_name = "ttyS", - .major = TTY_MAJOR, - .minor = 64, + .dev_name = "ttyPSC", + .devfs_name = "ttyPSC", + .major = SERIAL_PSC_MAJOR, + .minor = SERIAL_PSC_MINOR, .nr = MPC52xx_PSC_MAXNUM, .cons = MPC52xx_PSC_CONSOLE, }; |