diff options
Diffstat (limited to 'drivers')
252 files changed, 16361 insertions, 3733 deletions
diff --git a/drivers/Kconfig b/drivers/Kconfig index 7916f4b86d2..707650ab77a 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -84,4 +84,7 @@ source "drivers/auxdisplay/Kconfig" source "drivers/kvm/Kconfig" +source "drivers/uio/Kconfig" + +source "drivers/lguest/Kconfig" endmenu diff --git a/drivers/Makefile b/drivers/Makefile index 6d9d7fab77f..0ea8e3237c0 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -40,6 +40,7 @@ obj-$(CONFIG_ATA) += ata/ obj-$(CONFIG_FUSION) += message/ obj-$(CONFIG_FIREWIRE) += firewire/ obj-$(CONFIG_IEEE1394) += ieee1394/ +obj-$(CONFIG_UIO) += uio/ obj-y += cdrom/ obj-y += auxdisplay/ obj-$(CONFIG_MTD) += mtd/ @@ -72,6 +73,7 @@ obj-$(CONFIG_ISDN) += isdn/ obj-$(CONFIG_EDAC) += edac/ obj-$(CONFIG_MCA) += mca/ obj-$(CONFIG_EISA) += eisa/ +obj-$(CONFIG_LGUEST_GUEST) += lguest/ obj-$(CONFIG_CPU_FREQ) += cpufreq/ obj-$(CONFIG_MMC) += mmc/ obj-$(CONFIG_NEW_LEDS) += leds/ diff --git a/drivers/acpi/sleep/main.c b/drivers/acpi/sleep/main.c index bc7e16ec839..42127c0d612 100644 --- a/drivers/acpi/sleep/main.c +++ b/drivers/acpi/sleep/main.c @@ -217,10 +217,26 @@ static void acpi_hibernation_finish(void) } } +static int acpi_hibernation_pre_restore(void) +{ + acpi_status status; + + status = acpi_hw_disable_all_gpes(); + + return ACPI_SUCCESS(status) ? 
0 : -EFAULT; +} + +static void acpi_hibernation_restore_cleanup(void) +{ + acpi_hw_enable_all_runtime_gpes(); +} + static struct hibernation_ops acpi_hibernation_ops = { .prepare = acpi_hibernation_prepare, .enter = acpi_hibernation_enter, .finish = acpi_hibernation_finish, + .pre_restore = acpi_hibernation_pre_restore, + .restore_cleanup = acpi_hibernation_restore_cleanup, }; #endif /* CONFIG_SOFTWARE_SUSPEND */ diff --git a/drivers/acpi/sleep/poweroff.c b/drivers/acpi/sleep/poweroff.c index d9801eff648..39e40d56b03 100644 --- a/drivers/acpi/sleep/poweroff.c +++ b/drivers/acpi/sleep/poweroff.c @@ -39,7 +39,13 @@ int acpi_sleep_prepare(u32 acpi_state) #ifdef CONFIG_PM -void acpi_power_off(void) +static void acpi_power_off_prepare(void) +{ + /* Prepare to power off the system */ + acpi_sleep_prepare(ACPI_STATE_S5); +} + +static void acpi_power_off(void) { /* acpi_sleep_prepare(ACPI_STATE_S5) should have already been called */ printk("%s called\n", __FUNCTION__); @@ -48,30 +54,6 @@ void acpi_power_off(void) acpi_enter_sleep_state(ACPI_STATE_S5); } -static int acpi_shutdown(struct sys_device *x) -{ - switch (system_state) { - case SYSTEM_POWER_OFF: - /* Prepare to power off the system */ - return acpi_sleep_prepare(ACPI_STATE_S5); - case SYSTEM_SUSPEND_DISK: - /* Prepare to suspend the system to disk */ - return acpi_sleep_prepare(ACPI_STATE_S4); - default: - return 0; - } -} - -static struct sysdev_class acpi_sysclass = { - set_kset_name("acpi"), - .shutdown = acpi_shutdown -}; - -static struct sys_device device_acpi = { - .id = 0, - .cls = &acpi_sysclass, -}; - static int acpi_poweroff_init(void) { if (!acpi_disabled) { @@ -81,13 +63,8 @@ static int acpi_poweroff_init(void) status = acpi_get_sleep_type_data(ACPI_STATE_S5, &type_a, &type_b); if (ACPI_SUCCESS(status)) { - int error; - error = sysdev_class_register(&acpi_sysclass); - if (!error) - error = sysdev_register(&device_acpi); - if (!error) - pm_power_off = acpi_power_off; - return error; + 
pm_power_off_prepare = acpi_power_off_prepare; + pm_power_off = acpi_power_off; } } return 0; diff --git a/drivers/base/core.c b/drivers/base/core.c index 0455aa78fa1..3599ab2506d 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -24,6 +24,8 @@ #include "base.h" #include "power/power.h" +extern const char *kobject_actions[]; + int (*platform_notify)(struct device * dev) = NULL; int (*platform_notify_remove)(struct device * dev) = NULL; @@ -303,10 +305,25 @@ out: static ssize_t store_uevent(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - if (memcmp(buf, "add", 3) != 0) - dev_err(dev, "uevent: unsupported action-string; this will " - "be ignored in a future kernel version"); + size_t len = count; + enum kobject_action action; + + if (len && buf[len-1] == '\n') + len--; + + for (action = 0; action < KOBJ_MAX; action++) { + if (strncmp(kobject_actions[action], buf, len) != 0) + continue; + if (kobject_actions[action][len] != '\0') + continue; + kobject_uevent(&dev->kobj, action); + goto out; + } + + dev_err(dev, "uevent: unsupported action-string; this will " + "be ignored in a future kernel version\n"); kobject_uevent(&dev->kobj, KOBJ_ADD); +out: return count; } @@ -643,6 +660,82 @@ static int setup_parent(struct device *dev, struct device *parent) return 0; } +static int device_add_class_symlinks(struct device *dev) +{ + int error; + + if (!dev->class) + return 0; + error = sysfs_create_link(&dev->kobj, &dev->class->subsys.kobj, + "subsystem"); + if (error) + goto out; + /* + * If this is not a "fake" compatible device, then create the + * symlink from the class to the device. 
+ */ + if (dev->kobj.parent != &dev->class->subsys.kobj) { + error = sysfs_create_link(&dev->class->subsys.kobj, &dev->kobj, + dev->bus_id); + if (error) + goto out_subsys; + } + /* only bus-device parents get a "device"-link */ + if (dev->parent && dev->parent->bus) { + error = sysfs_create_link(&dev->kobj, &dev->parent->kobj, + "device"); + if (error) + goto out_busid; +#ifdef CONFIG_SYSFS_DEPRECATED + { + char * class_name = make_class_name(dev->class->name, + &dev->kobj); + if (class_name) + error = sysfs_create_link(&dev->parent->kobj, + &dev->kobj, class_name); + kfree(class_name); + if (error) + goto out_device; + } +#endif + } + return 0; + +#ifdef CONFIG_SYSFS_DEPRECATED +out_device: + if (dev->parent) + sysfs_remove_link(&dev->kobj, "device"); +#endif +out_busid: + if (dev->kobj.parent != &dev->class->subsys.kobj) + sysfs_remove_link(&dev->class->subsys.kobj, dev->bus_id); +out_subsys: + sysfs_remove_link(&dev->kobj, "subsystem"); +out: + return error; +} + +static void device_remove_class_symlinks(struct device *dev) +{ + if (!dev->class) + return; + if (dev->parent) { +#ifdef CONFIG_SYSFS_DEPRECATED + char *class_name; + + class_name = make_class_name(dev->class->name, &dev->kobj); + if (class_name) { + sysfs_remove_link(&dev->parent->kobj, class_name); + kfree(class_name); + } +#endif + sysfs_remove_link(&dev->kobj, "device"); + } + if (dev->kobj.parent != &dev->class->subsys.kobj) + sysfs_remove_link(&dev->class->subsys.kobj, dev->bus_id); + sysfs_remove_link(&dev->kobj, "subsystem"); +} + /** * device_add - add device to device hierarchy. * @dev: device. 
@@ -657,7 +750,6 @@ static int setup_parent(struct device *dev, struct device *parent) int device_add(struct device *dev) { struct device *parent = NULL; - char *class_name = NULL; struct class_interface *class_intf; int error = -EINVAL; @@ -697,27 +789,9 @@ int device_add(struct device *dev) goto ueventattrError; } - if (dev->class) { - sysfs_create_link(&dev->kobj, &dev->class->subsys.kobj, - "subsystem"); - /* If this is not a "fake" compatible device, then create the - * symlink from the class to the device. */ - if (dev->kobj.parent != &dev->class->subsys.kobj) - sysfs_create_link(&dev->class->subsys.kobj, - &dev->kobj, dev->bus_id); - if (parent) { - sysfs_create_link(&dev->kobj, &dev->parent->kobj, - "device"); -#ifdef CONFIG_SYSFS_DEPRECATED - class_name = make_class_name(dev->class->name, - &dev->kobj); - if (class_name) - sysfs_create_link(&dev->parent->kobj, - &dev->kobj, class_name); -#endif - } - } - + error = device_add_class_symlinks(dev); + if (error) + goto SymlinkError; error = device_add_attrs(dev); if (error) goto AttrsError; @@ -744,7 +818,6 @@ int device_add(struct device *dev) up(&dev->class->sem); } Done: - kfree(class_name); put_device(dev); return error; BusError: @@ -755,6 +828,8 @@ int device_add(struct device *dev) BUS_NOTIFY_DEL_DEVICE, dev); device_remove_attrs(dev); AttrsError: + device_remove_class_symlinks(dev); + SymlinkError: if (MAJOR(dev->devt)) device_remove_file(dev, &devt_attr); @@ -1139,7 +1214,7 @@ int device_rename(struct device *dev, char *new_name) { char *old_class_name = NULL; char *new_class_name = NULL; - char *old_symlink_name = NULL; + char *old_device_name = NULL; int error; dev = get_device(dev); @@ -1153,42 +1228,49 @@ int device_rename(struct device *dev, char *new_name) old_class_name = make_class_name(dev->class->name, &dev->kobj); #endif - if (dev->class) { - old_symlink_name = kmalloc(BUS_ID_SIZE, GFP_KERNEL); - if (!old_symlink_name) { - error = -ENOMEM; - goto out_free_old_class; - } - 
strlcpy(old_symlink_name, dev->bus_id, BUS_ID_SIZE); + old_device_name = kmalloc(BUS_ID_SIZE, GFP_KERNEL); + if (!old_device_name) { + error = -ENOMEM; + goto out; } - + strlcpy(old_device_name, dev->bus_id, BUS_ID_SIZE); strlcpy(dev->bus_id, new_name, BUS_ID_SIZE); error = kobject_rename(&dev->kobj, new_name); + if (error) { + strlcpy(dev->bus_id, old_device_name, BUS_ID_SIZE); + goto out; + } #ifdef CONFIG_SYSFS_DEPRECATED if (old_class_name) { new_class_name = make_class_name(dev->class->name, &dev->kobj); if (new_class_name) { - sysfs_create_link(&dev->parent->kobj, &dev->kobj, - new_class_name); + error = sysfs_create_link(&dev->parent->kobj, + &dev->kobj, new_class_name); + if (error) + goto out; sysfs_remove_link(&dev->parent->kobj, old_class_name); } } #endif if (dev->class) { - sysfs_remove_link(&dev->class->subsys.kobj, - old_symlink_name); - sysfs_create_link(&dev->class->subsys.kobj, &dev->kobj, - dev->bus_id); + sysfs_remove_link(&dev->class->subsys.kobj, old_device_name); + error = sysfs_create_link(&dev->class->subsys.kobj, &dev->kobj, + dev->bus_id); + if (error) { + /* Uh... how to unravel this if restoring can fail? 
*/ + dev_err(dev, "%s: sysfs_create_symlink failed (%d)\n", + __FUNCTION__, error); + } } +out: put_device(dev); kfree(new_class_name); - kfree(old_symlink_name); - out_free_old_class: kfree(old_class_name); + kfree(old_device_name); return error; } diff --git a/drivers/base/power/Makefile b/drivers/base/power/Makefile index 91f230939c1..966a5e28741 100644 --- a/drivers/base/power/Makefile +++ b/drivers/base/power/Makefile @@ -1,10 +1,10 @@ obj-y := shutdown.o -obj-$(CONFIG_PM) += main.o suspend.o resume.o runtime.o sysfs.o +obj-$(CONFIG_PM) += main.o suspend.o resume.o sysfs.o obj-$(CONFIG_PM_TRACE) += trace.o ifeq ($(CONFIG_DEBUG_DRIVER),y) EXTRA_CFLAGS += -DDEBUG endif -ifeq ($(CONFIG_PM_DEBUG),y) +ifeq ($(CONFIG_PM_VERBOSE),y) EXTRA_CFLAGS += -DDEBUG endif diff --git a/drivers/base/power/power.h b/drivers/base/power/power.h index 2760f25b3ac..591a0dd5dee 100644 --- a/drivers/base/power/power.h +++ b/drivers/base/power/power.h @@ -62,11 +62,6 @@ extern int resume_device(struct device *); */ extern int suspend_device(struct device *, pm_message_t); - -/* - * runtime.c - */ - #else /* CONFIG_PM */ diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c deleted file mode 100644 index df6174d8586..00000000000 --- a/drivers/base/power/runtime.c +++ /dev/null @@ -1,85 +0,0 @@ -/* - * drivers/base/power/runtime.c - Handling dynamic device power management. - * - * Copyright (c) 2003 Patrick Mochel - * Copyright (c) 2003 Open Source Development Lab - * - */ - -#include <linux/device.h> -#include "power.h" - - -static void runtime_resume(struct device * dev) -{ - dev_dbg(dev, "resuming\n"); - if (!dev->power.power_state.event) - return; - if (!resume_device(dev)) - dev->power.power_state = PMSG_ON; -} - - -/** - * dpm_runtime_resume - Power one device back on. - * @dev: Device. - * - * Bring one device back to the on state by first powering it - * on, then restoring state. We only operate on devices that aren't - * already on. 
- * FIXME: We need to handle devices that are in an unknown state. - */ - -void dpm_runtime_resume(struct device * dev) -{ - mutex_lock(&dpm_mtx); - runtime_resume(dev); - mutex_unlock(&dpm_mtx); -} -EXPORT_SYMBOL(dpm_runtime_resume); - - -/** - * dpm_runtime_suspend - Put one device in low-power state. - * @dev: Device. - * @state: State to enter. - */ - -int dpm_runtime_suspend(struct device * dev, pm_message_t state) -{ - int error = 0; - - mutex_lock(&dpm_mtx); - if (dev->power.power_state.event == state.event) - goto Done; - - if (dev->power.power_state.event) - runtime_resume(dev); - - if (!(error = suspend_device(dev, state))) - dev->power.power_state = state; - Done: - mutex_unlock(&dpm_mtx); - return error; -} -EXPORT_SYMBOL(dpm_runtime_suspend); - - -#if 0 -/** - * dpm_set_power_state - Update power_state field. - * @dev: Device. - * @state: Power state device is in. - * - * This is an update mechanism for drivers to notify the core - * what power state a device is in. Device probing code may not - * always be able to tell, but we need accurate information to - * work reliably. - */ -void dpm_set_power_state(struct device * dev, pm_message_t state) -{ - mutex_lock(&dpm_mtx); - dev->power.power_state = state; - mutex_unlock(&dpm_mtx); -} -#endif /* 0 */ diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c index 2d47517dbe3..f2ed179cd69 100644 --- a/drivers/base/power/sysfs.c +++ b/drivers/base/power/sysfs.c @@ -7,69 +7,6 @@ #include "power.h" -#ifdef CONFIG_PM_SYSFS_DEPRECATED - -/** - * state - Control current power state of device - * - * show() returns the current power state of the device. '0' indicates - * the device is on. Other values (2) indicate the device is in some low - * power state. - * - * store() sets the current power state, which is an integer valued - * 0, 2, or 3. Devices with bus.suspend_late(), or bus.resume_early() - * methods fail this operation; those methods couldn't be called. 
- * Otherwise, - * - * - If the recorded dev->power.power_state.event matches the - * target value, nothing is done. - * - If the recorded event code is nonzero, the device is reactivated - * by calling bus.resume() and/or class.resume(). - * - If the target value is nonzero, the device is suspended by - * calling class.suspend() and/or bus.suspend() with event code - * PM_EVENT_SUSPEND. - * - * This mechanism is DEPRECATED and should only be used for testing. - */ - -static ssize_t state_show(struct device * dev, struct device_attribute *attr, char * buf) -{ - if (dev->power.power_state.event) - return sprintf(buf, "2\n"); - else - return sprintf(buf, "0\n"); -} - -static ssize_t state_store(struct device * dev, struct device_attribute *attr, const char * buf, size_t n) -{ - pm_message_t state; - int error = -EINVAL; - - /* disallow incomplete suspend sequences */ - if (dev->bus && (dev->bus->suspend_late || dev->bus->resume_early)) - return error; - - state.event = PM_EVENT_SUSPEND; - /* Older apps expected to write "3" here - confused with PCI D3 */ - if ((n == 1) && !strcmp(buf, "3")) - error = dpm_runtime_suspend(dev, state); - - if ((n == 1) && !strcmp(buf, "2")) - error = dpm_runtime_suspend(dev, state); - - if ((n == 1) && !strcmp(buf, "0")) { - dpm_runtime_resume(dev); - error = 0; - } - - return error ? 
error : n; -} - -static DEVICE_ATTR(state, 0644, state_show, state_store); - - -#endif /* CONFIG_PM_SYSFS_DEPRECATED */ - /* * wakeup - Report/change current wakeup option for device * @@ -143,9 +80,6 @@ static DEVICE_ATTR(wakeup, 0644, wake_show, wake_store); static struct attribute * power_attrs[] = { -#ifdef CONFIG_PM_SYSFS_DEPRECATED - &dev_attr_state.attr, -#endif &dev_attr_wakeup.attr, NULL, }; diff --git a/drivers/block/Makefile b/drivers/block/Makefile index 3e31532df0e..819c829125f 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -30,3 +30,4 @@ obj-$(CONFIG_BLK_DEV_SX8) += sx8.o obj-$(CONFIG_BLK_DEV_UB) += ub.o obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o +obj-$(CONFIG_LGUEST_GUEST) += lguest_blk.o diff --git a/drivers/block/lguest_blk.c b/drivers/block/lguest_blk.c new file mode 100644 index 00000000000..1634c2dd25e --- /dev/null +++ b/drivers/block/lguest_blk.c @@ -0,0 +1,275 @@ +/* A simple block driver for lguest. + * + * Copyright 2006 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +//#define DEBUG +#include <linux/init.h> +#include <linux/types.h> +#include <linux/blkdev.h> +#include <linux/interrupt.h> +#include <linux/lguest_bus.h> + +static char next_block_index = 'a'; + +struct blockdev +{ + spinlock_t lock; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* The major number for this disk. */ + int major; + int irq; + + unsigned long phys_addr; + /* The mapped block page. */ + struct lguest_block_page *lb_page; + + /* We only have a single request outstanding at a time. */ + struct lguest_dma dma; + struct request *req; +}; + +/* Jens gave me this nice helper to end all chunks of a request. */ +static void end_entire_request(struct request *req, int uptodate) +{ + if (end_that_request_first(req, uptodate, req->hard_nr_sectors)) + BUG(); + add_disk_randomness(req->rq_disk); + blkdev_dequeue_request(req); + end_that_request_last(req, uptodate); +} + +static irqreturn_t lgb_irq(int irq, void *_bd) +{ + struct blockdev *bd = _bd; + unsigned long flags; + + if (!bd->req) { + pr_debug("No work!\n"); + return IRQ_NONE; + } + + if (!bd->lb_page->result) { + pr_debug("No result!\n"); + return IRQ_NONE; + } + + spin_lock_irqsave(&bd->lock, flags); + end_entire_request(bd->req, bd->lb_page->result == 1); + bd->req = NULL; + bd->dma.used_len = 0; + blk_start_queue(bd->disk->queue); + spin_unlock_irqrestore(&bd->lock, flags); + return IRQ_HANDLED; +} + +static unsigned int req_to_dma(struct request *req, struct lguest_dma *dma) +{ + unsigned int i = 0, idx, len = 0; + struct bio *bio; + + rq_for_each_bio(bio, req) { + struct bio_vec *bvec; + bio_for_each_segment(bvec, bio, idx) { + BUG_ON(i == LGUEST_MAX_DMA_SECTIONS); + BUG_ON(!bvec->bv_len); + dma->addr[i] = page_to_phys(bvec->bv_page) + + bvec->bv_offset; + 
dma->len[i] = bvec->bv_len; + len += bvec->bv_len; + i++; + } + } + if (i < LGUEST_MAX_DMA_SECTIONS) + dma->len[i] = 0; + return len; +} + +static void empty_dma(struct lguest_dma *dma) +{ + dma->len[0] = 0; +} + +static void setup_req(struct blockdev *bd, + int type, struct request *req, struct lguest_dma *dma) +{ + bd->lb_page->type = type; + bd->lb_page->sector = req->sector; + bd->lb_page->result = 0; + bd->req = req; + bd->lb_page->bytes = req_to_dma(req, dma); +} + +static void do_write(struct blockdev *bd, struct request *req) +{ + struct lguest_dma send; + + pr_debug("lgb: WRITE sector %li\n", (long)req->sector); + setup_req(bd, 1, req, &send); + + lguest_send_dma(bd->phys_addr, &send); +} + +static void do_read(struct blockdev *bd, struct request *req) +{ + struct lguest_dma ping; + + pr_debug("lgb: READ sector %li\n", (long)req->sector); + setup_req(bd, 0, req, &bd->dma); + + empty_dma(&ping); + lguest_send_dma(bd->phys_addr, &ping); +} + +static void do_lgb_request(request_queue_t *q) +{ + struct blockdev *bd; + struct request *req; + +again: + req = elv_next_request(q); + if (!req) + return; + + bd = req->rq_disk->private_data; + /* Sometimes we get repeated requests after blk_stop_queue. */ + if (bd->req) + return; + + if (!blk_fs_request(req)) { + pr_debug("Got non-command 0x%08x\n", req->cmd_type); + req->errors++; + end_entire_request(req, 0); + goto again; + } + + if (rq_data_dir(req) == WRITE) + do_write(bd, req); + else + do_read(bd, req); + + /* Wait for interrupt to tell us it's done. 
*/ + blk_stop_queue(q); +} + +static struct block_device_operations lguestblk_fops = { + .owner = THIS_MODULE, +}; + +static int lguestblk_probe(struct lguest_device *lgdev) +{ + struct blockdev *bd; + int err; + int irqflags = IRQF_SHARED; + + bd = kmalloc(sizeof(*bd), GFP_KERNEL); + if (!bd) + return -ENOMEM; + + spin_lock_init(&bd->lock); + bd->irq = lgdev_irq(lgdev); + bd->req = NULL; + bd->dma.used_len = 0; + bd->dma.len[0] = 0; + bd->phys_addr = (lguest_devices[lgdev->index].pfn << PAGE_SHIFT); + + bd->lb_page = lguest_map(bd->phys_addr, 1); + if (!bd->lb_page) { + err = -ENOMEM; + goto out_free_bd; + } + + bd->major = register_blkdev(0, "lguestblk"); + if (bd->major < 0) { + err = bd->major; + goto out_unmap; + } + + bd->disk = alloc_disk(1); + if (!bd->disk) { + err = -ENOMEM; + goto out_unregister_blkdev; + } + + bd->disk->queue = blk_init_queue(do_lgb_request, &bd->lock); + if (!bd->disk->queue) { + err = -ENOMEM; + goto out_put_disk; + } + + /* We can only handle a certain number of sg entries */ + blk_queue_max_hw_segments(bd->disk->queue, LGUEST_MAX_DMA_SECTIONS); + /* Buffers must not cross page boundaries */ + blk_queue_segment_boundary(bd->disk->queue, PAGE_SIZE-1); + + sprintf(bd->disk->disk_name, "lgb%c", next_block_index++); + if (lguest_devices[lgdev->index].features & LGUEST_DEVICE_F_RANDOMNESS) + irqflags |= IRQF_SAMPLE_RANDOM; + err = request_irq(bd->irq, lgb_irq, irqflags, bd->disk->disk_name, bd); + if (err) + goto out_cleanup_queue; + + err = lguest_bind_dma(bd->phys_addr, &bd->dma, 1, bd->irq); + if (err) + goto out_free_irq; + + bd->disk->major = bd->major; + bd->disk->first_minor = 0; + bd->disk->private_data = bd; + bd->disk->fops = &lguestblk_fops; + /* This is initialized to the disk size by the other end. 
*/ + set_capacity(bd->disk, bd->lb_page->num_sectors); + add_disk(bd->disk); + + printk(KERN_INFO "%s: device %i at major %d\n", + bd->disk->disk_name, lgdev->index, bd->major); + + lgdev->private = bd; + return 0; + +out_free_irq: + free_irq(bd->irq, bd); +out_cleanup_queue: + blk_cleanup_queue(bd->disk->queue); +out_put_disk: + put_disk(bd->disk); +out_unregister_blkdev: + unregister_blkdev(bd->major, "lguestblk"); +out_unmap: + lguest_unmap(bd->lb_page); +out_free_bd: + kfree(bd); + return err; +} + +static struct lguest_driver lguestblk_drv = { + .name = "lguestblk", + .owner = THIS_MODULE, + .device_type = LGUEST_DEVICE_T_BLOCK, + .probe = lguestblk_probe, +}; + +static __init int lguestblk_init(void) +{ + return register_lguest_driver(&lguestblk_drv); +} +module_init(lguestblk_init); + +MODULE_DESCRIPTION("Lguest block driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c index 54509eb3391..949ae93499e 100644 --- a/drivers/block/sx8.c +++ b/drivers/block/sx8.c @@ -1608,7 +1608,7 @@ static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) } #endif - host = kmalloc(sizeof(*host), GFP_KERNEL); + host = kzalloc(sizeof(*host), GFP_KERNEL); if (!host) { printk(KERN_ERR DRV_NAME "(%s): memory alloc failure\n", pci_name(pdev)); @@ -1616,7 +1616,6 @@ static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) goto err_out_regions; } - memset(host, 0, sizeof(*host)); host->pdev = pdev; host->flags = pci_dac ? 
FL_DAC : 0; spin_lock_init(&host->lock); diff --git a/drivers/char/Makefile b/drivers/char/Makefile index 8852b8d643c..4e6f387fd18 100644 --- a/drivers/char/Makefile +++ b/drivers/char/Makefile @@ -42,6 +42,7 @@ obj-$(CONFIG_SYNCLINK_GT) += synclink_gt.o obj-$(CONFIG_N_HDLC) += n_hdlc.o obj-$(CONFIG_AMIGA_BUILTIN_SERIAL) += amiserial.o obj-$(CONFIG_SX) += sx.o generic_serial.o +obj-$(CONFIG_LGUEST_GUEST) += hvc_lguest.o obj-$(CONFIG_RIO) += rio/ generic_serial.o obj-$(CONFIG_HVC_CONSOLE) += hvc_vio.o hvsi.o obj-$(CONFIG_HVC_ISERIES) += hvc_iseries.o diff --git a/drivers/char/amiserial.c b/drivers/char/amiserial.c index 7b02bf1289a..3d468f502d2 100644 --- a/drivers/char/amiserial.c +++ b/drivers/char/amiserial.c @@ -1721,12 +1721,11 @@ static int get_async_struct(int line, struct async_struct **ret_info) *ret_info = sstate->info; return 0; } - info = kmalloc(sizeof(struct async_struct), GFP_KERNEL); + info = kzalloc(sizeof(struct async_struct), GFP_KERNEL); if (!info) { sstate->count--; return -ENOMEM; } - memset(info, 0, sizeof(struct async_struct)); #ifdef DECLARE_WAITQUEUE init_waitqueue_head(&info->open_wait); init_waitqueue_head(&info->close_wait); diff --git a/drivers/char/drm/via_dmablit.c b/drivers/char/drm/via_dmablit.c index fdb8609dd76..832de1d9ba7 100644 --- a/drivers/char/drm/via_dmablit.c +++ b/drivers/char/drm/via_dmablit.c @@ -273,10 +273,9 @@ via_alloc_desc_pages(drm_via_sg_info_t *vsg) vsg->num_desc_pages = (vsg->num_desc + vsg->descriptors_per_page - 1) / vsg->descriptors_per_page; - if (NULL == (vsg->desc_pages = kmalloc(sizeof(void *) * vsg->num_desc_pages, GFP_KERNEL))) + if (NULL == (vsg->desc_pages = kcalloc(vsg->num_desc_pages, sizeof(void *), GFP_KERNEL))) return DRM_ERR(ENOMEM); - memset(vsg->desc_pages, 0, sizeof(void *) * vsg->num_desc_pages); vsg->state = dr_via_desc_pages_alloc; for (i=0; i<vsg->num_desc_pages; ++i) { if (NULL == (vsg->desc_pages[i] = diff --git a/drivers/char/esp.c b/drivers/char/esp.c index 74cd5118af5..2e7ae42a550 
100644 --- a/drivers/char/esp.c +++ b/drivers/char/esp.c @@ -2459,7 +2459,7 @@ static int __init espserial_init(void) return 1; } - info = kmalloc(sizeof(struct esp_struct), GFP_KERNEL); + info = kzalloc(sizeof(struct esp_struct), GFP_KERNEL); if (!info) { @@ -2469,7 +2469,6 @@ static int __init espserial_init(void) return 1; } - memset((void *)info, 0, sizeof(struct esp_struct)); spin_lock_init(&info->lock); /* rx_trigger, tx_trigger are needed by autoconfig */ info->config.rx_trigger = rx_trigger; @@ -2527,7 +2526,7 @@ static int __init espserial_init(void) if (!dma) info->stat_flags |= ESP_STAT_NEVER_DMA; - info = kmalloc(sizeof(struct esp_struct), GFP_KERNEL); + info = kzalloc(sizeof(struct esp_struct), GFP_KERNEL); if (!info) { printk(KERN_ERR "Couldn't allocate memory for esp serial device information\n"); @@ -2536,7 +2535,6 @@ static int __init espserial_init(void) return 0; } - memset((void *)info, 0, sizeof(struct esp_struct)); /* rx_trigger, tx_trigger are needed by autoconfig */ info->config.rx_trigger = rx_trigger; info->config.tx_trigger = tx_trigger; diff --git a/drivers/char/hvc_lguest.c b/drivers/char/hvc_lguest.c new file mode 100644 index 00000000000..e7b889e404a --- /dev/null +++ b/drivers/char/hvc_lguest.c @@ -0,0 +1,102 @@ +/* Simple console for lguest. + * + * Copyright (C) 2006 Rusty Russell, IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include <linux/err.h> +#include <linux/init.h> +#include <linux/lguest_bus.h> +#include "hvc_console.h" + +static char inbuf[256]; +static struct lguest_dma cons_input = { .used_len = 0, + .addr[0] = __pa(inbuf), + .len[0] = sizeof(inbuf), + .len[1] = 0 }; + +static int put_chars(u32 vtermno, const char *buf, int count) +{ + struct lguest_dma dma; + + /* FIXME: what if it's over a page boundary? */ + dma.len[0] = count; + dma.len[1] = 0; + dma.addr[0] = __pa(buf); + + lguest_send_dma(LGUEST_CONSOLE_DMA_KEY, &dma); + return count; +} + +static int get_chars(u32 vtermno, char *buf, int count) +{ + static int cons_offset; + + if (!cons_input.used_len) + return 0; + + if (cons_input.used_len - cons_offset < count) + count = cons_input.used_len - cons_offset; + + memcpy(buf, inbuf + cons_offset, count); + cons_offset += count; + if (cons_offset == cons_input.used_len) { + cons_offset = 0; + cons_input.used_len = 0; + } + return count; +} + +static struct hv_ops lguest_cons = { + .get_chars = get_chars, + .put_chars = put_chars, +}; + +static int __init cons_init(void) +{ + if (strcmp(paravirt_ops.name, "lguest") != 0) + return 0; + + return hvc_instantiate(0, 0, &lguest_cons); +} +console_initcall(cons_init); + +static int lguestcons_probe(struct lguest_device *lgdev) +{ + int err; + + lgdev->private = hvc_alloc(0, lgdev_irq(lgdev), &lguest_cons, 256); + if (IS_ERR(lgdev->private)) + return PTR_ERR(lgdev->private); + + err = lguest_bind_dma(LGUEST_CONSOLE_DMA_KEY, &cons_input, 1, + lgdev_irq(lgdev)); + if (err) + printk("lguest console: failed to bind buffer.\n"); + return err; +} + +static struct lguest_driver lguestcons_drv = { + .name = "lguestcons", + .owner = THIS_MODULE, + .device_type = LGUEST_DEVICE_T_CONSOLE, + .probe = lguestcons_probe, +}; + 
+static int __init hvc_lguest_init(void) +{ + return register_lguest_driver(&lguestcons_drv); +} +module_init(hvc_lguest_init); diff --git a/drivers/char/hvcs.c b/drivers/char/hvcs.c index 207f7343ba6..17f96e04266 100644 --- a/drivers/char/hvcs.c +++ b/drivers/char/hvcs.c @@ -784,12 +784,10 @@ static int __devinit hvcs_probe( return -EFAULT; } - hvcsd = kmalloc(sizeof(*hvcsd), GFP_KERNEL); + hvcsd = kzalloc(sizeof(*hvcsd), GFP_KERNEL); if (!hvcsd) return -ENODEV; - /* hvcsd->tty is zeroed out with the memset */ - memset(hvcsd, 0x00, sizeof(*hvcsd)); spin_lock_init(&hvcsd->lock); /* Automatically incs the refcount the first time */ diff --git a/drivers/char/ip2/ip2main.c b/drivers/char/ip2/ip2main.c index 83c7258d358..6005b522577 100644 --- a/drivers/char/ip2/ip2main.c +++ b/drivers/char/ip2/ip2main.c @@ -425,9 +425,7 @@ cleanup_module(void) printk(KERN_ERR "IP2: failed to unregister tty driver (%d)\n", err); } put_tty_driver(ip2_tty_driver); - if ( ( err = unregister_chrdev ( IP2_IPL_MAJOR, pcIpl ) ) ) { - printk(KERN_ERR "IP2: failed to unregister IPL driver (%d)\n", err); - } + unregister_chrdev(IP2_IPL_MAJOR, pcIpl); remove_proc_entry("ip2mem", &proc_root); // free memory diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index b5df7e61aeb..6a01dd9e43f 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -2639,10 +2639,9 @@ int ipmi_register_smi(struct ipmi_smi_handlers *handlers, return -ENODEV; } - intf = kmalloc(sizeof(*intf), GFP_KERNEL); + intf = kzalloc(sizeof(*intf), GFP_KERNEL); if (!intf) return -ENOMEM; - memset(intf, 0, sizeof(*intf)); intf->ipmi_version_major = ipmi_version_major(device_id); intf->ipmi_version_minor = ipmi_version_minor(device_id); diff --git a/drivers/char/mbcs.c b/drivers/char/mbcs.c index 57f9115a456..7ee5d944492 100644 --- a/drivers/char/mbcs.c +++ b/drivers/char/mbcs.c @@ -39,14 +39,14 @@ #else #define DBG(fmt...) 
#endif -int mbcs_major; +static int mbcs_major; -LIST_HEAD(soft_list); +static LIST_HEAD(soft_list); /* * file operations */ -const struct file_operations mbcs_ops = { +static const struct file_operations mbcs_ops = { .open = mbcs_open, .llseek = mbcs_sram_llseek, .read = mbcs_sram_read, @@ -377,7 +377,7 @@ dmaread_exit: return rv; } -int mbcs_open(struct inode *ip, struct file *fp) +static int mbcs_open(struct inode *ip, struct file *fp) { struct mbcs_soft *soft; int minor; @@ -394,7 +394,7 @@ int mbcs_open(struct inode *ip, struct file *fp) return -ENODEV; } -ssize_t mbcs_sram_read(struct file * fp, char __user *buf, size_t len, loff_t * off) +static ssize_t mbcs_sram_read(struct file * fp, char __user *buf, size_t len, loff_t * off) { struct cx_dev *cx_dev = fp->private_data; struct mbcs_soft *soft = cx_dev->soft; @@ -418,7 +418,7 @@ ssize_t mbcs_sram_read(struct file * fp, char __user *buf, size_t len, loff_t * return rv; } -ssize_t +static ssize_t mbcs_sram_write(struct file * fp, const char __user *buf, size_t len, loff_t * off) { struct cx_dev *cx_dev = fp->private_data; @@ -443,7 +443,7 @@ mbcs_sram_write(struct file * fp, const char __user *buf, size_t len, loff_t * o return rv; } -loff_t mbcs_sram_llseek(struct file * filp, loff_t off, int whence) +static loff_t mbcs_sram_llseek(struct file * filp, loff_t off, int whence) { loff_t newpos; @@ -491,7 +491,7 @@ static void mbcs_gscr_pioaddr_set(struct mbcs_soft *soft) soft->gscr_addr = mbcs_pioaddr(soft, MBCS_GSCR_START); } -int mbcs_gscr_mmap(struct file *fp, struct vm_area_struct *vma) +static int mbcs_gscr_mmap(struct file *fp, struct vm_area_struct *vma) { struct cx_dev *cx_dev = fp->private_data; struct mbcs_soft *soft = cx_dev->soft; @@ -793,7 +793,7 @@ static int mbcs_remove(struct cx_dev *dev) return 0; } -const struct cx_device_id __devinitdata mbcs_id_table[] = { +static const struct cx_device_id __devinitdata mbcs_id_table[] = { { .part_num = MBCS_PART_NUM, .mfg_num = MBCS_MFG_NUM, @@ -807,7 
+807,7 @@ const struct cx_device_id __devinitdata mbcs_id_table[] = { MODULE_DEVICE_TABLE(cx, mbcs_id_table); -struct cx_drv mbcs_driver = { +static struct cx_drv mbcs_driver = { .name = DEVICE_NAME, .id_table = mbcs_id_table, .probe = mbcs_probe, @@ -816,12 +816,7 @@ struct cx_drv mbcs_driver = { static void __exit mbcs_exit(void) { - int rv; - - rv = unregister_chrdev(mbcs_major, DEVICE_NAME); - if (rv < 0) - DBG(KERN_ALERT "Error in unregister_chrdev: %d\n", rv); - + unregister_chrdev(mbcs_major, DEVICE_NAME); cx_driver_unregister(&mbcs_driver); } diff --git a/drivers/char/mbcs.h b/drivers/char/mbcs.h index e7fd47e4325..c9905a3c335 100644 --- a/drivers/char/mbcs.h +++ b/drivers/char/mbcs.h @@ -542,12 +542,12 @@ struct mbcs_soft { struct semaphore algolock; }; -extern int mbcs_open(struct inode *ip, struct file *fp); -extern ssize_t mbcs_sram_read(struct file *fp, char __user *buf, size_t len, +static int mbcs_open(struct inode *ip, struct file *fp); +static ssize_t mbcs_sram_read(struct file *fp, char __user *buf, size_t len, loff_t * off); -extern ssize_t mbcs_sram_write(struct file *fp, const char __user *buf, size_t len, +static ssize_t mbcs_sram_write(struct file *fp, const char __user *buf, size_t len, loff_t * off); -extern loff_t mbcs_sram_llseek(struct file *filp, loff_t off, int whence); -extern int mbcs_gscr_mmap(struct file *fp, struct vm_area_struct *vma); +static loff_t mbcs_sram_llseek(struct file *filp, loff_t off, int whence); +static int mbcs_gscr_mmap(struct file *fp, struct vm_area_struct *vma); #endif // __MBCS_H__ diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c index 13808f6083a..2b889317461 100644 --- a/drivers/char/pcmcia/synclink_cs.c +++ b/drivers/char/pcmcia/synclink_cs.c @@ -540,13 +540,12 @@ static int mgslpc_probe(struct pcmcia_device *link) if (debug_level >= DEBUG_LEVEL_INFO) printk("mgslpc_attach\n"); - info = kmalloc(sizeof(MGSLPC_INFO), GFP_KERNEL); + info = kzalloc(sizeof(MGSLPC_INFO), 
GFP_KERNEL); if (!info) { printk("Error can't allocate device instance data\n"); return -ENOMEM; } - memset(info, 0, sizeof(MGSLPC_INFO)); info->magic = MGSLPC_MAGIC; INIT_WORK(&info->task, bh_handler); info->max_frame_size = 4096; diff --git a/drivers/char/rio/rio_linux.c b/drivers/char/rio/rio_linux.c index 294e9cb0c44..0ce96670f97 100644 --- a/drivers/char/rio/rio_linux.c +++ b/drivers/char/rio/rio_linux.c @@ -803,9 +803,7 @@ static void *ckmalloc(int size) { void *p; - p = kmalloc(size, GFP_KERNEL); - if (p) - memset(p, 0, size); + p = kzalloc(size, GFP_KERNEL); return p; } diff --git a/drivers/char/rio/riocmd.c b/drivers/char/rio/riocmd.c index 8cc60b69346..7321d002c34 100644 --- a/drivers/char/rio/riocmd.c +++ b/drivers/char/rio/riocmd.c @@ -556,9 +556,7 @@ struct CmdBlk *RIOGetCmdBlk(void) { struct CmdBlk *CmdBlkP; - CmdBlkP = kmalloc(sizeof(struct CmdBlk), GFP_ATOMIC); - if (CmdBlkP) - memset(CmdBlkP, 0, sizeof(struct CmdBlk)); + CmdBlkP = kzalloc(sizeof(struct CmdBlk), GFP_ATOMIC); return CmdBlkP; } diff --git a/drivers/char/rio/riotable.c b/drivers/char/rio/riotable.c index 7e988357326..991119c9f47 100644 --- a/drivers/char/rio/riotable.c +++ b/drivers/char/rio/riotable.c @@ -863,8 +863,7 @@ int RIOReMapPorts(struct rio_info *p, struct Host *HostP, struct Map *HostMapP) if (PortP->TxRingBuffer) memset(PortP->TxRingBuffer, 0, p->RIOBufferSize); else if (p->RIOBufferSize) { - PortP->TxRingBuffer = kmalloc(p->RIOBufferSize, GFP_KERNEL); - memset(PortP->TxRingBuffer, 0, p->RIOBufferSize); + PortP->TxRingBuffer = kzalloc(p->RIOBufferSize, GFP_KERNEL); } PortP->TxBufferOut = 0; PortP->TxBufferIn = 0; diff --git a/drivers/char/rocket.c b/drivers/char/rocket.c index 0270080ff0c..56cbba7b6ec 100644 --- a/drivers/char/rocket.c +++ b/drivers/char/rocket.c @@ -635,12 +635,11 @@ static void init_r_port(int board, int aiop, int chan, struct pci_dev *pci_dev) ctlp = sCtlNumToCtlPtr(board); /* Get a r_port struct for the port, fill it in and save it globally, indexed by 
line number */ - info = kmalloc(sizeof (struct r_port), GFP_KERNEL); + info = kzalloc(sizeof (struct r_port), GFP_KERNEL); if (!info) { printk(KERN_INFO "Couldn't allocate info struct for line #%d\n", line); return; } - memset(info, 0, sizeof (struct r_port)); info->magic = RPORT_MAGIC; info->line = line; diff --git a/drivers/char/stallion.c b/drivers/char/stallion.c index 93d0bb8b4c0..4a80b2f864e 100644 --- a/drivers/char/stallion.c +++ b/drivers/char/stallion.c @@ -4795,7 +4795,6 @@ static void __exit stallion_module_exit(void) { struct stlbrd *brdp; unsigned int i, j; - int retval; pr_debug("cleanup_module()\n"); @@ -4818,9 +4817,7 @@ static void __exit stallion_module_exit(void) for (i = 0; i < 4; i++) class_device_destroy(stallion_class, MKDEV(STL_SIOMEMMAJOR, i)); - if ((retval = unregister_chrdev(STL_SIOMEMMAJOR, "staliomem"))) - printk("STALLION: failed to un-register serial memory device, " - "errno=%d\n", -retval); + unregister_chrdev(STL_SIOMEMMAJOR, "staliomem"); class_destroy(stallion_class); pci_unregister_driver(&stl_pcidriver); diff --git a/drivers/char/synclink.c b/drivers/char/synclink.c index f53e51ddb9d..fdc256b380b 100644 --- a/drivers/char/synclink.c +++ b/drivers/char/synclink.c @@ -4324,13 +4324,12 @@ static struct mgsl_struct* mgsl_allocate_device(void) { struct mgsl_struct *info; - info = kmalloc(sizeof(struct mgsl_struct), + info = kzalloc(sizeof(struct mgsl_struct), GFP_KERNEL); if (!info) { printk("Error can't allocate device instance data\n"); } else { - memset(info, 0, sizeof(struct mgsl_struct)); info->magic = MGSL_MAGIC; INIT_WORK(&info->task, mgsl_bh_handler); info->max_frame_size = 4096; diff --git a/drivers/char/synclink_gt.c b/drivers/char/synclink_gt.c index 428b514201f..372a37e2562 100644 --- a/drivers/char/synclink_gt.c +++ b/drivers/char/synclink_gt.c @@ -3414,13 +3414,12 @@ static struct slgt_info *alloc_dev(int adapter_num, int port_num, struct pci_dev { struct slgt_info *info; - info = kmalloc(sizeof(struct slgt_info), 
GFP_KERNEL); + info = kzalloc(sizeof(struct slgt_info), GFP_KERNEL); if (!info) { DBGERR(("%s device alloc failed adapter=%d port=%d\n", driver_name, adapter_num, port_num)); } else { - memset(info, 0, sizeof(struct slgt_info)); info->magic = MGSL_MAGIC; INIT_WORK(&info->task, bh_handler); info->max_frame_size = 4096; diff --git a/drivers/char/synclinkmp.c b/drivers/char/synclinkmp.c index a65407b3207..c63013b2fc3 100644 --- a/drivers/char/synclinkmp.c +++ b/drivers/char/synclinkmp.c @@ -3786,14 +3786,13 @@ static SLMP_INFO *alloc_dev(int adapter_num, int port_num, struct pci_dev *pdev) { SLMP_INFO *info; - info = kmalloc(sizeof(SLMP_INFO), + info = kzalloc(sizeof(SLMP_INFO), GFP_KERNEL); if (!info) { printk("%s(%d) Error can't allocate device instance data for adapter %d, port %d\n", __FILE__,__LINE__, adapter_num, port_num); } else { - memset(info, 0, sizeof(SLMP_INFO)); info->magic = MGSL_MAGIC; INIT_WORK(&info->task, bh_handler); info->max_frame_size = 4096; diff --git a/drivers/char/viotape.c b/drivers/char/viotape.c index db57277117b..e12275df6ea 100644 --- a/drivers/char/viotape.c +++ b/drivers/char/viotape.c @@ -1098,15 +1098,10 @@ static int chg_state(int index, unsigned char new_state, struct file *file) /* Cleanup */ static void __exit viotap_exit(void) { - int ret; - remove_proc_entry("iSeries/viotape", NULL); vio_unregister_driver(&viotape_driver); class_destroy(tape_class); - ret = unregister_chrdev(VIOTAPE_MAJOR, "viotape"); - if (ret < 0) - printk(VIOTAPE_KERN_WARN "Error unregistering device: %d\n", - ret); + unregister_chrdev(VIOTAPE_MAJOR, "viotape"); if (viotape_unitinfo) dma_free_coherent(iSeries_vio_dev, sizeof(viotape_unitinfo[0]) * VIOTAPE_MAX_TAPE, diff --git a/drivers/char/watchdog/mpcore_wdt.c b/drivers/char/watchdog/mpcore_wdt.c index e88947f8fe5..0d2b2773541 100644 --- a/drivers/char/watchdog/mpcore_wdt.c +++ b/drivers/char/watchdog/mpcore_wdt.c @@ -328,12 +328,11 @@ static int __devinit mpcore_wdt_probe(struct platform_device *dev) 
goto err_out; } - wdt = kmalloc(sizeof(struct mpcore_wdt), GFP_KERNEL); + wdt = kzalloc(sizeof(struct mpcore_wdt), GFP_KERNEL); if (!wdt) { ret = -ENOMEM; goto err_out; } - memset(wdt, 0, sizeof(struct mpcore_wdt)); wdt->dev = &dev->dev; wdt->irq = platform_get_irq(dev, 0); diff --git a/drivers/char/watchdog/pcwd_usb.c b/drivers/char/watchdog/pcwd_usb.c index 1e7a6719d5b..0f3fd6c9c35 100644 --- a/drivers/char/watchdog/pcwd_usb.c +++ b/drivers/char/watchdog/pcwd_usb.c @@ -626,12 +626,11 @@ static int usb_pcwd_probe(struct usb_interface *interface, const struct usb_devi maxp = usb_maxpacket(udev, pipe, usb_pipeout(pipe)); /* allocate memory for our device and initialize it */ - usb_pcwd = kmalloc (sizeof(struct usb_pcwd_private), GFP_KERNEL); + usb_pcwd = kzalloc (sizeof(struct usb_pcwd_private), GFP_KERNEL); if (usb_pcwd == NULL) { printk(KERN_ERR PFX "Out of memory\n"); goto error; } - memset (usb_pcwd, 0x00, sizeof (*usb_pcwd)); usb_pcwd_device = usb_pcwd; diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index debf1d8e8b4..1724c41d241 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -3,18 +3,18 @@ # Copyright (c) 2003 Linux Networx # Licensed and distributed under the GPL # -# $Id: Kconfig,v 1.4.2.7 2005/07/08 22:05:38 dsp_llnl Exp $ -# menuconfig EDAC - tristate "EDAC - error detection and reporting (EXPERIMENTAL)" + bool "EDAC - error detection and reporting (EXPERIMENTAL)" depends on HAS_IOMEM - depends on X86 && EXPERIMENTAL + depends on EXPERIMENTAL + depends on X86 || MIPS || PPC help EDAC is designed to report errors in the core system. These are low-level errors that are reported in the CPU or - supporting chipset: memory errors, cache errors, PCI errors, - thermal throttling, etc.. If unsure, select 'Y'. + supporting chipset or other subsystems: + memory errors, cache errors, PCI errors, thermal throttling, etc.. + If unsure, select 'Y'. 
If this code is reporting problems on your system, please see the EDAC project web pages for more information at: @@ -73,6 +73,14 @@ config EDAC_E752X Support for error detection and correction on the Intel E7520, E7525, E7320 server chipsets. +config EDAC_I82443BXGX + tristate "Intel 82443BX/GX (440BX/GX)" + depends on EDAC_MM_EDAC && PCI && X86_32 + depends on BROKEN + help + Support for error detection and correction on the Intel + 82443BX/GX memory controllers (440BX/GX chipsets). + config EDAC_I82875P tristate "Intel 82875p (D82875P, E7210)" depends on EDAC_MM_EDAC && PCI && X86_32 @@ -80,6 +88,20 @@ config EDAC_I82875P Support for error detection and correction on the Intel DP82785P and E7210 server chipsets. +config EDAC_I82975X + tristate "Intel 82975x (D82975x)" + depends on EDAC_MM_EDAC && PCI && X86 + help + Support for error detection and correction on the Intel + DP82975x server chipsets. + +config EDAC_I3000 + tristate "Intel 3000/3010" + depends on EDAC_MM_EDAC && PCI && X86_32 + help + Support for error detection and correction on the Intel + 3000 and 3010 server chipsets. + config EDAC_I82860 tristate "Intel 82860" depends on EDAC_MM_EDAC && PCI && X86_32 @@ -94,15 +116,20 @@ config EDAC_R82600 Support for error detection and correction on the Radisys 82600 embedded chipset. -choice - prompt "Error detecting method" - default EDAC_POLL +config EDAC_I5000 + tristate "Intel Greencreek/Blackford chipset" + depends on EDAC_MM_EDAC && X86 && PCI + help + Support for error detection and correction the Intel + Greekcreek/Blackford chipsets. -config EDAC_POLL - bool "Poll for errors" +config EDAC_PASEMI + tristate "PA Semi PWRficient" + depends on EDAC_MM_EDAC && PCI + depends on PPC help - Poll the chipset periodically to detect errors. + Support for error detection and correction on PA Semi + PWRficient. 
-endchoice endif # EDAC diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index 93137fdab4b..02c09f0ff15 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -5,14 +5,27 @@ # This file may be distributed under the terms of the # GNU General Public License. # -# $Id: Makefile,v 1.4.2.3 2005/07/08 22:05:38 dsp_llnl Exp $ -obj-$(CONFIG_EDAC_MM_EDAC) += edac_mc.o +obj-$(CONFIG_EDAC) := edac_stub.o +obj-$(CONFIG_EDAC_MM_EDAC) += edac_core.o + +edac_core-objs := edac_mc.o edac_device.o edac_mc_sysfs.o edac_pci_sysfs.o +edac_core-objs += edac_module.o edac_device_sysfs.o + +ifdef CONFIG_PCI +edac_core-objs += edac_pci.o edac_pci_sysfs.o +endif + obj-$(CONFIG_EDAC_AMD76X) += amd76x_edac.o +obj-$(CONFIG_EDAC_I5000) += i5000_edac.o obj-$(CONFIG_EDAC_E7XXX) += e7xxx_edac.o obj-$(CONFIG_EDAC_E752X) += e752x_edac.o +obj-$(CONFIG_EDAC_I82443BXGX) += i82443bxgx_edac.o obj-$(CONFIG_EDAC_I82875P) += i82875p_edac.o +obj-$(CONFIG_EDAC_I82975X) += i82975x_edac.o +obj-$(CONFIG_EDAC_I3000) += i3000_edac.o obj-$(CONFIG_EDAC_I82860) += i82860_edac.o obj-$(CONFIG_EDAC_R82600) += r82600_edac.o +obj-$(CONFIG_EDAC_PASEMI) += pasemi_edac.o diff --git a/drivers/edac/amd76x_edac.c b/drivers/edac/amd76x_edac.c index f79f6b587bf..f2207541059 100644 --- a/drivers/edac/amd76x_edac.c +++ b/drivers/edac/amd76x_edac.c @@ -17,9 +17,9 @@ #include <linux/pci.h> #include <linux/pci_ids.h> #include <linux/slab.h> -#include "edac_mc.h" +#include "edac_core.h" -#define AMD76X_REVISION " Ver: 2.0.1 " __DATE__ +#define AMD76X_REVISION " Ver: 2.0.2 " __DATE__ #define EDAC_MOD_STR "amd76x_edac" #define amd76x_printk(level, fmt, arg...) 
\ @@ -86,13 +86,13 @@ struct amd76x_dev_info { static const struct amd76x_dev_info amd76x_devs[] = { [AMD761] = { - .ctl_name = "AMD761" - }, + .ctl_name = "AMD761"}, [AMD762] = { - .ctl_name = "AMD762" - }, + .ctl_name = "AMD762"}, }; +static struct edac_pci_ctl_info *amd76x_pci; + /** * amd76x_get_error_info - fetch error information * @mci: Memory controller @@ -102,21 +102,21 @@ static const struct amd76x_dev_info amd76x_devs[] = { * on the chip so that further errors will be reported */ static void amd76x_get_error_info(struct mem_ctl_info *mci, - struct amd76x_error_info *info) + struct amd76x_error_info *info) { struct pci_dev *pdev; pdev = to_pci_dev(mci->dev); pci_read_config_dword(pdev, AMD76X_ECC_MODE_STATUS, - &info->ecc_mode_status); + &info->ecc_mode_status); if (info->ecc_mode_status & BIT(8)) pci_write_bits32(pdev, AMD76X_ECC_MODE_STATUS, - (u32) BIT(8), (u32) BIT(8)); + (u32) BIT(8), (u32) BIT(8)); if (info->ecc_mode_status & BIT(9)) pci_write_bits32(pdev, AMD76X_ECC_MODE_STATUS, - (u32) BIT(9), (u32) BIT(9)); + (u32) BIT(9), (u32) BIT(9)); } /** @@ -130,7 +130,8 @@ static void amd76x_get_error_info(struct mem_ctl_info *mci, * then attempt to handle and clean up after the error */ static int amd76x_process_error_info(struct mem_ctl_info *mci, - struct amd76x_error_info *info, int handle_errors) + struct amd76x_error_info *info, + int handle_errors) { int error_found; u32 row; @@ -138,7 +139,7 @@ static int amd76x_process_error_info(struct mem_ctl_info *mci, error_found = 0; /* - * Check for an uncorrectable error + * Check for an uncorrectable error */ if (info->ecc_mode_status & BIT(8)) { error_found = 1; @@ -146,12 +147,12 @@ static int amd76x_process_error_info(struct mem_ctl_info *mci, if (handle_errors) { row = (info->ecc_mode_status >> 4) & 0xf; edac_mc_handle_ue(mci, mci->csrows[row].first_page, 0, - row, mci->ctl_name); + row, mci->ctl_name); } } /* - * Check for a correctable error + * Check for a correctable error */ if 
(info->ecc_mode_status & BIT(9)) { error_found = 1; @@ -159,7 +160,7 @@ static int amd76x_process_error_info(struct mem_ctl_info *mci, if (handle_errors) { row = info->ecc_mode_status & 0xf; edac_mc_handle_ce(mci, mci->csrows[row].first_page, 0, - 0, row, 0, mci->ctl_name); + 0, row, 0, mci->ctl_name); } } @@ -182,7 +183,7 @@ static void amd76x_check(struct mem_ctl_info *mci) } static void amd76x_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev, - enum edac_type edac_mode) + enum edac_type edac_mode) { struct csrow_info *csrow; u32 mba, mba_base, mba_mask, dms; @@ -193,8 +194,7 @@ static void amd76x_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev, /* find the DRAM Chip Select Base address and mask */ pci_read_config_dword(pdev, - AMD76X_MEM_BASE_ADDR + (index * 4), - &mba); + AMD76X_MEM_BASE_ADDR + (index * 4), &mba); if (!(mba & BIT(0))) continue; @@ -238,7 +238,7 @@ static int amd76x_probe1(struct pci_dev *pdev, int dev_idx) debugf0("%s()\n", __func__); pci_read_config_dword(pdev, AMD76X_ECC_MODE_STATUS, &ems); ems_mode = (ems >> 10) & 0x3; - mci = edac_mc_alloc(0, AMD76X_NR_CSROWS, AMD76X_NR_CHANS); + mci = edac_mc_alloc(0, AMD76X_NR_CSROWS, AMD76X_NR_CHANS, 0); if (mci == NULL) { return -ENOMEM; @@ -249,24 +249,36 @@ static int amd76x_probe1(struct pci_dev *pdev, int dev_idx) mci->mtype_cap = MEM_FLAG_RDDR; mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_EC | EDAC_FLAG_SECDED; mci->edac_cap = ems_mode ? 
- (EDAC_FLAG_EC | EDAC_FLAG_SECDED) : EDAC_FLAG_NONE; + (EDAC_FLAG_EC | EDAC_FLAG_SECDED) : EDAC_FLAG_NONE; mci->mod_name = EDAC_MOD_STR; mci->mod_ver = AMD76X_REVISION; mci->ctl_name = amd76x_devs[dev_idx].ctl_name; + mci->dev_name = pci_name(pdev); mci->edac_check = amd76x_check; mci->ctl_page_to_phys = NULL; amd76x_init_csrows(mci, pdev, ems_modes[ems_mode]); - amd76x_get_error_info(mci, &discard); /* clear counters */ + amd76x_get_error_info(mci, &discard); /* clear counters */ /* Here we assume that we will never see multiple instances of this * type of memory controller. The ID is therefore hardcoded to 0. */ - if (edac_mc_add_mc(mci,0)) { + if (edac_mc_add_mc(mci)) { debugf3("%s(): failed edac_mc_add_mc()\n", __func__); goto fail; } + /* allocating generic PCI control info */ + amd76x_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR); + if (!amd76x_pci) { + printk(KERN_WARNING + "%s(): Unable to create PCI control\n", + __func__); + printk(KERN_WARNING + "%s(): PCI error report via EDAC not setup\n", + __func__); + } + /* get this far and it's successful */ debugf3("%s(): success\n", __func__); return 0; @@ -278,7 +290,7 @@ fail: /* returns count (>= 0), or negative on error */ static int __devinit amd76x_init_one(struct pci_dev *pdev, - const struct pci_device_id *ent) + const struct pci_device_id *ent) { debugf0("%s()\n", __func__); @@ -300,6 +312,9 @@ static void __devexit amd76x_remove_one(struct pci_dev *pdev) debugf0("%s()\n", __func__); + if (amd76x_pci) + edac_pci_release_generic_ctl(amd76x_pci); + if ((mci = edac_mc_del_mc(&pdev->dev)) == NULL) return; @@ -308,16 +323,14 @@ static void __devexit amd76x_remove_one(struct pci_dev *pdev) static const struct pci_device_id amd76x_pci_tbl[] __devinitdata = { { - PCI_VEND_DEV(AMD, FE_GATE_700C), PCI_ANY_ID, PCI_ANY_ID, 0, 0, - AMD762 - }, + PCI_VEND_DEV(AMD, FE_GATE_700C), PCI_ANY_ID, PCI_ANY_ID, 0, 0, + AMD762}, { - PCI_VEND_DEV(AMD, FE_GATE_700E), PCI_ANY_ID, PCI_ANY_ID, 0, 0, - AMD761 - }, + 
PCI_VEND_DEV(AMD, FE_GATE_700E), PCI_ANY_ID, PCI_ANY_ID, 0, 0, + AMD761}, { - 0, - } /* 0 terminated list. */ + 0, + } /* 0 terminated list. */ }; MODULE_DEVICE_TABLE(pci, amd76x_pci_tbl); diff --git a/drivers/edac/e752x_edac.c b/drivers/edac/e752x_edac.c index 8bcc887692a..3bba224cb55 100644 --- a/drivers/edac/e752x_edac.c +++ b/drivers/edac/e752x_edac.c @@ -22,13 +22,16 @@ #include <linux/pci.h> #include <linux/pci_ids.h> #include <linux/slab.h> -#include "edac_mc.h" +#include <linux/edac.h> +#include "edac_core.h" -#define E752X_REVISION " Ver: 2.0.1 " __DATE__ +#define E752X_REVISION " Ver: 2.0.2 " __DATE__ #define EDAC_MOD_STR "e752x_edac" static int force_function_unhide; +static struct edac_pci_ctl_info *e752x_pci; + #define e752x_printk(level, fmt, arg...) \ edac_printk(level, "e752x", fmt, ##arg) @@ -203,25 +206,22 @@ static const struct e752x_dev_info e752x_devs[] = { [E7520] = { .err_dev = PCI_DEVICE_ID_INTEL_7520_1_ERR, .ctl_dev = PCI_DEVICE_ID_INTEL_7520_0, - .ctl_name = "E7520" - }, + .ctl_name = "E7520"}, [E7525] = { .err_dev = PCI_DEVICE_ID_INTEL_7525_1_ERR, .ctl_dev = PCI_DEVICE_ID_INTEL_7525_0, - .ctl_name = "E7525" - }, + .ctl_name = "E7525"}, [E7320] = { .err_dev = PCI_DEVICE_ID_INTEL_7320_1_ERR, .ctl_dev = PCI_DEVICE_ID_INTEL_7320_0, - .ctl_name = "E7320" - }, + .ctl_name = "E7320"}, }; static unsigned long ctl_page_to_phys(struct mem_ctl_info *mci, - unsigned long page) + unsigned long page) { u32 remap; - struct e752x_pvt *pvt = (struct e752x_pvt *) mci->pvt_info; + struct e752x_pvt *pvt = (struct e752x_pvt *)mci->pvt_info; debugf3("%s()\n", __func__); @@ -241,13 +241,13 @@ static unsigned long ctl_page_to_phys(struct mem_ctl_info *mci, } static void do_process_ce(struct mem_ctl_info *mci, u16 error_one, - u32 sec1_add, u16 sec1_syndrome) + u32 sec1_add, u16 sec1_syndrome) { u32 page; int row; int channel; int i; - struct e752x_pvt *pvt = (struct e752x_pvt *) mci->pvt_info; + struct e752x_pvt *pvt = (struct e752x_pvt *)mci->pvt_info; 
debugf3("%s()\n", __func__); @@ -261,7 +261,8 @@ static void do_process_ce(struct mem_ctl_info *mci, u16 error_one, e752x_printk(KERN_WARNING, "Test row %d Table %d %d %d %d %d %d %d %d\n", row, pvt->map[0], pvt->map[1], pvt->map[2], pvt->map[3], - pvt->map[4], pvt->map[5], pvt->map[6], pvt->map[7]); + pvt->map[4], pvt->map[5], pvt->map[6], + pvt->map[7]); /* test for channel remapping */ for (i = 0; i < 8; i++) { @@ -275,24 +276,22 @@ static void do_process_ce(struct mem_ctl_info *mci, u16 error_one, row = i; else e752x_mc_printk(mci, KERN_WARNING, - "row %d not found in remap table\n", row); + "row %d not found in remap table\n", + row); } else row = edac_mc_find_csrow_by_page(mci, page); /* 0 = channel A, 1 = channel B */ channel = !(error_one & 1); - if (!pvt->map_type) - row = 7 - row; - /* e752x mc reads 34:6 of the DRAM linear address */ edac_mc_handle_ce(mci, page, offset_in_page(sec1_add << 4), sec1_syndrome, row, channel, "e752x CE"); } static inline void process_ce(struct mem_ctl_info *mci, u16 error_one, - u32 sec1_add, u16 sec1_syndrome, int *error_found, - int handle_error) + u32 sec1_add, u16 sec1_syndrome, int *error_found, + int handle_error) { *error_found = 1; @@ -301,11 +300,11 @@ static inline void process_ce(struct mem_ctl_info *mci, u16 error_one, } static void do_process_ue(struct mem_ctl_info *mci, u16 error_one, - u32 ded_add, u32 scrb_add) + u32 ded_add, u32 scrb_add) { u32 error_2b, block_page; int row; - struct e752x_pvt *pvt = (struct e752x_pvt *) mci->pvt_info; + struct e752x_pvt *pvt = (struct e752x_pvt *)mci->pvt_info; debugf3("%s()\n", __func__); @@ -316,14 +315,14 @@ static void do_process_ue(struct mem_ctl_info *mci, u16 error_one, block_page = error_2b >> (PAGE_SHIFT - 4); row = pvt->mc_symmetric ? 
- /* chip select are bits 14 & 13 */ + /* chip select are bits 14 & 13 */ ((block_page >> 1) & 3) : edac_mc_find_csrow_by_page(mci, block_page); /* e752x mc reads 34:6 of the DRAM linear address */ edac_mc_handle_ue(mci, block_page, - offset_in_page(error_2b << 4), - row, "e752x UE from Read"); + offset_in_page(error_2b << 4), + row, "e752x UE from Read"); } if (error_one & 0x0404) { error_2b = scrb_add; @@ -332,19 +331,20 @@ static void do_process_ue(struct mem_ctl_info *mci, u16 error_one, block_page = error_2b >> (PAGE_SHIFT - 4); row = pvt->mc_symmetric ? - /* chip select are bits 14 & 13 */ + /* chip select are bits 14 & 13 */ ((block_page >> 1) & 3) : edac_mc_find_csrow_by_page(mci, block_page); /* e752x mc reads 34:6 of the DRAM linear address */ edac_mc_handle_ue(mci, block_page, - offset_in_page(error_2b << 4), - row, "e752x UE from Scruber"); + offset_in_page(error_2b << 4), + row, "e752x UE from Scruber"); } } static inline void process_ue(struct mem_ctl_info *mci, u16 error_one, - u32 ded_add, u32 scrb_add, int *error_found, int handle_error) + u32 ded_add, u32 scrb_add, int *error_found, + int handle_error) { *error_found = 1; @@ -353,7 +353,7 @@ static inline void process_ue(struct mem_ctl_info *mci, u16 error_one, } static inline void process_ue_no_info_wr(struct mem_ctl_info *mci, - int *error_found, int handle_error) + int *error_found, int handle_error) { *error_found = 1; @@ -365,24 +365,24 @@ static inline void process_ue_no_info_wr(struct mem_ctl_info *mci, } static void do_process_ded_retry(struct mem_ctl_info *mci, u16 error, - u32 retry_add) + u32 retry_add) { u32 error_1b, page; int row; - struct e752x_pvt *pvt = (struct e752x_pvt *) mci->pvt_info; + struct e752x_pvt *pvt = (struct e752x_pvt *)mci->pvt_info; error_1b = retry_add; - page = error_1b >> (PAGE_SHIFT - 4); /* convert the addr to 4k page */ - row = pvt->mc_symmetric ? 
- ((page >> 1) & 3) : /* chip select are bits 14 & 13 */ + page = error_1b >> (PAGE_SHIFT - 4); /* convert the addr to 4k page */ + row = pvt->mc_symmetric ? ((page >> 1) & 3) : /* chip select are bits 14 & 13 */ edac_mc_find_csrow_by_page(mci, page); e752x_mc_printk(mci, KERN_WARNING, - "CE page 0x%lx, row %d : Memory read retry\n", - (long unsigned int) page, row); + "CE page 0x%lx, row %d : Memory read retry\n", + (long unsigned int)page, row); } static inline void process_ded_retry(struct mem_ctl_info *mci, u16 error, - u32 retry_add, int *error_found, int handle_error) + u32 retry_add, int *error_found, + int handle_error) { *error_found = 1; @@ -391,7 +391,7 @@ static inline void process_ded_retry(struct mem_ctl_info *mci, u16 error, } static inline void process_threshold_ce(struct mem_ctl_info *mci, u16 error, - int *error_found, int handle_error) + int *error_found, int handle_error) { *error_found = 1; @@ -420,7 +420,7 @@ static void do_global_error(int fatal, u32 errors) } static inline void global_error(int fatal, u32 errors, int *error_found, - int handle_error) + int handle_error) { *error_found = 1; @@ -447,7 +447,7 @@ static void do_hub_error(int fatal, u8 errors) } static inline void hub_error(int fatal, u8 errors, int *error_found, - int handle_error) + int handle_error) { *error_found = 1; @@ -505,7 +505,7 @@ static void do_sysbus_error(int fatal, u32 errors) } static inline void sysbus_error(int fatal, u32 errors, int *error_found, - int handle_error) + int handle_error) { *error_found = 1; @@ -514,7 +514,7 @@ static inline void sysbus_error(int fatal, u32 errors, int *error_found, } static void e752x_check_hub_interface(struct e752x_error_info *info, - int *error_found, int handle_error) + int *error_found, int handle_error) { u8 stat8; @@ -522,33 +522,32 @@ static void e752x_check_hub_interface(struct e752x_error_info *info, stat8 = info->hi_ferr; - if(stat8 & 0x7f) { /* Error, so process */ + if (stat8 & 0x7f) { /* Error, so process */ stat8 
&= 0x7f; - if(stat8 & 0x2b) + if (stat8 & 0x2b) hub_error(1, stat8 & 0x2b, error_found, handle_error); - if(stat8 & 0x54) + if (stat8 & 0x54) hub_error(0, stat8 & 0x54, error_found, handle_error); } - //pci_read_config_byte(dev,E752X_HI_NERR,&stat8); stat8 = info->hi_nerr; - if(stat8 & 0x7f) { /* Error, so process */ + if (stat8 & 0x7f) { /* Error, so process */ stat8 &= 0x7f; if (stat8 & 0x2b) hub_error(1, stat8 & 0x2b, error_found, handle_error); - if(stat8 & 0x54) + if (stat8 & 0x54) hub_error(0, stat8 & 0x54, error_found, handle_error); } } static void e752x_check_sysbus(struct e752x_error_info *info, - int *error_found, int handle_error) + int *error_found, int handle_error) { u32 stat32, error32; @@ -556,47 +555,47 @@ static void e752x_check_sysbus(struct e752x_error_info *info, stat32 = info->sysbus_ferr + (info->sysbus_nerr << 16); if (stat32 == 0) - return; /* no errors */ + return; /* no errors */ error32 = (stat32 >> 16) & 0x3ff; stat32 = stat32 & 0x3ff; - if(stat32 & 0x087) + if (stat32 & 0x087) sysbus_error(1, stat32 & 0x087, error_found, handle_error); - if(stat32 & 0x378) + if (stat32 & 0x378) sysbus_error(0, stat32 & 0x378, error_found, handle_error); - if(error32 & 0x087) + if (error32 & 0x087) sysbus_error(1, error32 & 0x087, error_found, handle_error); - if(error32 & 0x378) + if (error32 & 0x378) sysbus_error(0, error32 & 0x378, error_found, handle_error); } -static void e752x_check_membuf (struct e752x_error_info *info, - int *error_found, int handle_error) +static void e752x_check_membuf(struct e752x_error_info *info, + int *error_found, int handle_error) { u8 stat8; stat8 = info->buf_ferr; - if (stat8 & 0x0f) { /* Error, so process */ + if (stat8 & 0x0f) { /* Error, so process */ stat8 &= 0x0f; membuf_error(stat8, error_found, handle_error); } stat8 = info->buf_nerr; - if (stat8 & 0x0f) { /* Error, so process */ + if (stat8 & 0x0f) { /* Error, so process */ stat8 &= 0x0f; membuf_error(stat8, error_found, handle_error); } } -static void 
e752x_check_dram (struct mem_ctl_info *mci, - struct e752x_error_info *info, int *error_found, - int handle_error) +static void e752x_check_dram(struct mem_ctl_info *mci, + struct e752x_error_info *info, int *error_found, + int handle_error) { u16 error_one, error_next; @@ -604,55 +603,52 @@ static void e752x_check_dram (struct mem_ctl_info *mci, error_next = info->dram_nerr; /* decode and report errors */ - if(error_one & 0x0101) /* check first error correctable */ + if (error_one & 0x0101) /* check first error correctable */ process_ce(mci, error_one, info->dram_sec1_add, - info->dram_sec1_syndrome, error_found, - handle_error); + info->dram_sec1_syndrome, error_found, handle_error); - if(error_next & 0x0101) /* check next error correctable */ + if (error_next & 0x0101) /* check next error correctable */ process_ce(mci, error_next, info->dram_sec2_add, - info->dram_sec2_syndrome, error_found, - handle_error); + info->dram_sec2_syndrome, error_found, handle_error); - if(error_one & 0x4040) + if (error_one & 0x4040) process_ue_no_info_wr(mci, error_found, handle_error); - if(error_next & 0x4040) + if (error_next & 0x4040) process_ue_no_info_wr(mci, error_found, handle_error); - if(error_one & 0x2020) + if (error_one & 0x2020) process_ded_retry(mci, error_one, info->dram_retr_add, - error_found, handle_error); + error_found, handle_error); - if(error_next & 0x2020) + if (error_next & 0x2020) process_ded_retry(mci, error_next, info->dram_retr_add, - error_found, handle_error); + error_found, handle_error); - if(error_one & 0x0808) - process_threshold_ce(mci, error_one, error_found, - handle_error); + if (error_one & 0x0808) + process_threshold_ce(mci, error_one, error_found, handle_error); - if(error_next & 0x0808) + if (error_next & 0x0808) process_threshold_ce(mci, error_next, error_found, - handle_error); + handle_error); - if(error_one & 0x0606) + if (error_one & 0x0606) process_ue(mci, error_one, info->dram_ded_add, - info->dram_scrb_add, error_found, 
handle_error); + info->dram_scrb_add, error_found, handle_error); - if(error_next & 0x0606) + if (error_next & 0x0606) process_ue(mci, error_next, info->dram_ded_add, - info->dram_scrb_add, error_found, handle_error); + info->dram_scrb_add, error_found, handle_error); } -static void e752x_get_error_info (struct mem_ctl_info *mci, - struct e752x_error_info *info) +static void e752x_get_error_info(struct mem_ctl_info *mci, + struct e752x_error_info *info) { struct pci_dev *dev; struct e752x_pvt *pvt; memset(info, 0, sizeof(*info)); - pvt = (struct e752x_pvt *) mci->pvt_info; + pvt = (struct e752x_pvt *)mci->pvt_info; dev = pvt->dev_d0f1; pci_read_config_dword(dev, E752X_FERR_GLOBAL, &info->ferr_global); @@ -661,8 +657,7 @@ static void e752x_get_error_info (struct mem_ctl_info *mci, pci_read_config_word(dev, E752X_SYSBUS_FERR, &info->sysbus_ferr); pci_read_config_byte(dev, E752X_BUF_FERR, &info->buf_ferr); - pci_read_config_word(dev, E752X_DRAM_FERR, - &info->dram_ferr); + pci_read_config_word(dev, E752X_DRAM_FERR, &info->dram_ferr); pci_read_config_dword(dev, E752X_DRAM_SEC1_ADD, &info->dram_sec1_add); pci_read_config_word(dev, E752X_DRAM_SEC1_SYNDROME, @@ -688,7 +683,7 @@ static void e752x_get_error_info (struct mem_ctl_info *mci, if (info->dram_ferr) pci_write_bits16(pvt->bridge_ck, E752X_DRAM_FERR, - info->dram_ferr, info->dram_ferr); + info->dram_ferr, info->dram_ferr); pci_write_config_dword(dev, E752X_FERR_GLOBAL, info->ferr_global); @@ -701,8 +696,7 @@ static void e752x_get_error_info (struct mem_ctl_info *mci, pci_read_config_word(dev, E752X_SYSBUS_NERR, &info->sysbus_nerr); pci_read_config_byte(dev, E752X_BUF_NERR, &info->buf_nerr); - pci_read_config_word(dev, E752X_DRAM_NERR, - &info->dram_nerr); + pci_read_config_word(dev, E752X_DRAM_NERR, &info->dram_nerr); pci_read_config_dword(dev, E752X_DRAM_SEC2_ADD, &info->dram_sec2_add); pci_read_config_word(dev, E752X_DRAM_SEC2_SYNDROME, @@ -722,15 +716,16 @@ static void e752x_get_error_info (struct mem_ctl_info 
*mci, if (info->dram_nerr) pci_write_bits16(pvt->bridge_ck, E752X_DRAM_NERR, - info->dram_nerr, info->dram_nerr); + info->dram_nerr, info->dram_nerr); pci_write_config_dword(dev, E752X_NERR_GLOBAL, info->nerr_global); } } -static int e752x_process_error_info (struct mem_ctl_info *mci, - struct e752x_error_info *info, int handle_errors) +static int e752x_process_error_info(struct mem_ctl_info *mci, + struct e752x_error_info *info, + int handle_errors) { u32 error32, stat32; int error_found; @@ -776,26 +771,38 @@ static inline int dual_channel_active(u16 ddrcsr) return (((ddrcsr >> 12) & 3) == 3); } +/* Remap csrow index numbers if map_type is "reverse" + */ +static inline int remap_csrow_index(struct mem_ctl_info *mci, int index) +{ + struct e752x_pvt *pvt = mci->pvt_info; + + if (!pvt->map_type) + return (7 - index); + + return (index); +} + static void e752x_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev, - u16 ddrcsr) + u16 ddrcsr) { struct csrow_info *csrow; unsigned long last_cumul_size; int index, mem_dev, drc_chan; - int drc_drbg; /* DRB granularity 0=64mb, 1=128mb */ - int drc_ddim; /* DRAM Data Integrity Mode 0=none, 2=edac */ + int drc_drbg; /* DRB granularity 0=64mb, 1=128mb */ + int drc_ddim; /* DRAM Data Integrity Mode 0=none, 2=edac */ u8 value; u32 dra, drc, cumul_size; dra = 0; - for (index=0; index < 4; index++) { + for (index = 0; index < 4; index++) { u8 dra_reg; - pci_read_config_byte(pdev, E752X_DRA+index, &dra_reg); + pci_read_config_byte(pdev, E752X_DRA + index, &dra_reg); dra |= dra_reg << (index * 8); } pci_read_config_dword(pdev, E752X_DRC, &drc); drc_chan = dual_channel_active(ddrcsr); - drc_drbg = drc_chan + 1; /* 128 in dual mode, 64 in single */ + drc_drbg = drc_chan + 1; /* 128 in dual mode, 64 in single */ drc_ddim = (drc >> 20) & 0x3; /* The dram row boundary (DRB) reg values are boundary address for @@ -806,7 +813,7 @@ static void e752x_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev, for (last_cumul_size = 
index = 0; index < mci->nr_csrows; index++) { /* mem_dev 0=x8, 1=x4 */ mem_dev = (dra >> (index * 4 + 2)) & 0x3; - csrow = &mci->csrows[index]; + csrow = &mci->csrows[remap_csrow_index(mci, index)]; mem_dev = (mem_dev == 2); pci_read_config_byte(pdev, E752X_DRB + index, &value); @@ -843,10 +850,10 @@ static void e752x_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev, } static void e752x_init_mem_map_table(struct pci_dev *pdev, - struct e752x_pvt *pvt) + struct e752x_pvt *pvt) { int index; - u8 value, last, row, stat8; + u8 value, last, row; last = 0; row = 0; @@ -858,7 +865,7 @@ static void e752x_init_mem_map_table(struct pci_dev *pdev, /* no dimm in the slot, so flag it as empty */ pvt->map[index] = 0xff; pvt->map[index + 1] = 0xff; - } else { /* there is a dimm in the slot */ + } else { /* there is a dimm in the slot */ pvt->map[index] = row; row++; last = value; @@ -866,31 +873,25 @@ static void e752x_init_mem_map_table(struct pci_dev *pdev, * sided */ pci_read_config_byte(pdev, E752X_DRB + index + 1, - &value); - pvt->map[index + 1] = (value == last) ? - 0xff : /* the dimm is single sided, - so flag as empty */ - row; /* this is a double sided dimm - to save the next row # */ + &value); + + /* the dimm is single sided, so flag as empty */ + /* this is a double sided dimm to save the next row #*/ + pvt->map[index + 1] = (value == last) ? 0xff : row; row++; last = value; } } - - /* set the map type. 1 = normal, 0 = reversed */ - pci_read_config_byte(pdev, E752X_DRM, &stat8); - pvt->map_type = ((stat8 & 0x0f) > ((stat8 >> 4) & 0x0f)); } /* Return 0 on success or 1 on failure. 
*/ static int e752x_get_devs(struct pci_dev *pdev, int dev_idx, - struct e752x_pvt *pvt) + struct e752x_pvt *pvt) { struct pci_dev *dev; pvt->bridge_ck = pci_get_device(PCI_VENDOR_ID_INTEL, - pvt->dev_info->err_dev, - pvt->bridge_ck); + pvt->dev_info->err_dev, pvt->bridge_ck); if (pvt->bridge_ck == NULL) pvt->bridge_ck = pci_scan_single_device(pdev->bus, @@ -898,13 +899,13 @@ static int e752x_get_devs(struct pci_dev *pdev, int dev_idx, if (pvt->bridge_ck == NULL) { e752x_printk(KERN_ERR, "error reporting device not found:" - "vendor %x device 0x%x (broken BIOS?)\n", - PCI_VENDOR_ID_INTEL, e752x_devs[dev_idx].err_dev); + "vendor %x device 0x%x (broken BIOS?)\n", + PCI_VENDOR_ID_INTEL, e752x_devs[dev_idx].err_dev); return 1; } dev = pci_get_device(PCI_VENDOR_ID_INTEL, e752x_devs[dev_idx].ctl_dev, - NULL); + NULL); if (dev == NULL) goto fail; @@ -942,12 +943,22 @@ static int e752x_probe1(struct pci_dev *pdev, int dev_idx) struct mem_ctl_info *mci; struct e752x_pvt *pvt; u16 ddrcsr; - int drc_chan; /* Number of channels 0=1chan,1=2chan */ + int drc_chan; /* Number of channels 0=1chan,1=2chan */ struct e752x_error_info discard; debugf0("%s(): mci\n", __func__); debugf0("Starting Probe1\n"); + /* make sure error reporting method is sane */ + switch (edac_op_state) { + case EDAC_OPSTATE_POLL: + case EDAC_OPSTATE_NMI: + break; + default: + edac_op_state = EDAC_OPSTATE_POLL; + break; + } + /* check to see if device 0 function 1 is enabled; if it isn't, we * assume the BIOS has reserved it for a reason and is expecting * exclusive access, we take care not to violate that assumption and @@ -966,7 +977,7 @@ static int e752x_probe1(struct pci_dev *pdev, int dev_idx) /* Dual channel = 1, Single channel = 0 */ drc_chan = dual_channel_active(ddrcsr); - mci = edac_mc_alloc(sizeof(*pvt), E752X_NR_CSROWS, drc_chan + 1); + mci = edac_mc_alloc(sizeof(*pvt), E752X_NR_CSROWS, drc_chan + 1, 0); if (mci == NULL) { return -ENOMEM; @@ -975,14 +986,14 @@ static int e752x_probe1(struct pci_dev 
*pdev, int dev_idx) debugf3("%s(): init mci\n", __func__); mci->mtype_cap = MEM_FLAG_RDDR; mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED | - EDAC_FLAG_S4ECD4ED; + EDAC_FLAG_S4ECD4ED; /* FIXME - what if different memory types are in different csrows? */ mci->mod_name = EDAC_MOD_STR; mci->mod_ver = E752X_REVISION; mci->dev = &pdev->dev; debugf3("%s(): init pvt\n", __func__); - pvt = (struct e752x_pvt *) mci->pvt_info; + pvt = (struct e752x_pvt *)mci->pvt_info; pvt->dev_info = &e752x_devs[dev_idx]; pvt->mc_symmetric = ((ddrcsr & 0x10) != 0); @@ -993,16 +1004,20 @@ static int e752x_probe1(struct pci_dev *pdev, int dev_idx) debugf3("%s(): more mci init\n", __func__); mci->ctl_name = pvt->dev_info->ctl_name; + mci->dev_name = pci_name(pdev); mci->edac_check = e752x_check; mci->ctl_page_to_phys = ctl_page_to_phys; - e752x_init_csrows(mci, pdev, ddrcsr); - e752x_init_mem_map_table(pdev, pvt); - - /* set the map type. 1 = normal, 0 = reversed */ + /* set the map type. 1 = normal, 0 = reversed + * Must be set before e752x_init_csrows in case csrow mapping + * is reversed. + */ pci_read_config_byte(pdev, E752X_DRM, &stat8); pvt->map_type = ((stat8 & 0x0f) > ((stat8 >> 4) & 0x0f)); + e752x_init_csrows(mci, pdev, ddrcsr); + e752x_init_mem_map_table(pdev, pvt); + mci->edac_cap |= EDAC_FLAG_NONE; debugf3("%s(): tolm, remapbase, remaplimit\n", __func__); @@ -1014,19 +1029,29 @@ static int e752x_probe1(struct pci_dev *pdev, int dev_idx) pci_read_config_word(pdev, E752X_REMAPLIMIT, &pci_data); pvt->remaplimit = ((u32) pci_data) << 14; e752x_printk(KERN_INFO, - "tolm = %x, remapbase = %x, remaplimit = %x\n", pvt->tolm, - pvt->remapbase, pvt->remaplimit); + "tolm = %x, remapbase = %x, remaplimit = %x\n", + pvt->tolm, pvt->remapbase, pvt->remaplimit); /* Here we assume that we will never see multiple instances of this * type of memory controller. The ID is therefore hardcoded to 0. 
*/ - if (edac_mc_add_mc(mci,0)) { + if (edac_mc_add_mc(mci)) { debugf3("%s(): failed edac_mc_add_mc()\n", __func__); goto fail; } e752x_init_error_reporting_regs(pvt); - e752x_get_error_info(mci, &discard); /* clear other MCH errors */ + e752x_get_error_info(mci, &discard); /* clear other MCH errors */ + + /* allocating generic PCI control info */ + e752x_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR); + if (!e752x_pci) { + printk(KERN_WARNING + "%s(): Unable to create PCI control\n", __func__); + printk(KERN_WARNING + "%s(): PCI error report via EDAC not setup\n", + __func__); + } /* get this far and it's successful */ debugf3("%s(): success\n", __func__); @@ -1043,12 +1068,12 @@ fail: /* returns count (>= 0), or negative on error */ static int __devinit e752x_init_one(struct pci_dev *pdev, - const struct pci_device_id *ent) + const struct pci_device_id *ent) { debugf0("%s()\n", __func__); /* wake up and enable device */ - if(pci_enable_device(pdev) < 0) + if (pci_enable_device(pdev) < 0) return -EIO; return e752x_probe1(pdev, ent->driver_data); @@ -1061,10 +1086,13 @@ static void __devexit e752x_remove_one(struct pci_dev *pdev) debugf0("%s()\n", __func__); + if (e752x_pci) + edac_pci_release_generic_ctl(e752x_pci); + if ((mci = edac_mc_del_mc(&pdev->dev)) == NULL) return; - pvt = (struct e752x_pvt *) mci->pvt_info; + pvt = (struct e752x_pvt *)mci->pvt_info; pci_dev_put(pvt->dev_d0f0); pci_dev_put(pvt->dev_d0f1); pci_dev_put(pvt->bridge_ck); @@ -1073,20 +1101,17 @@ static void __devexit e752x_remove_one(struct pci_dev *pdev) static const struct pci_device_id e752x_pci_tbl[] __devinitdata = { { - PCI_VEND_DEV(INTEL, 7520_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, - E7520 - }, + PCI_VEND_DEV(INTEL, 7520_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, + E7520}, { - PCI_VEND_DEV(INTEL, 7525_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, - E7525 - }, + PCI_VEND_DEV(INTEL, 7525_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, + E7525}, { - PCI_VEND_DEV(INTEL, 7320_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, - E7320 - 
}, + PCI_VEND_DEV(INTEL, 7320_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, + E7320}, { - 0, - } /* 0 terminated list. */ + 0, + } /* 0 terminated list. */ }; MODULE_DEVICE_TABLE(pci, e752x_pci_tbl); @@ -1122,5 +1147,6 @@ MODULE_DESCRIPTION("MC support for Intel e752x memory controllers"); module_param(force_function_unhide, int, 0444); MODULE_PARM_DESC(force_function_unhide, "if BIOS sets Dev0:Fun1 up as hidden:" -" 1=force unhide and hope BIOS doesn't fight driver for Dev0:Fun1 access"); - + " 1=force unhide and hope BIOS doesn't fight driver for Dev0:Fun1 access"); +module_param(edac_op_state, int, 0444); +MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); diff --git a/drivers/edac/e7xxx_edac.c b/drivers/edac/e7xxx_edac.c index 310d91b41c9..96ecc492664 100644 --- a/drivers/edac/e7xxx_edac.c +++ b/drivers/edac/e7xxx_edac.c @@ -27,9 +27,10 @@ #include <linux/pci.h> #include <linux/pci_ids.h> #include <linux/slab.h> -#include "edac_mc.h" +#include <linux/edac.h> +#include "edac_core.h" -#define E7XXX_REVISION " Ver: 2.0.1 " __DATE__ +#define E7XXX_REVISION " Ver: 2.0.2 " __DATE__ #define EDAC_MOD_STR "e7xxx_edac" #define e7xxx_printk(level, fmt, arg...) \ @@ -143,23 +144,21 @@ struct e7xxx_error_info { u32 dram_uelog_add; }; +static struct edac_pci_ctl_info *e7xxx_pci; + static const struct e7xxx_dev_info e7xxx_devs[] = { [E7500] = { .err_dev = PCI_DEVICE_ID_INTEL_7500_1_ERR, - .ctl_name = "E7500" - }, + .ctl_name = "E7500"}, [E7501] = { .err_dev = PCI_DEVICE_ID_INTEL_7501_1_ERR, - .ctl_name = "E7501" - }, + .ctl_name = "E7501"}, [E7505] = { .err_dev = PCI_DEVICE_ID_INTEL_7505_1_ERR, - .ctl_name = "E7505" - }, + .ctl_name = "E7505"}, [E7205] = { .err_dev = PCI_DEVICE_ID_INTEL_7205_1_ERR, - .ctl_name = "E7205" - }, + .ctl_name = "E7205"}, }; /* FIXME - is this valid for both SECDED and S4ECD4ED? 
*/ @@ -180,15 +179,15 @@ static inline int e7xxx_find_channel(u16 syndrome) } static unsigned long ctl_page_to_phys(struct mem_ctl_info *mci, - unsigned long page) + unsigned long page) { u32 remap; - struct e7xxx_pvt *pvt = (struct e7xxx_pvt *) mci->pvt_info; + struct e7xxx_pvt *pvt = (struct e7xxx_pvt *)mci->pvt_info; debugf3("%s()\n", __func__); if ((page < pvt->tolm) || - ((page >= 0x100000) && (page < pvt->remapbase))) + ((page >= 0x100000) && (page < pvt->remapbase))) return page; remap = (page - pvt->tolm) + pvt->remapbase; @@ -200,8 +199,7 @@ static unsigned long ctl_page_to_phys(struct mem_ctl_info *mci, return pvt->tolm - 1; } -static void process_ce(struct mem_ctl_info *mci, - struct e7xxx_error_info *info) +static void process_ce(struct mem_ctl_info *mci, struct e7xxx_error_info *info) { u32 error_1b, page; u16 syndrome; @@ -212,7 +210,7 @@ static void process_ce(struct mem_ctl_info *mci, /* read the error address */ error_1b = info->dram_celog_add; /* FIXME - should use PAGE_SHIFT */ - page = error_1b >> 6; /* convert the address to 4k page */ + page = error_1b >> 6; /* convert the address to 4k page */ /* read the syndrome */ syndrome = info->dram_celog_syndrome; /* FIXME - check for -1 */ @@ -228,8 +226,7 @@ static void process_ce_no_info(struct mem_ctl_info *mci) edac_mc_handle_ce_no_info(mci, "e7xxx CE log register overflow"); } -static void process_ue(struct mem_ctl_info *mci, - struct e7xxx_error_info *info) +static void process_ue(struct mem_ctl_info *mci, struct e7xxx_error_info *info) { u32 error_2b, block_page; int row; @@ -238,7 +235,7 @@ static void process_ue(struct mem_ctl_info *mci, /* read the error address */ error_2b = info->dram_uelog_add; /* FIXME - should use PAGE_SHIFT */ - block_page = error_2b >> 6; /* convert to 4k address */ + block_page = error_2b >> 6; /* convert to 4k address */ row = edac_mc_find_csrow_by_page(mci, block_page); edac_mc_handle_ue(mci, block_page, 0, row, "e7xxx UE"); } @@ -249,16 +246,14 @@ static void 
process_ue_no_info(struct mem_ctl_info *mci) edac_mc_handle_ue_no_info(mci, "e7xxx UE log register overflow"); } -static void e7xxx_get_error_info (struct mem_ctl_info *mci, - struct e7xxx_error_info *info) +static void e7xxx_get_error_info(struct mem_ctl_info *mci, + struct e7xxx_error_info *info) { struct e7xxx_pvt *pvt; - pvt = (struct e7xxx_pvt *) mci->pvt_info; - pci_read_config_byte(pvt->bridge_ck, E7XXX_DRAM_FERR, - &info->dram_ferr); - pci_read_config_byte(pvt->bridge_ck, E7XXX_DRAM_NERR, - &info->dram_nerr); + pvt = (struct e7xxx_pvt *)mci->pvt_info; + pci_read_config_byte(pvt->bridge_ck, E7XXX_DRAM_FERR, &info->dram_ferr); + pci_read_config_byte(pvt->bridge_ck, E7XXX_DRAM_NERR, &info->dram_nerr); if ((info->dram_ferr & 1) || (info->dram_nerr & 1)) { pci_read_config_dword(pvt->bridge_ck, E7XXX_DRAM_CELOG_ADD, @@ -279,8 +274,9 @@ static void e7xxx_get_error_info (struct mem_ctl_info *mci, pci_write_bits8(pvt->bridge_ck, E7XXX_DRAM_NERR, 0x03, 0x03); } -static int e7xxx_process_error_info (struct mem_ctl_info *mci, - struct e7xxx_error_info *info, int handle_errors) +static int e7xxx_process_error_info(struct mem_ctl_info *mci, + struct e7xxx_error_info *info, + int handle_errors) { int error_found; @@ -341,7 +337,6 @@ static inline int dual_channel_active(u32 drc, int dev_idx) return (dev_idx == E7501) ? ((drc >> 22) & 0x1) : 1; } - /* Return DRB granularity (0=32mb, 1=64mb). */ static inline int drb_granularity(u32 drc, int dev_idx) { @@ -349,9 +344,8 @@ static inline int drb_granularity(u32 drc, int dev_idx) return (dev_idx == E7501) ? 
((drc >> 18) & 0x3) : 1; } - static void e7xxx_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev, - int dev_idx, u32 drc) + int dev_idx, u32 drc) { unsigned long last_cumul_size; int index; @@ -419,10 +413,21 @@ static int e7xxx_probe1(struct pci_dev *pdev, int dev_idx) struct e7xxx_error_info discard; debugf0("%s(): mci\n", __func__); + + /* make sure error reporting method is sane */ + switch (edac_op_state) { + case EDAC_OPSTATE_POLL: + case EDAC_OPSTATE_NMI: + break; + default: + edac_op_state = EDAC_OPSTATE_POLL; + break; + } + pci_read_config_dword(pdev, E7XXX_DRC, &drc); drc_chan = dual_channel_active(drc, dev_idx); - mci = edac_mc_alloc(sizeof(*pvt), E7XXX_NR_CSROWS, drc_chan + 1); + mci = edac_mc_alloc(sizeof(*pvt), E7XXX_NR_CSROWS, drc_chan + 1, 0); if (mci == NULL) return -ENOMEM; @@ -430,17 +435,16 @@ static int e7xxx_probe1(struct pci_dev *pdev, int dev_idx) debugf3("%s(): init mci\n", __func__); mci->mtype_cap = MEM_FLAG_RDDR; mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED | - EDAC_FLAG_S4ECD4ED; + EDAC_FLAG_S4ECD4ED; /* FIXME - what if different memory types are in different csrows? 
*/ mci->mod_name = EDAC_MOD_STR; mci->mod_ver = E7XXX_REVISION; mci->dev = &pdev->dev; debugf3("%s(): init pvt\n", __func__); - pvt = (struct e7xxx_pvt *) mci->pvt_info; + pvt = (struct e7xxx_pvt *)mci->pvt_info; pvt->dev_info = &e7xxx_devs[dev_idx]; pvt->bridge_ck = pci_get_device(PCI_VENDOR_ID_INTEL, - pvt->dev_info->err_dev, - pvt->bridge_ck); + pvt->dev_info->err_dev, pvt->bridge_ck); if (!pvt->bridge_ck) { e7xxx_printk(KERN_ERR, "error reporting device not found:" @@ -451,6 +455,7 @@ static int e7xxx_probe1(struct pci_dev *pdev, int dev_idx) debugf3("%s(): more mci init\n", __func__); mci->ctl_name = pvt->dev_info->ctl_name; + mci->dev_name = pci_name(pdev); mci->edac_check = e7xxx_check; mci->ctl_page_to_phys = ctl_page_to_phys; e7xxx_init_csrows(mci, pdev, dev_idx, drc); @@ -473,11 +478,22 @@ static int e7xxx_probe1(struct pci_dev *pdev, int dev_idx) /* Here we assume that we will never see multiple instances of this * type of memory controller. The ID is therefore hardcoded to 0. 
*/ - if (edac_mc_add_mc(mci,0)) { + if (edac_mc_add_mc(mci)) { debugf3("%s(): failed edac_mc_add_mc()\n", __func__); goto fail1; } + /* allocating generic PCI control info */ + e7xxx_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR); + if (!e7xxx_pci) { + printk(KERN_WARNING + "%s(): Unable to create PCI control\n", + __func__); + printk(KERN_WARNING + "%s(): PCI error report via EDAC not setup\n", + __func__); + } + /* get this far and it's successful */ debugf3("%s(): success\n", __func__); return 0; @@ -493,7 +509,7 @@ fail0: /* returns count (>= 0), or negative on error */ static int __devinit e7xxx_init_one(struct pci_dev *pdev, - const struct pci_device_id *ent) + const struct pci_device_id *ent) { debugf0("%s()\n", __func__); @@ -509,34 +525,33 @@ static void __devexit e7xxx_remove_one(struct pci_dev *pdev) debugf0("%s()\n", __func__); + if (e7xxx_pci) + edac_pci_release_generic_ctl(e7xxx_pci); + if ((mci = edac_mc_del_mc(&pdev->dev)) == NULL) return; - pvt = (struct e7xxx_pvt *) mci->pvt_info; + pvt = (struct e7xxx_pvt *)mci->pvt_info; pci_dev_put(pvt->bridge_ck); edac_mc_free(mci); } static const struct pci_device_id e7xxx_pci_tbl[] __devinitdata = { { - PCI_VEND_DEV(INTEL, 7205_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, - E7205 - }, + PCI_VEND_DEV(INTEL, 7205_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, + E7205}, { - PCI_VEND_DEV(INTEL, 7500_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, - E7500 - }, + PCI_VEND_DEV(INTEL, 7500_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, + E7500}, { - PCI_VEND_DEV(INTEL, 7501_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, - E7501 - }, + PCI_VEND_DEV(INTEL, 7501_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, + E7501}, { - PCI_VEND_DEV(INTEL, 7505_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, - E7505 - }, + PCI_VEND_DEV(INTEL, 7505_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, + E7505}, { - 0, - } /* 0 terminated list. */ + 0, + } /* 0 terminated list. 
*/ }; MODULE_DEVICE_TABLE(pci, e7xxx_pci_tbl); @@ -563,5 +578,7 @@ module_exit(e7xxx_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Linux Networx (http://lnxi.com) Thayne Harbaugh et al\n" - "Based on.work by Dan Hollis et al"); + "Based on.work by Dan Hollis et al"); MODULE_DESCRIPTION("MC support for Intel e7xxx memory controllers"); +module_param(edac_op_state, int, 0444); +MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); diff --git a/drivers/edac/edac_mc.h b/drivers/edac/edac_core.h index 713444cc410..4e6bad15c4b 100644 --- a/drivers/edac/edac_mc.h +++ b/drivers/edac/edac_core.h @@ -1,6 +1,7 @@ /* - * MC kernel module - * (C) 2003 Linux Networx (http://lnxi.com) + * Defines, structures, APIs for edac_core module + * + * (C) 2007 Linux Networx (http://lnxi.com) * This file may be distributed under the terms of the * GNU General Public License. * @@ -11,12 +12,13 @@ * NMI handling support added by * Dave Peterson <dsp@llnl.gov> <dave_peterson@pobox.com> * - * $Id: edac_mc.h,v 1.4.2.10 2005/10/05 00:43:44 dsp_llnl Exp $ + * Refactored for multi-source files: + * Doug Thompson <norsk5@xmission.com> * */ -#ifndef _EDAC_MC_H_ -#define _EDAC_MC_H_ +#ifndef _EDAC_CORE_H_ +#define _EDAC_CORE_H_ #include <linux/kernel.h> #include <linux/types.h> @@ -30,9 +32,14 @@ #include <linux/completion.h> #include <linux/kobject.h> #include <linux/platform_device.h> +#include <linux/sysdev.h> +#include <linux/workqueue.h> +#include <linux/version.h> #define EDAC_MC_LABEL_LEN 31 -#define MC_PROC_NAME_MAX_LEN 7 +#define EDAC_DEVICE_NAME_LEN 31 +#define EDAC_ATTRIB_VALUE_LEN 15 +#define MC_PROC_NAME_MAX_LEN 7 #if PAGE_SHIFT < 20 #define PAGES_TO_MiB( pages ) ( ( pages ) >> ( 20 - PAGE_SHIFT ) ) @@ -49,6 +56,14 @@ #define edac_mc_chipset_printk(mci, level, prefix, fmt, arg...) \ printk(level "EDAC " prefix " MC%d: " fmt, mci->mc_idx, ##arg) +/* edac_device printk */ +#define edac_device_printk(ctl, level, fmt, arg...) 
\ + printk(level "EDAC DEVICE%d: " fmt, ctl->dev_idx, ##arg) + +/* edac_pci printk */ +#define edac_pci_printk(ctl, level, fmt, arg...) \ + printk(level "EDAC PCI%d: " fmt, ctl->pci_idx, ##arg) + /* prefixes for edac_printk() and edac_mc_printk() */ #define EDAC_MC "MC" #define EDAC_PCI "PCI" @@ -60,7 +75,7 @@ extern int edac_debug_level; #define edac_debug_printk(level, fmt, arg...) \ do { \ if (level <= edac_debug_level) \ - edac_printk(KERN_DEBUG, EDAC_DEBUG, fmt, ##arg); \ + edac_printk(KERN_EMERG, EDAC_DEBUG, fmt, ##arg); \ } while(0) #define debugf0( ... ) edac_debug_printk(0, __VA_ARGS__ ) @@ -69,7 +84,7 @@ extern int edac_debug_level; #define debugf3( ... ) edac_debug_printk(3, __VA_ARGS__ ) #define debugf4( ... ) edac_debug_printk(4, __VA_ARGS__ ) -#else /* !CONFIG_EDAC_DEBUG */ +#else /* !CONFIG_EDAC_DEBUG */ #define debugf0( ... ) #define debugf1( ... ) @@ -77,18 +92,14 @@ extern int edac_debug_level; #define debugf3( ... ) #define debugf4( ... ) -#endif /* !CONFIG_EDAC_DEBUG */ +#endif /* !CONFIG_EDAC_DEBUG */ #define BIT(x) (1 << (x)) #define PCI_VEND_DEV(vend, dev) PCI_VENDOR_ID_ ## vend, \ PCI_DEVICE_ID_ ## vend ## _ ## dev -#if defined(CONFIG_X86) && defined(CONFIG_PCI) -#define dev_name(dev) pci_name(to_pci_dev(dev)) -#else -#define dev_name(dev) to_platform_device(dev)->name -#endif +#define dev_name(dev) (dev)->dev_name /* memory devices */ enum dev_type { @@ -124,8 +135,9 @@ enum mem_type { MEM_DDR, /* Double data rate SDRAM */ MEM_RDDR, /* Registered Double data rate SDRAM */ MEM_RMBS, /* Rambus DRAM */ - MEM_DDR2, /* DDR2 RAM */ - MEM_FB_DDR2, /* fully buffered DDR2 */ + MEM_DDR2, /* DDR2 RAM */ + MEM_FB_DDR2, /* fully buffered DDR2 */ + MEM_RDDR2, /* Registered DDR2 RAM */ }; #define MEM_FLAG_EMPTY BIT(MEM_EMPTY) @@ -141,6 +153,7 @@ enum mem_type { #define MEM_FLAG_RMBS BIT(MEM_RMBS) #define MEM_FLAG_DDR2 BIT(MEM_DDR2) #define MEM_FLAG_FB_DDR2 BIT(MEM_FB_DDR2) +#define MEM_FLAG_RDDR2 BIT(MEM_RDDR2) /* chipset Error Detection and Correction 
capabilities and mode */ enum edac_type { @@ -181,16 +194,23 @@ enum scrub_type { }; #define SCRUB_FLAG_SW_PROG BIT(SCRUB_SW_PROG) -#define SCRUB_FLAG_SW_SRC BIT(SCRUB_SW_SRC_CORR) -#define SCRUB_FLAG_SW_PROG_SRC BIT(SCRUB_SW_PROG_SRC_CORR) +#define SCRUB_FLAG_SW_SRC BIT(SCRUB_SW_SRC) +#define SCRUB_FLAG_SW_PROG_SRC BIT(SCRUB_SW_PROG_SRC) #define SCRUB_FLAG_SW_TUN BIT(SCRUB_SW_SCRUB_TUNABLE) #define SCRUB_FLAG_HW_PROG BIT(SCRUB_HW_PROG) -#define SCRUB_FLAG_HW_SRC BIT(SCRUB_HW_SRC_CORR) -#define SCRUB_FLAG_HW_PROG_SRC BIT(SCRUB_HW_PROG_SRC_CORR) +#define SCRUB_FLAG_HW_SRC BIT(SCRUB_HW_SRC) +#define SCRUB_FLAG_HW_PROG_SRC BIT(SCRUB_HW_PROG_SRC) #define SCRUB_FLAG_HW_TUN BIT(SCRUB_HW_TUNABLE) /* FIXME - should have notify capabilities: NMI, LOG, PROC, etc */ +/* EDAC internal operation states */ +#define OP_ALLOC 0x100 +#define OP_RUNNING_POLL 0x201 +#define OP_RUNNING_INTERRUPT 0x202 +#define OP_RUNNING_POLL_INTR 0x203 +#define OP_OFFLINE 0x300 + /* * There are several things to be aware of that aren't at all obvious: * @@ -276,7 +296,7 @@ enum scrub_type { struct channel_info { int chan_idx; /* channel index */ u32 ce_count; /* Correctable Errors for this CHANNEL */ - char label[EDAC_MC_LABEL_LEN + 1]; /* DIMM label on motherboard */ + char label[EDAC_MC_LABEL_LEN + 1]; /* DIMM label on motherboard */ struct csrow_info *csrow; /* the parent */ }; @@ -297,15 +317,29 @@ struct csrow_info { struct mem_ctl_info *mci; /* the parent */ struct kobject kobj; /* sysfs kobject for this csrow */ - struct completion kobj_complete; - /* FIXME the number of CHANNELs might need to become dynamic */ + /* channel information for this csrow */ u32 nr_channels; struct channel_info *channels; }; +/* mcidev_sysfs_attribute structure + * used for driver sysfs attributes and in mem_ctl_info + * sysfs top level entries + */ +struct mcidev_sysfs_attribute { + struct attribute attr; + ssize_t (*show)(struct mem_ctl_info *,char *); + ssize_t (*store)(struct mem_ctl_info *, const char 
*,size_t); +}; + +/* MEMORY controller information structure + */ struct mem_ctl_info { - struct list_head link; /* for global list of mem_ctl_info structs */ + struct list_head link; /* for global list of mem_ctl_info structs */ + + struct module *owner; /* Module owner of this control struct */ + unsigned long mtype_cap; /* memory types supported by mc */ unsigned long edac_ctl_cap; /* Mem controller EDAC capabilities */ unsigned long edac_cap; /* configuration capabilities - this is @@ -322,14 +356,15 @@ struct mem_ctl_info { /* Translates sdram memory scrub rate given in bytes/sec to the internal representation and configures whatever else needs to be configured. - */ - int (*set_sdram_scrub_rate) (struct mem_ctl_info *mci, u32 *bw); + */ + int (*set_sdram_scrub_rate) (struct mem_ctl_info * mci, u32 * bw); /* Get the current sdram memory scrub rate from the internal representation and converts it to the closest matching bandwith in bytes/sec. - */ - int (*get_sdram_scrub_rate) (struct mem_ctl_info *mci, u32 *bw); + */ + int (*get_sdram_scrub_rate) (struct mem_ctl_info * mci, u32 * bw); + /* pointer to edac checking routine */ void (*edac_check) (struct mem_ctl_info * mci); @@ -340,7 +375,7 @@ struct mem_ctl_info { */ /* FIXME - why not send the phys page to begin with? */ unsigned long (*ctl_page_to_phys) (struct mem_ctl_info * mci, - unsigned long page); + unsigned long page); int mc_idx; int nr_csrows; struct csrow_info *csrows; @@ -353,6 +388,7 @@ struct mem_ctl_info { const char *mod_name; const char *mod_ver; const char *ctl_name; + const char *dev_name; char proc_name[MC_PROC_NAME_MAX_LEN + 1]; void *pvt_info; u32 ue_noinfo_count; /* Uncorrectable Errors w/o info */ @@ -369,14 +405,327 @@ struct mem_ctl_info { /* edac sysfs device control */ struct kobject edac_mci_kobj; - struct completion kobj_complete; + + /* Additional top controller level attributes, but specified + * by the low level driver. 
+ * + * Set by the low level driver to provide attributes at the + * controller level, same level as 'ue_count' and 'ce_count' above. + * An array of structures, NULL terminated + * + * If attributes are desired, then set to array of attributes + * If no attributes are desired, leave NULL + */ + struct mcidev_sysfs_attribute *mc_driver_sysfs_attributes; + + /* work struct for this MC */ + struct delayed_work work; + + /* the internal state of this controller instance */ + int op_state; +}; + +/* + * The following are the structures to provide for a generic + * or abstract 'edac_device'. This set of structures and the + * code that implements the APIs for the same, provide for + * registering EDAC type devices which are NOT standard memory. + * + * CPU caches (L1 and L2) + * DMA engines + * Core CPU swithces + * Fabric switch units + * PCIe interface controllers + * other EDAC/ECC type devices that can be monitored for + * errors, etc. + * + * It allows for a 2 level set of hiearchry. For example: + * + * cache could be composed of L1, L2 and L3 levels of cache. + * Each CPU core would have its own L1 cache, while sharing + * L2 and maybe L3 caches. + * + * View them arranged, via the sysfs presentation: + * /sys/devices/system/edac/.. + * + * mc/ <existing memory device directory> + * cpu/cpu0/.. <L1 and L2 block directory> + * /L1-cache/ce_count + * /ue_count + * /L2-cache/ce_count + * /ue_count + * cpu/cpu1/.. <L1 and L2 block directory> + * /L1-cache/ce_count + * /ue_count + * /L2-cache/ce_count + * /ue_count + * ... 
+ * + * the L1 and L2 directories would be "edac_device_block's" + */ + +struct edac_device_counter { + u32 ue_count; + u32 ce_count; +}; + +/* forward reference */ +struct edac_device_ctl_info; +struct edac_device_block; + +/* edac_dev_sysfs_attribute structure + * used for driver sysfs attributes in mem_ctl_info + * for extra controls and attributes: + * like high level error Injection controls + */ +struct edac_dev_sysfs_attribute { + struct attribute attr; + ssize_t (*show)(struct edac_device_ctl_info *, char *); + ssize_t (*store)(struct edac_device_ctl_info *, const char *, size_t); +}; + +/* edac_dev_sysfs_block_attribute structure + * + * used in leaf 'block' nodes for adding controls/attributes + * + * each block in each instance of the containing control structure + * can have an array of the following. The show and store functions + * will be filled in with the show/store function in the + * low level driver. + * + * The 'value' field will be the actual value field used for + * counting + */ +struct edac_dev_sysfs_block_attribute { + struct attribute attr; + ssize_t (*show)(struct kobject *, struct attribute *, char *); + ssize_t (*store)(struct kobject *, struct attribute *, + const char *, size_t); + struct edac_device_block *block; + + unsigned int value; +}; + +/* device block control structure */ +struct edac_device_block { + struct edac_device_instance *instance; /* Up Pointer */ + char name[EDAC_DEVICE_NAME_LEN + 1]; + + struct edac_device_counter counters; /* basic UE and CE counters */ + + int nr_attribs; /* how many attributes */ + + /* this block's attributes, could be NULL */ + struct edac_dev_sysfs_block_attribute *block_attributes; + + /* edac sysfs device control */ + struct kobject kobj; +}; + +/* device instance control structure */ +struct edac_device_instance { + struct edac_device_ctl_info *ctl; /* Up pointer */ + char name[EDAC_DEVICE_NAME_LEN + 4]; + + struct edac_device_counter counters; /* instance counters */ + + u32 nr_blocks; 
/* how many blocks */ + struct edac_device_block *blocks; /* block array */ + + /* edac sysfs device control */ + struct kobject kobj; +}; + + +/* + * Abstract edac_device control info structure + * + */ +struct edac_device_ctl_info { + /* for global list of edac_device_ctl_info structs */ + struct list_head link; + + struct module *owner; /* Module owner of this control struct */ + + int dev_idx; + + /* Per instance controls for this edac_device */ + int log_ue; /* boolean for logging UEs */ + int log_ce; /* boolean for logging CEs */ + int panic_on_ue; /* boolean for panic'ing on an UE */ + unsigned poll_msec; /* number of milliseconds to poll interval */ + unsigned long delay; /* number of jiffies for poll_msec */ + + /* Additional top controller level attributes, but specified + * by the low level driver. + * + * Set by the low level driver to provide attributes at the + * controller level, same level as 'ue_count' and 'ce_count' above. + * An array of structures, NULL terminated + * + * If attributes are desired, then set to array of attributes + * If no attributes are desired, leave NULL + */ + struct edac_dev_sysfs_attribute *sysfs_attributes; + + /* pointer to main 'edac' class in sysfs */ + struct sysdev_class *edac_class; + + /* the internal state of this controller instance */ + int op_state; + /* work struct for this instance */ + struct delayed_work work; + + /* pointer to edac polling checking routine: + * If NOT NULL: points to polling check routine + * If NULL: Then assumes INTERRUPT operation, where + * MC driver will receive events + */ + void (*edac_check) (struct edac_device_ctl_info * edac_dev); + + struct device *dev; /* pointer to device structure */ + + const char *mod_name; /* module name */ + const char *ctl_name; /* edac controller name */ + const char *dev_name; /* pci/platform/etc... 
name */ + + void *pvt_info; /* pointer to 'private driver' info */ + + unsigned long start_time; /* edac_device load start time (jiffies) */ + + /* these are for safe removal of mc devices from global list while + * NMI handlers may be traversing list + */ + struct rcu_head rcu; + struct completion removal_complete; + + /* sysfs top name under 'edac' directory + * and instance name: + * cpu/cpu0/... + * cpu/cpu1/... + * cpu/cpu2/... + * ... + */ + char name[EDAC_DEVICE_NAME_LEN + 1]; + + /* Number of instances supported on this control structure + * and the array of those instances + */ + u32 nr_instances; + struct edac_device_instance *instances; + + /* Event counters for this whole EDAC Device */ + struct edac_device_counter counters; + + /* edac sysfs device control for the 'name' + * device this structure controls + */ + struct kobject kobj; }; +/* To get from the instance's wq to the beginning of the ctl structure */ +#define to_edac_mem_ctl_work(w) \ + container_of(w, struct mem_ctl_info, work) + +#define to_edac_device_ctl_work(w) \ + container_of(w,struct edac_device_ctl_info,work) + +/* + * The alloc() and free() functions for the 'edac_device' control info + * structure. A MC driver will allocate one of these for each edac_device + * it is going to control/register with the EDAC CORE.
+ */ +extern struct edac_device_ctl_info *edac_device_alloc_ctl_info( + unsigned sizeof_private, + char *edac_device_name, unsigned nr_instances, + char *edac_block_name, unsigned nr_blocks, + unsigned offset_value, + struct edac_dev_sysfs_block_attribute *block_attributes, + unsigned nr_attribs, + int device_index); + +/* The offset value can be: + * -1 indicating no offset value + * 0 for zero-based block numbers + * 1 for 1-based block number + * other for other-based block number + */ +#define BLOCK_OFFSET_VALUE_OFF ((unsigned) -1) + +extern void edac_device_free_ctl_info(struct edac_device_ctl_info *ctl_info); + #ifdef CONFIG_PCI +struct edac_pci_counter { + atomic_t pe_count; + atomic_t npe_count; +}; + +/* + * Abstract edac_pci control info structure + * + */ +struct edac_pci_ctl_info { + /* for global list of edac_pci_ctl_info structs */ + struct list_head link; + + int pci_idx; + + struct sysdev_class *edac_class; /* pointer to class */ + + /* the internal state of this controller instance */ + int op_state; + /* work struct for this instance */ + struct delayed_work work; + + /* pointer to edac polling checking routine: + * If NOT NULL: points to polling check routine + * If NULL: Then assumes INTERRUPT operation, where + * MC driver will receive events + */ + void (*edac_check) (struct edac_pci_ctl_info * edac_dev); + + struct device *dev; /* pointer to device structure */ + + const char *mod_name; /* module name */ + const char *ctl_name; /* edac controller name */ + const char *dev_name; /* pci/platform/etc... name */ + + void *pvt_info; /* pointer to 'private driver' info */ + + unsigned long start_time; /* edac_pci load start time (jiffies) */ + + /* these are for safe removal of devices from global list while + * NMI handlers may be traversing list + */ + struct rcu_head rcu; + struct completion complete; + + /* sysfs top name under 'edac' directory + * and instance name: + * cpu/cpu0/... + * cpu/cpu1/... + * cpu/cpu2/... + * ... 
+ */ + char name[EDAC_DEVICE_NAME_LEN + 1]; + + /* Event counters for this whole EDAC Device */ + struct edac_pci_counter counters; + + /* edac sysfs device control for the 'name' + * device this structure controls + */ + struct kobject kobj; + struct completion kobj_complete; +}; + +#define to_edac_pci_ctl_work(w) \ + container_of(w, struct edac_pci_ctl_info,work) + /* write all or some bits in a byte-register*/ static inline void pci_write_bits8(struct pci_dev *pdev, int offset, u8 value, - u8 mask) + u8 mask) { if (mask != 0xff) { u8 buf; @@ -392,7 +741,7 @@ static inline void pci_write_bits8(struct pci_dev *pdev, int offset, u8 value, /* write all or some bits in a word-register*/ static inline void pci_write_bits16(struct pci_dev *pdev, int offset, - u16 value, u16 mask) + u16 value, u16 mask) { if (mask != 0xffff) { u16 buf; @@ -408,7 +757,7 @@ static inline void pci_write_bits16(struct pci_dev *pdev, int offset, /* write all or some bits in a dword-register; only a mask with all 32 bits set (0xffffffff) may skip the masked path, a 16-bit mask must not be treated as a full mask */ static inline void pci_write_bits32(struct pci_dev *pdev, int offset, - u32 value, u32 mask) + u32 value, u32 mask) { if (mask != 0xffffffff) { u32 buf; @@ -422,20 +771,16 @@ static inline void pci_write_bits32(struct pci_dev *pdev, int offset, pci_write_config_dword(pdev, offset, value); } -#endif /* CONFIG_PCI */ +#endif /* CONFIG_PCI */ -#ifdef CONFIG_EDAC_DEBUG -void edac_mc_dump_channel(struct channel_info *chan); -void edac_mc_dump_mci(struct mem_ctl_info *mci); -void edac_mc_dump_csrow(struct csrow_info *csrow); -#endif /* CONFIG_EDAC_DEBUG */ - -extern int edac_mc_add_mc(struct mem_ctl_info *mci,int mc_idx); -extern struct mem_ctl_info * edac_mc_del_mc(struct device *dev); +extern struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows, + unsigned nr_chans, int edac_index); +extern int edac_mc_add_mc(struct mem_ctl_info *mci); +extern void edac_mc_free(struct mem_ctl_info *mci); +extern struct mem_ctl_info *edac_mc_find(int idx); +extern struct mem_ctl_info *edac_mc_del_mc(struct
device *dev); extern int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, - unsigned long page); -extern void edac_mc_scrub_block(unsigned long page, unsigned long offset, - u32 size); + unsigned long page); /* * The no info errors are used when error overflows are reported. @@ -448,34 +793,59 @@ extern void edac_mc_scrub_block(unsigned long page, unsigned long offset, * statement clutter and extra function arguments. */ extern void edac_mc_handle_ce(struct mem_ctl_info *mci, - unsigned long page_frame_number, unsigned long offset_in_page, - unsigned long syndrome, int row, int channel, - const char *msg); + unsigned long page_frame_number, + unsigned long offset_in_page, + unsigned long syndrome, int row, int channel, + const char *msg); extern void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci, - const char *msg); + const char *msg); extern void edac_mc_handle_ue(struct mem_ctl_info *mci, - unsigned long page_frame_number, unsigned long offset_in_page, - int row, const char *msg); + unsigned long page_frame_number, + unsigned long offset_in_page, int row, + const char *msg); extern void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci, - const char *msg); -extern void edac_mc_handle_fbd_ue(struct mem_ctl_info *mci, - unsigned int csrow, - unsigned int channel0, - unsigned int channel1, - char *msg); -extern void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci, - unsigned int csrow, - unsigned int channel, - char *msg); + const char *msg); +extern void edac_mc_handle_fbd_ue(struct mem_ctl_info *mci, unsigned int csrow, + unsigned int channel0, unsigned int channel1, + char *msg); +extern void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci, unsigned int csrow, + unsigned int channel, char *msg); /* - * This kmalloc's and initializes all the structures. - * Can't be used if all structures don't have the same lifetime. 
+ * edac_device APIs */ -extern struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows, - unsigned nr_chans); +extern int edac_device_add_device(struct edac_device_ctl_info *edac_dev); +extern struct edac_device_ctl_info *edac_device_del_device(struct device *dev); +extern void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev, + int inst_nr, int block_nr, const char *msg); +extern void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev, + int inst_nr, int block_nr, const char *msg); -/* Free an mc previously allocated by edac_mc_alloc() */ -extern void edac_mc_free(struct mem_ctl_info *mci); +/* + * edac_pci APIs + */ +extern struct edac_pci_ctl_info *edac_pci_alloc_ctl_info(unsigned int sz_pvt, + const char *edac_pci_name); + +extern void edac_pci_free_ctl_info(struct edac_pci_ctl_info *pci); + +extern void edac_pci_reset_delay_period(struct edac_pci_ctl_info *pci, + unsigned long value); + +extern int edac_pci_add_device(struct edac_pci_ctl_info *pci, int edac_idx); +extern struct edac_pci_ctl_info *edac_pci_del_device(struct device *dev); + +extern struct edac_pci_ctl_info *edac_pci_create_generic_ctl( + struct device *dev, + const char *mod_name); + +extern void edac_pci_release_generic_ctl(struct edac_pci_ctl_info *pci); +extern int edac_pci_create_sysfs(struct edac_pci_ctl_info *pci); +extern void edac_pci_remove_sysfs(struct edac_pci_ctl_info *pci); + +/* + * edac misc APIs + */ +extern char *edac_op_state_to_string(int op_state); -#endif /* _EDAC_MC_H_ */ +#endif /* _EDAC_CORE_H_ */ diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c new file mode 100644 index 00000000000..f3690a697cf --- /dev/null +++ b/drivers/edac/edac_device.c @@ -0,0 +1,746 @@ + +/* + * edac_device.c + * (C) 2007 www.douglaskthompson.com + * + * This file may be distributed under the terms of the + * GNU General Public License. 
+ * + * Written by Doug Thompson <norsk5@xmission.com> + * + * edac_device API implementation + * 19 Jan 2007 + */ + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/smp.h> +#include <linux/init.h> +#include <linux/sysctl.h> +#include <linux/highmem.h> +#include <linux/timer.h> +#include <linux/slab.h> +#include <linux/jiffies.h> +#include <linux/spinlock.h> +#include <linux/list.h> +#include <linux/sysdev.h> +#include <linux/ctype.h> +#include <linux/workqueue.h> +#include <asm/uaccess.h> +#include <asm/page.h> + +#include "edac_core.h" +#include "edac_module.h" + +/* lock for the list: 'edac_device_list', manipulation of this list + * is protected by the 'device_ctls_mutex' lock + */ +static DEFINE_MUTEX(device_ctls_mutex); +static struct list_head edac_device_list = LIST_HEAD_INIT(edac_device_list); + +#ifdef CONFIG_EDAC_DEBUG +/* debug-only helper: prints the main fields of an edac_device control + * structure through the debugf3/debugf4 macros + */ +static void edac_device_dump_device(struct edac_device_ctl_info *edac_dev) +{ + debugf3("\tedac_dev = %p dev_idx=%d \n", edac_dev, edac_dev->dev_idx); + debugf4("\tedac_dev->edac_check = %p\n", edac_dev->edac_check); + debugf3("\tdev = %p\n", edac_dev->dev); + debugf3("\tmod_name:ctl_name = %s:%s\n", + edac_dev->mod_name, edac_dev->ctl_name); + debugf3("\tpvt_info = %p\n\n", edac_dev->pvt_info); +} +#endif /* CONFIG_EDAC_DEBUG */ + + +/* + * edac_device_alloc_ctl_info() + * Allocate a new edac device control info structure + * + * The control structure is allocated in complete chunk + * from the OS. It is in turn sub allocated to the + * various objects that compose the structure + * + * The structure has a 'nr_instance' array within itself.
+ * Each instance represents a major component + * Example: L1 cache and L2 cache are 2 instance components + * + * Within each instance is an array of 'nr_blocks' blockoffsets + */ +struct edac_device_ctl_info *edac_device_alloc_ctl_info( + unsigned sz_private, + char *edac_device_name, unsigned nr_instances, + char *edac_block_name, unsigned nr_blocks, + unsigned offset_value, /* zero, 1, or other based offset */ + struct edac_dev_sysfs_block_attribute *attrib_spec, unsigned nr_attrib, + int device_index) +{ + struct edac_device_ctl_info *dev_ctl; + struct edac_device_instance *dev_inst, *inst; + struct edac_device_block *dev_blk, *blk_p, *blk; + struct edac_dev_sysfs_block_attribute *dev_attrib, *attrib_p, *attrib; + unsigned total_size; + unsigned count; + unsigned instance, block, attr; + void *pvt; + int err; + + debugf4("%s() instances=%d blocks=%d\n", + __func__, nr_instances, nr_blocks); + + /* Calculate the size of memory we need to allocate AND + * determine the offsets of the various item arrays + * (instance,block,attrib) from the start of an allocated structure. + * We want the alignment of each item (instance,block,attrib) + * to be at least as stringent as what the compiler would + * provide if we could simply hardcode everything into a single struct. + */ + dev_ctl = (struct edac_device_ctl_info *)NULL; + + /* Calc the 'end' offset past end of ONE ctl_info structure + * which will become the start of the 'instance' array + */ + dev_inst = edac_align_ptr(&dev_ctl[1], sizeof(*dev_inst)); + + /* Calc the 'end' offset past the instance array within the ctl_info + * which will become the start of the block array + */ + dev_blk = edac_align_ptr(&dev_inst[nr_instances], sizeof(*dev_blk)); + + /* Calc the 'end' offset past the dev_blk array + * which will become the start of the attrib array, if any. 
+ */ + count = nr_instances * nr_blocks; + dev_attrib = edac_align_ptr(&dev_blk[count], sizeof(*dev_attrib)); + + /* Check for case of when an attribute array is specified */ + if (nr_attrib > 0) { + /* calc how many nr_attrib we need */ + count *= nr_attrib; + + /* Calc the 'end' offset past the attributes array */ + pvt = edac_align_ptr(&dev_attrib[count], sz_private); + } else { + /* no attribute array specificed */ + pvt = edac_align_ptr(dev_attrib, sz_private); + } + + /* 'pvt' now points to where the private data area is. + * At this point 'pvt' (like dev_inst,dev_blk and dev_attrib) + * is baselined at ZERO + */ + total_size = ((unsigned long)pvt) + sz_private; + + /* Allocate the amount of memory for the set of control structures */ + dev_ctl = kzalloc(total_size, GFP_KERNEL); + if (dev_ctl == NULL) + return NULL; + + /* Adjust pointers so they point within the actual memory we + * just allocated rather than an imaginary chunk of memory + * located at address 0. + * 'dev_ctl' points to REAL memory, while the others are + * ZERO based and thus need to be adjusted to point within + * the allocated memory. + */ + dev_inst = (struct edac_device_instance *) + (((char *)dev_ctl) + ((unsigned long)dev_inst)); + dev_blk = (struct edac_device_block *) + (((char *)dev_ctl) + ((unsigned long)dev_blk)); + dev_attrib = (struct edac_dev_sysfs_block_attribute *) + (((char *)dev_ctl) + ((unsigned long)dev_attrib)); + pvt = sz_private ? 
(((char *)dev_ctl) + ((unsigned long)pvt)) : NULL; + + /* Begin storing the information into the control info structure */ + dev_ctl->dev_idx = device_index; + dev_ctl->nr_instances = nr_instances; + dev_ctl->instances = dev_inst; + dev_ctl->pvt_info = pvt; + + /* Name of this edac device */ + snprintf(dev_ctl->name,sizeof(dev_ctl->name),"%s",edac_device_name); + + debugf4("%s() edac_dev=%p next after end=%p\n", + __func__, dev_ctl, pvt + sz_private ); + + /* Initialize every Instance */ + for (instance = 0; instance < nr_instances; instance++) { + inst = &dev_inst[instance]; + inst->ctl = dev_ctl; + inst->nr_blocks = nr_blocks; + blk_p = &dev_blk[instance * nr_blocks]; + inst->blocks = blk_p; + + /* name of this instance */ + snprintf(inst->name, sizeof(inst->name), + "%s%u", edac_device_name, instance); + + /* Initialize every block in each instance */ + for (block = 0; block < nr_blocks; block++) { + blk = &blk_p[block]; + blk->instance = inst; + snprintf(blk->name, sizeof(blk->name), + "%s%d", edac_block_name, block+offset_value); + + debugf4("%s() instance=%d inst_p=%p block=#%d " + "block_p=%p name='%s'\n", + __func__, instance, inst, block, + blk, blk->name); + + /* if there are NO attributes OR no attribute pointer + * then continue on to next block iteration + */ + if ((nr_attrib == 0) || (attrib_spec == NULL)) + continue; + + /* setup the attribute array for this block */ + blk->nr_attribs = nr_attrib; + attrib_p = &dev_attrib[block*nr_instances*nr_attrib]; + blk->block_attributes = attrib_p; + + debugf4("%s() THIS BLOCK_ATTRIB=%p\n", + __func__, blk->block_attributes); + + /* Initialize every user specified attribute in this + * block with the data the caller passed in + * Each block gets its own copy of pointers, + * and its unique 'value' + */ + for (attr = 0; attr < nr_attrib; attr++) { + attrib = &attrib_p[attr]; + + /* populate the unique per attrib + * with the code pointers and info + */ + attrib->attr = attrib_spec[attr].attr; + attrib->show = 
attrib_spec[attr].show; + attrib->store = attrib_spec[attr].store; + + attrib->block = blk; /* up link */ + + debugf4("%s() alloc-attrib=%p attrib_name='%s' " + "attrib-spec=%p spec-name=%s\n", + __func__, attrib, attrib->attr.name, + &attrib_spec[attr], + attrib_spec[attr].attr.name + ); + } + } + } + + /* Mark this instance as merely ALLOCATED */ + dev_ctl->op_state = OP_ALLOC; + + /* + * Initialize the 'root' kobj for the edac_device controller + */ + err = edac_device_register_sysfs_main_kobj(dev_ctl); + if (err) { + kfree(dev_ctl); + return NULL; + } + + /* at this point, the root kobj is valid, and in order to + * 'free' the object, then the function: + * edac_device_unregister_sysfs_main_kobj() must be called + * which will perform kobj unregistration and the actual free + * will occur during the kobject callback operation + */ + + return dev_ctl; +} +EXPORT_SYMBOL_GPL(edac_device_alloc_ctl_info); + +/* + * edac_device_free_ctl_info() + * frees the memory allocated by the edac_device_alloc_ctl_info() + * function + */ +void edac_device_free_ctl_info(struct edac_device_ctl_info *ctl_info) +{ + edac_device_unregister_sysfs_main_kobj(ctl_info); +} +EXPORT_SYMBOL_GPL(edac_device_free_ctl_info); + +/* + * find_edac_device_by_dev + * scans the edac_device list for a specific 'struct device *' + * + * lock to be held prior to call: device_ctls_mutex + * + * Return: + * pointer to control structure managing 'dev' + * NULL if not found on list + */ +static struct edac_device_ctl_info *find_edac_device_by_dev(struct device *dev) +{ + struct edac_device_ctl_info *edac_dev; + struct list_head *item; + + debugf0("%s()\n", __func__); + + list_for_each(item, &edac_device_list) { + edac_dev = list_entry(item, struct edac_device_ctl_info, link); + + if (edac_dev->dev == dev) + return edac_dev; + } + + return NULL; +} + +/* + * add_edac_dev_to_global_list + * Before calling this function, caller must + * assign a unique value to edac_dev->dev_idx. 
+ * + * lock to be held prior to call: device_ctls_mutex + * + * Return: + * 0 on success + * 1 on failure. + */ +static int add_edac_dev_to_global_list(struct edac_device_ctl_info *edac_dev) +{ + struct list_head *item, *insert_before; + struct edac_device_ctl_info *rover; + + insert_before = &edac_device_list; + + /* Determine if already on the list */ + rover = find_edac_device_by_dev(edac_dev->dev); + if (unlikely(rover != NULL)) + goto fail0; + + /* Insert in ascending order by 'dev_idx', so find position */ + list_for_each(item, &edac_device_list) { + rover = list_entry(item, struct edac_device_ctl_info, link); + + if (rover->dev_idx >= edac_dev->dev_idx) { + if (unlikely(rover->dev_idx == edac_dev->dev_idx)) + goto fail1; + + insert_before = item; + break; + } + } + + list_add_tail_rcu(&edac_dev->link, insert_before); + return 0; + +fail0: + edac_printk(KERN_WARNING, EDAC_MC, + "%s (%s) %s %s already assigned %d\n", + rover->dev->bus_id, dev_name(rover), + rover->mod_name, rover->ctl_name, rover->dev_idx); + return 1; + +fail1: + edac_printk(KERN_WARNING, EDAC_MC, + "bug in low-level driver: attempt to assign\n" + " duplicate dev_idx %d in %s()\n", rover->dev_idx, + __func__); + return 1; +} + +/* + * complete_edac_device_list_del + * + * callback function when reference count is zero + */ +static void complete_edac_device_list_del(struct rcu_head *head) +{ + struct edac_device_ctl_info *edac_dev; + + edac_dev = container_of(head, struct edac_device_ctl_info, rcu); + INIT_LIST_HEAD(&edac_dev->link); + complete(&edac_dev->removal_complete); +} + +/* + * del_edac_device_from_global_list + * + * remove the RCU, setup for a callback call, + * then wait for the callback to occur + */ +static void del_edac_device_from_global_list(struct edac_device_ctl_info + *edac_device) +{ + list_del_rcu(&edac_device->link); + + init_completion(&edac_device->removal_complete); + call_rcu(&edac_device->rcu, complete_edac_device_list_del); + 
wait_for_completion(&edac_device->removal_complete); +} + +/** + * edac_device_find + * Search for a edac_device_ctl_info structure whose index is 'idx'. + * + * If found, return a pointer to the structure. + * Else return NULL. + * + * Caller must hold device_ctls_mutex. + */ +struct edac_device_ctl_info *edac_device_find(int idx) +{ + struct list_head *item; + struct edac_device_ctl_info *edac_dev; + + /* Iterate over list, looking for exact match of ID */ + list_for_each(item, &edac_device_list) { + edac_dev = list_entry(item, struct edac_device_ctl_info, link); + + if (edac_dev->dev_idx >= idx) { + if (edac_dev->dev_idx == idx) + return edac_dev; + + /* not on list, so terminate early */ + break; + } + } + + return NULL; +} +EXPORT_SYMBOL_GPL(edac_device_find); + +/* + * edac_device_workq_function + * performs the operation scheduled by a workq request + * + * this workq is embedded within an edac_device_ctl_info + * structure, that needs to be polled for possible error events. + * + * This operation is to acquire the list mutex lock + * (thus preventing insertation or deletion) + * and then call the device's poll function IFF this device is + * running polled and there is a poll function defined. 
+ */ +static void edac_device_workq_function(struct work_struct *work_req) +{ + struct delayed_work *d_work = (struct delayed_work *)work_req; + struct edac_device_ctl_info *edac_dev = to_edac_device_ctl_work(d_work); + + mutex_lock(&device_ctls_mutex); + + /* Only poll controllers that are running polled and have a check */ + if ((edac_dev->op_state == OP_RUNNING_POLL) && + (edac_dev->edac_check != NULL)) { + edac_dev->edac_check(edac_dev); + } + + mutex_unlock(&device_ctls_mutex); + + /* Reschedule the workq for the next time period to start again + * if the number of msec is for 1 sec, then adjust to the next + * whole one second to save timers fireing all over the period + * between integral seconds + */ + if (edac_dev->poll_msec == 1000) + queue_delayed_work(edac_workqueue, &edac_dev->work, + round_jiffies(edac_dev->delay)); + else + queue_delayed_work(edac_workqueue, &edac_dev->work, + edac_dev->delay); +} + +/* + * edac_device_workq_setup + * initialize a workq item for this edac_device instance + * passing in the new delay period in msec + */ +void edac_device_workq_setup(struct edac_device_ctl_info *edac_dev, + unsigned msec) +{ + debugf0("%s()\n", __func__); + + /* take the arg 'msec' and set it into the control structure + * to used in the time period calculation + * then calc the number of jiffies that represents + */ + edac_dev->poll_msec = msec; + edac_dev->delay = msecs_to_jiffies(msec); + + INIT_DELAYED_WORK(&edac_dev->work, edac_device_workq_function); + + /* optimize here for the 1 second case, which will be normal value, to + * fire ON the 1 second time event. 
This helps reduce all sorts of + * timers firing on sub-second basis, while they are happy + * to fire together on the 1 second exactly + */ + if (edac_dev->poll_msec == 1000) + queue_delayed_work(edac_workqueue, &edac_dev->work, + round_jiffies(edac_dev->delay)); + else + queue_delayed_work(edac_workqueue, &edac_dev->work, + edac_dev->delay); +} + +/* + * edac_device_workq_teardown + * stop the workq processing on this edac_dev + */ +void edac_device_workq_teardown(struct edac_device_ctl_info *edac_dev) +{ + int status; + + status = cancel_delayed_work(&edac_dev->work); + if (status == 0) { + /* workq instance might be running, wait for it */ + flush_workqueue(edac_workqueue); + } +} + +/* + * edac_device_reset_delay_period + * + * need to stop any outstanding workq queued up at this time + * because we will be resetting the sleep time. + * Then restart the workq on the new delay + */ +void edac_device_reset_delay_period(struct edac_device_ctl_info *edac_dev, + unsigned long value) +{ + /* cancel the current workq request, without the mutex lock */ + edac_device_workq_teardown(edac_dev); + + /* acquire the mutex before doing the workq setup */ + mutex_lock(&device_ctls_mutex); + + /* restart the workq request, with new delay value */ + edac_device_workq_setup(edac_dev, value); + + mutex_unlock(&device_ctls_mutex); +} + +/** + * edac_device_add_device: Insert the 'edac_dev' structure into the + * edac_device global list and create sysfs entries associated with + * edac_device structure. + * @edac_device: pointer to the edac_device structure to be added to the list + * 'edac_device' structure. 
+ * + * Return: + * 0 Success + * !0 Failure + */ +int edac_device_add_device(struct edac_device_ctl_info *edac_dev) +{ + debugf0("%s()\n", __func__); + +#ifdef CONFIG_EDAC_DEBUG + if (edac_debug_level >= 3) + edac_device_dump_device(edac_dev); +#endif + mutex_lock(&device_ctls_mutex); + + if (add_edac_dev_to_global_list(edac_dev)) + goto fail0; + + /* set load time so that error rate can be tracked */ + edac_dev->start_time = jiffies; + + /* create this instance's sysfs entries */ + if (edac_device_create_sysfs(edac_dev)) { + edac_device_printk(edac_dev, KERN_WARNING, + "failed to create sysfs device\n"); + goto fail1; + } + + /* If there IS a check routine, then we are running POLLED */ + if (edac_dev->edac_check != NULL) { + /* This instance is NOW RUNNING */ + edac_dev->op_state = OP_RUNNING_POLL; + + /* + * enable workq processing on this instance, + * default = 1000 msec + */ + edac_device_workq_setup(edac_dev, 1000); + } else { + edac_dev->op_state = OP_RUNNING_INTERRUPT; + } + + /* Report action taken */ + edac_device_printk(edac_dev, KERN_INFO, + "Giving out device to module '%s' controller " + "'%s': DEV '%s' (%s)\n", + edac_dev->mod_name, + edac_dev->ctl_name, + dev_name(edac_dev), + edac_op_state_to_string(edac_dev->op_state)); + + mutex_unlock(&device_ctls_mutex); + return 0; + +fail1: + /* Some error, so remove the entry from the lsit */ + del_edac_device_from_global_list(edac_dev); + +fail0: + mutex_unlock(&device_ctls_mutex); + return 1; +} +EXPORT_SYMBOL_GPL(edac_device_add_device); + +/** + * edac_device_del_device: + * Remove sysfs entries for specified edac_device structure and + * then remove edac_device structure from global list + * + * @pdev: + * Pointer to 'struct device' representing edac_device + * structure to remove. + * + * Return: + * Pointer to removed edac_device structure, + * OR NULL if device not found. 
+ */ +struct edac_device_ctl_info *edac_device_del_device(struct device *dev) +{ + struct edac_device_ctl_info *edac_dev; + + debugf0("%s()\n", __func__); + + mutex_lock(&device_ctls_mutex); + + /* Find the structure on the list, if not there, then leave */ + edac_dev = find_edac_device_by_dev(dev); + if (edac_dev == NULL) { + mutex_unlock(&device_ctls_mutex); + return NULL; + } + + /* mark this instance as OFFLINE */ + edac_dev->op_state = OP_OFFLINE; + + /* clear workq processing on this instance */ + edac_device_workq_teardown(edac_dev); + + /* deregister from global list */ + del_edac_device_from_global_list(edac_dev); + + mutex_unlock(&device_ctls_mutex); + + /* Tear down the sysfs entries for this instance */ + edac_device_remove_sysfs(edac_dev); + + edac_printk(KERN_INFO, EDAC_MC, + "Removed device %d for %s %s: DEV %s\n", + edac_dev->dev_idx, + edac_dev->mod_name, edac_dev->ctl_name, dev_name(edac_dev)); + + return edac_dev; +} +EXPORT_SYMBOL_GPL(edac_device_del_device); + +static inline int edac_device_get_log_ce(struct edac_device_ctl_info *edac_dev) +{ + return edac_dev->log_ce; +} + +static inline int edac_device_get_log_ue(struct edac_device_ctl_info *edac_dev) +{ + return edac_dev->log_ue; +} + +static inline int edac_device_get_panic_on_ue(struct edac_device_ctl_info + *edac_dev) +{ + return edac_dev->panic_on_ue; +} + +/* + * edac_device_handle_ce + * perform a common output and handling of an 'edac_dev' CE event + */ +void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev, + int inst_nr, int block_nr, const char *msg) +{ + struct edac_device_instance *instance; + struct edac_device_block *block = NULL; + + if ((inst_nr >= edac_dev->nr_instances) || (inst_nr < 0)) { + edac_device_printk(edac_dev, KERN_ERR, + "INTERNAL ERROR: 'instance' out of range " + "(%d >= %d)\n", inst_nr, + edac_dev->nr_instances); + return; + } + + instance = edac_dev->instances + inst_nr; + + if ((block_nr >= instance->nr_blocks) || (block_nr < 0)) { + 
edac_device_printk(edac_dev, KERN_ERR, + "INTERNAL ERROR: instance %d 'block' " + "out of range (%d >= %d)\n", + inst_nr, block_nr, + instance->nr_blocks); + return; + } + + if (instance->nr_blocks > 0) { + block = instance->blocks + block_nr; + block->counters.ce_count++; + } + + /* Propagate the count up the 'totals' tree */ + instance->counters.ce_count++; + edac_dev->counters.ce_count++; + + if (edac_device_get_log_ce(edac_dev)) + edac_device_printk(edac_dev, KERN_WARNING, + "CE: %s instance: %s block: %s '%s'\n", + edac_dev->ctl_name, instance->name, + block ? block->name : "N/A", msg); +} +EXPORT_SYMBOL_GPL(edac_device_handle_ce); + +/* + * edac_device_handle_ue + * perform a common output and handling of an 'edac_dev' UE event + */ +void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev, + int inst_nr, int block_nr, const char *msg) +{ + struct edac_device_instance *instance; + struct edac_device_block *block = NULL; + + if ((inst_nr >= edac_dev->nr_instances) || (inst_nr < 0)) { + edac_device_printk(edac_dev, KERN_ERR, + "INTERNAL ERROR: 'instance' out of range " + "(%d >= %d)\n", inst_nr, + edac_dev->nr_instances); + return; + } + + instance = edac_dev->instances + inst_nr; + + if ((block_nr >= instance->nr_blocks) || (block_nr < 0)) { + edac_device_printk(edac_dev, KERN_ERR, + "INTERNAL ERROR: instance %d 'block' " + "out of range (%d >= %d)\n", + inst_nr, block_nr, + instance->nr_blocks); + return; + } + + if (instance->nr_blocks > 0) { + block = instance->blocks + block_nr; + block->counters.ue_count++; + } + + /* Propagate the count up the 'totals' tree */ + instance->counters.ue_count++; + edac_dev->counters.ue_count++; + + if (edac_device_get_log_ue(edac_dev)) + edac_device_printk(edac_dev, KERN_EMERG, + "UE: %s instance: %s block: %s '%s'\n", + edac_dev->ctl_name, instance->name, + block ?
block->name : "N/A", msg); + + if (edac_device_get_panic_on_ue(edac_dev)) + panic("EDAC %s: UE instance: %s block %s '%s'\n", + edac_dev->ctl_name, instance->name, + block ? block->name : "N/A", msg); +} +EXPORT_SYMBOL_GPL(edac_device_handle_ue); diff --git a/drivers/edac/edac_device_sysfs.c b/drivers/edac/edac_device_sysfs.c new file mode 100644 index 00000000000..70b837f23c4 --- /dev/null +++ b/drivers/edac/edac_device_sysfs.c @@ -0,0 +1,896 @@ +/* + * file for managing the edac_device class of devices for EDAC + * + * (C) 2007 SoftwareBitMaker (http://www.softwarebitmaker.com) + * + * This file may be distributed under the terms of the + * GNU General Public License. + * + * Written by Doug Thompson <norsk5@xmission.com> + * + */ + +#include <linux/ctype.h> +#include <linux/module.h> + +#include "edac_core.h" +#include "edac_module.h" + +#define EDAC_DEVICE_SYMLINK "device" + +/* NOTE(review): 'struct edacdev_attribute' used by to_edacdev_attr() is not + * defined in the visible code - confirm that macro is still needed + */ +#define to_edacdev(k) container_of(k, struct edac_device_ctl_info, kobj) +#define to_edacdev_attr(a) container_of(a, struct edacdev_attribute, attr) + + +/* + * Set of edac_device_ctl_info attribute store/show functions + */ + +/* 'log_ue' */ +static ssize_t edac_device_ctl_log_ue_show(struct edac_device_ctl_info + *ctl_info, char *data) +{ + return sprintf(data, "%u\n", ctl_info->log_ue); +} + +static ssize_t edac_device_ctl_log_ue_store(struct edac_device_ctl_info + *ctl_info, const char *data, + size_t count) +{ + /* if parameter is zero, turn off flag, if non-zero turn on flag */ + ctl_info->log_ue = (simple_strtoul(data, NULL, 0) != 0); + + return count; +} + +/* 'log_ce' */ +static ssize_t edac_device_ctl_log_ce_show(struct edac_device_ctl_info + *ctl_info, char *data) +{ + return sprintf(data, "%u\n", ctl_info->log_ce); +} + +static ssize_t edac_device_ctl_log_ce_store(struct edac_device_ctl_info + *ctl_info, const char *data, + size_t count) +{ + /* if parameter is zero, turn off flag, if non-zero turn on flag */ + ctl_info->log_ce = (simple_strtoul(data, NULL, 0) != 0); + + return
count; +} + +/* 'panic_on_ue' */ +static ssize_t edac_device_ctl_panic_on_ue_show(struct edac_device_ctl_info + *ctl_info, char *data) +{ + return sprintf(data, "%u\n", ctl_info->panic_on_ue); +} + +static ssize_t edac_device_ctl_panic_on_ue_store(struct edac_device_ctl_info + *ctl_info, const char *data, + size_t count) +{ + /* if parameter is zero, turn off flag, if non-zero turn on flag */ + ctl_info->panic_on_ue = (simple_strtoul(data, NULL, 0) != 0); + + return count; +} + +/* 'poll_msec' show and store functions*/ +static ssize_t edac_device_ctl_poll_msec_show(struct edac_device_ctl_info + *ctl_info, char *data) +{ + return sprintf(data, "%u\n", ctl_info->poll_msec); +} + +static ssize_t edac_device_ctl_poll_msec_store(struct edac_device_ctl_info + *ctl_info, const char *data, + size_t count) +{ + unsigned long value; + + /* get the value and enforce that it is non-zero, must be at least + * one millisecond for the delay period, between scans + * Then cancel last outstanding delay for the work request + * and set a new one. 
+ */ + value = simple_strtoul(data, NULL, 0); + edac_device_reset_delay_period(ctl_info, value); + + return count; +} + +/* edac_device_ctl_info specific attribute structure */ +struct ctl_info_attribute { + struct attribute attr; + ssize_t(*show) (struct edac_device_ctl_info *, char *); + ssize_t(*store) (struct edac_device_ctl_info *, const char *, size_t); +}; + +#define to_ctl_info(k) container_of(k, struct edac_device_ctl_info, kobj) +#define to_ctl_info_attr(a) container_of(a,struct ctl_info_attribute,attr) + +/* Function to 'show' fields from the edac_dev 'ctl_info' structure */ +static ssize_t edac_dev_ctl_info_show(struct kobject *kobj, + struct attribute *attr, char *buffer) +{ + struct edac_device_ctl_info *edac_dev = to_ctl_info(kobj); + struct ctl_info_attribute *ctl_info_attr = to_ctl_info_attr(attr); + + if (ctl_info_attr->show) + return ctl_info_attr->show(edac_dev, buffer); + return -EIO; +} + +/* Function to 'store' fields into the edac_dev 'ctl_info' structure */ +static ssize_t edac_dev_ctl_info_store(struct kobject *kobj, + struct attribute *attr, + const char *buffer, size_t count) +{ + struct edac_device_ctl_info *edac_dev = to_ctl_info(kobj); + struct ctl_info_attribute *ctl_info_attr = to_ctl_info_attr(attr); + + if (ctl_info_attr->store) + return ctl_info_attr->store(edac_dev, buffer, count); + return -EIO; +} + +/* edac_dev file operations for an 'ctl_info' */ +static struct sysfs_ops device_ctl_info_ops = { + .show = edac_dev_ctl_info_show, + .store = edac_dev_ctl_info_store +}; + +#define CTL_INFO_ATTR(_name,_mode,_show,_store) \ +static struct ctl_info_attribute attr_ctl_info_##_name = { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .show = _show, \ + .store = _store, \ +}; + +/* Declare the various ctl_info attributes here and their respective ops */ +CTL_INFO_ATTR(log_ue, S_IRUGO | S_IWUSR, + edac_device_ctl_log_ue_show, edac_device_ctl_log_ue_store); +CTL_INFO_ATTR(log_ce, S_IRUGO | S_IWUSR, + 
edac_device_ctl_log_ce_show, edac_device_ctl_log_ce_store); +CTL_INFO_ATTR(panic_on_ue, S_IRUGO | S_IWUSR, + edac_device_ctl_panic_on_ue_show, + edac_device_ctl_panic_on_ue_store); +CTL_INFO_ATTR(poll_msec, S_IRUGO | S_IWUSR, + edac_device_ctl_poll_msec_show, edac_device_ctl_poll_msec_store); + +/* Base Attributes of the EDAC_DEVICE ECC object */ +static struct ctl_info_attribute *device_ctrl_attr[] = { + &attr_ctl_info_panic_on_ue, + &attr_ctl_info_log_ue, + &attr_ctl_info_log_ce, + &attr_ctl_info_poll_msec, + NULL, +}; + +/* + * edac_device_ctrl_master_release + * + * called when the reference count for the 'main' kobj + * for a edac_device control struct reaches zero + * + * Reference count model: + * One 'main' kobject for each control structure allocated. + * That main kobj is initially set to one AND + * the reference count for the EDAC 'core' module is + * bumped by one, thus added 'keep in memory' dependency. + * + * Each new internal kobj (in instances and blocks) then + * bumps the 'main' kobject. + * + * When they are released their release functions decrement + * the 'main' kobj. + * + * When the main kobj reaches zero (0) then THIS function + * is called which then decrements the EDAC 'core' module. + * When the module reference count reaches zero then the + * module no longer has dependency on keeping the release + * function code in memory and module can be unloaded. + * + * This will support several control objects as well, each + * with its own 'main' kobj. 
+ */ +static void edac_device_ctrl_master_release(struct kobject *kobj) +{ + struct edac_device_ctl_info *edac_dev = to_edacdev(kobj); + + debugf4("%s() control index=%d\n", __func__, edac_dev->dev_idx); + + /* decrement the EDAC CORE module ref count */ + module_put(edac_dev->owner); + + /* free the control struct containing the 'main' kobj + * passed in to this routine + */ + kfree(edac_dev); +} + +/* ktype for the main (master) kobject */ +static struct kobj_type ktype_device_ctrl = { + .release = edac_device_ctrl_master_release, + .sysfs_ops = &device_ctl_info_ops, + .default_attrs = (struct attribute **)device_ctrl_attr, +}; + +/* + * edac_device_register_sysfs_main_kobj + * + * perform the high level setup for the new edac_device instance + * + * Return: 0 SUCCESS + * !0 FAILURE + */ +int edac_device_register_sysfs_main_kobj(struct edac_device_ctl_info *edac_dev) +{ + struct sysdev_class *edac_class; + int err; + + debugf1("%s()\n", __func__); + + /* get the /sys/devices/system/edac reference */ + edac_class = edac_get_edac_class(); + if (edac_class == NULL) { + debugf1("%s() no edac_class error\n", __func__); + err = -ENODEV; + goto err_out; + } + + /* Point to the 'edac_class' this instance 'reports' to */ + edac_dev->edac_class = edac_class; + + /* Init the devices's kobject */ + memset(&edac_dev->kobj, 0, sizeof(struct kobject)); + edac_dev->kobj.ktype = &ktype_device_ctrl; + + /* set this new device under the edac_class kobject */ + edac_dev->kobj.parent = &edac_class->kset.kobj; + + /* generate sysfs "..../edac/<name>" */ + debugf4("%s() set name of kobject to: %s\n", __func__, edac_dev->name); + err = kobject_set_name(&edac_dev->kobj, "%s", edac_dev->name); + if (err) + goto err_out; + + /* Record which module 'owns' this control structure + * and bump the ref count of the module + */ + edac_dev->owner = THIS_MODULE; + + if (!try_module_get(edac_dev->owner)) { + err = -ENODEV; + goto err_out; + } + + /* register */ + err = 
kobject_register(&edac_dev->kobj); + if (err) { + debugf1("%s()Failed to register '.../edac/%s'\n", + __func__, edac_dev->name); + goto err_kobj_reg; + } + + /* At this point, to 'free' the control struct, + * edac_device_unregister_sysfs_main_kobj() must be used + */ + + debugf4("%s() Registered '.../edac/%s' kobject\n", + __func__, edac_dev->name); + + return 0; + + /* Error exit stack */ +err_kobj_reg: + module_put(edac_dev->owner); + +err_out: + return err; +} + +/* + * edac_device_unregister_sysfs_main_kobj: + * the '..../edac/<name>' kobject + */ +void edac_device_unregister_sysfs_main_kobj( + struct edac_device_ctl_info *edac_dev) +{ + debugf0("%s()\n", __func__); + debugf4("%s() name of kobject is: %s\n", + __func__, kobject_name(&edac_dev->kobj)); + + /* + * Unregister the edac device's kobject and + * allow for reference count to reach 0 at which point + * the callback will be called to: + * a) module_put() this module + * b) 'kfree' the memory + */ + kobject_unregister(&edac_dev->kobj); +} + +/* edac_dev -> instance information */ + +/* + * Set of low-level instance attribute show functions + */ +static ssize_t instance_ue_count_show(struct edac_device_instance *instance, + char *data) +{ + return sprintf(data, "%u\n", instance->counters.ue_count); +} + +static ssize_t instance_ce_count_show(struct edac_device_instance *instance, + char *data) +{ + return sprintf(data, "%u\n", instance->counters.ce_count); +} + +#define to_instance(k) container_of(k, struct edac_device_instance, kobj) +#define to_instance_attr(a) container_of(a,struct instance_attribute,attr) + +/* DEVICE instance kobject release() function */ +static void edac_device_ctrl_instance_release(struct kobject *kobj) +{ + struct edac_device_instance *instance; + + debugf1("%s()\n", __func__); + + /* map from this kobj to the main control struct + * and then dec the main kobj count + */ + instance = to_instance(kobj); + kobject_put(&instance->ctl->kobj); +} + +/* instance specific attribute 
structure */ +struct instance_attribute { + struct attribute attr; + ssize_t(*show) (struct edac_device_instance *, char *); + ssize_t(*store) (struct edac_device_instance *, const char *, size_t); +}; + +/* Function to 'show' fields from the edac_dev 'instance' structure */ +static ssize_t edac_dev_instance_show(struct kobject *kobj, + struct attribute *attr, char *buffer) +{ + struct edac_device_instance *instance = to_instance(kobj); + struct instance_attribute *instance_attr = to_instance_attr(attr); + + if (instance_attr->show) + return instance_attr->show(instance, buffer); + return -EIO; +} + +/* Function to 'store' fields into the edac_dev 'instance' structure */ +static ssize_t edac_dev_instance_store(struct kobject *kobj, + struct attribute *attr, + const char *buffer, size_t count) +{ + struct edac_device_instance *instance = to_instance(kobj); + struct instance_attribute *instance_attr = to_instance_attr(attr); + + if (instance_attr->store) + return instance_attr->store(instance, buffer, count); + return -EIO; +} + +/* edac_dev file operations for an 'instance' */ +static struct sysfs_ops device_instance_ops = { + .show = edac_dev_instance_show, + .store = edac_dev_instance_store +}; + +#define INSTANCE_ATTR(_name,_mode,_show,_store) \ +static struct instance_attribute attr_instance_##_name = { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .show = _show, \ + .store = _store, \ +}; + +/* + * Define attributes visible for the edac_device instance object + * Each contains a pointer to a show and an optional set + * function pointer that does the low level output/input + */ +INSTANCE_ATTR(ce_count, S_IRUGO, instance_ce_count_show, NULL); +INSTANCE_ATTR(ue_count, S_IRUGO, instance_ue_count_show, NULL); + +/* list of edac_dev 'instance' attributes */ +static struct instance_attribute *device_instance_attr[] = { + &attr_instance_ce_count, + &attr_instance_ue_count, + NULL, +}; + +/* The 'ktype' for each edac_dev 'instance' */ +static struct 
kobj_type ktype_instance_ctrl = { + .release = edac_device_ctrl_instance_release, + .sysfs_ops = &device_instance_ops, + .default_attrs = (struct attribute **)device_instance_attr, +}; + +/* edac_dev -> instance -> block information */ + +#define to_block(k) container_of(k, struct edac_device_block, kobj) +#define to_block_attr(a) \ + container_of(a, struct edac_dev_sysfs_block_attribute, attr) + +/* + * Set of low-level block attribute show functions + */ +static ssize_t block_ue_count_show(struct kobject *kobj, + struct attribute *attr, char *data) +{ + struct edac_device_block *block = to_block(kobj); + + return sprintf(data, "%u\n", block->counters.ue_count); +} + +static ssize_t block_ce_count_show(struct kobject *kobj, + struct attribute *attr, char *data) +{ + struct edac_device_block *block = to_block(kobj); + + return sprintf(data, "%u\n", block->counters.ce_count); +} + +/* DEVICE block kobject release() function */ +static void edac_device_ctrl_block_release(struct kobject *kobj) +{ + struct edac_device_block *block; + + debugf1("%s()\n", __func__); + + /* get the container of the kobj */ + block = to_block(kobj); + + /* map from 'block kobj' to 'block->instance->controller->main_kobj' + * now 'release' the block kobject + */ + kobject_put(&block->instance->ctl->kobj); +} + + +/* Function to 'show' fields from the edac_dev 'block' structure */ +static ssize_t edac_dev_block_show(struct kobject *kobj, + struct attribute *attr, char *buffer) +{ + struct edac_dev_sysfs_block_attribute *block_attr = + to_block_attr(attr); + + if (block_attr->show) + return block_attr->show(kobj, attr, buffer); + return -EIO; +} + +/* Function to 'store' fields into the edac_dev 'block' structure */ +static ssize_t edac_dev_block_store(struct kobject *kobj, + struct attribute *attr, + const char *buffer, size_t count) +{ + struct edac_dev_sysfs_block_attribute *block_attr; + + block_attr = to_block_attr(attr); + + if (block_attr->store) + return block_attr->store(kobj, attr, 
buffer, count); + return -EIO; +} + +/* edac_dev file operations for a 'block' */ +static struct sysfs_ops device_block_ops = { + .show = edac_dev_block_show, + .store = edac_dev_block_store +}; + +#define BLOCK_ATTR(_name,_mode,_show,_store) \ +static struct edac_dev_sysfs_block_attribute attr_block_##_name = { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .show = _show, \ + .store = _store, \ +}; + +BLOCK_ATTR(ce_count, S_IRUGO, block_ce_count_show, NULL); +BLOCK_ATTR(ue_count, S_IRUGO, block_ue_count_show, NULL); + +/* list of edac_dev 'block' attributes */ +static struct edac_dev_sysfs_block_attribute *device_block_attr[] = { + &attr_block_ce_count, + &attr_block_ue_count, + NULL, +}; + +/* The 'ktype' for each edac_dev 'block' */ +static struct kobj_type ktype_block_ctrl = { + .release = edac_device_ctrl_block_release, + .sysfs_ops = &device_block_ops, + .default_attrs = (struct attribute **)device_block_attr, +}; + +/* block ctor/dtor code */ + +/* + * edac_device_create_block + */ +static int edac_device_create_block(struct edac_device_ctl_info *edac_dev, + struct edac_device_instance *instance, + struct edac_device_block *block) +{ + int i; + int err; + struct edac_dev_sysfs_block_attribute *sysfs_attrib; + struct kobject *main_kobj; + + debugf4("%s() Instance '%s' inst_p=%p block '%s' block_p=%p\n", + __func__, instance->name, instance, block->name, block); + debugf4("%s() block kobj=%p block kobj->parent=%p\n", + __func__, &block->kobj, &block->kobj.parent); + + /* init this block's kobject */ + memset(&block->kobj, 0, sizeof(struct kobject)); + block->kobj.parent = &instance->kobj; + block->kobj.ktype = &ktype_block_ctrl; + + err = kobject_set_name(&block->kobj, "%s", block->name); + if (err) + return err; + + /* bump the main kobject's reference count for this controller + * and this instance is dependant on the main + */ + main_kobj = kobject_get(&edac_dev->kobj); + if (!main_kobj) { + err = -ENODEV; + goto err_out; + } + + /* Add this 
block's kobject */ + err = kobject_register(&block->kobj); + if (err) { + debugf1("%s() Failed to register instance '%s'\n", + __func__, block->name); + kobject_put(main_kobj); + err = -ENODEV; + goto err_out; + } + + /* If there are driver level block attributes, then added them + * to the block kobject + */ + sysfs_attrib = block->block_attributes; + if (sysfs_attrib && block->nr_attribs) { + for (i = 0; i < block->nr_attribs; i++, sysfs_attrib++) { + + debugf4("%s() creating block attrib='%s' " + "attrib->%p to kobj=%p\n", + __func__, + sysfs_attrib->attr.name, + sysfs_attrib, &block->kobj); + + /* Create each block_attribute file */ + err = sysfs_create_file(&block->kobj, + &sysfs_attrib->attr); + if (err) + goto err_on_attrib; + } + } + + return 0; + + /* Error unwind stack */ +err_on_attrib: + kobject_unregister(&block->kobj); + +err_out: + return err; +} + +/* + * edac_device_delete_block(edac_dev,block); + */ +static void edac_device_delete_block(struct edac_device_ctl_info *edac_dev, + struct edac_device_block *block) +{ + struct edac_dev_sysfs_block_attribute *sysfs_attrib; + int i; + + /* if this block has 'attributes' then we need to iterate over the list + * and 'remove' the attributes on this block + */ + sysfs_attrib = block->block_attributes; + if (sysfs_attrib && block->nr_attribs) { + for (i = 0; i < block->nr_attribs; i++, sysfs_attrib++) { + + /* remove each block_attrib file */ + sysfs_remove_file(&block->kobj, + (struct attribute *) sysfs_attrib); + } + } + + /* unregister this block's kobject, SEE: + * edac_device_ctrl_block_release() callback operation + */ + kobject_unregister(&block->kobj); +} + +/* instance ctor/dtor code */ + +/* + * edac_device_create_instance + * create just one instance of an edac_device 'instance' + */ +static int edac_device_create_instance(struct edac_device_ctl_info *edac_dev, + int idx) +{ + int i, j; + int err; + struct edac_device_instance *instance; + struct kobject *main_kobj; + + instance = 
&edac_dev->instances[idx]; + + /* Init the instance's kobject */ + memset(&instance->kobj, 0, sizeof(struct kobject)); + + /* set this new device under the edac_device main kobject */ + instance->kobj.parent = &edac_dev->kobj; + instance->kobj.ktype = &ktype_instance_ctrl; + instance->ctl = edac_dev; + + err = kobject_set_name(&instance->kobj, "%s", instance->name); + if (err) + goto err_out; + + /* bump the main kobject's reference count for this controller + * and this instance is dependant on the main + */ + main_kobj = kobject_get(&edac_dev->kobj); + if (!main_kobj) { + err = -ENODEV; + goto err_out; + } + + /* Formally register this instance's kobject */ + err = kobject_register(&instance->kobj); + if (err != 0) { + debugf2("%s() Failed to register instance '%s'\n", + __func__, instance->name); + kobject_put(main_kobj); + goto err_out; + } + + debugf4("%s() now register '%d' blocks for instance %d\n", + __func__, instance->nr_blocks, idx); + + /* register all blocks of this instance */ + for (i = 0; i < instance->nr_blocks; i++) { + err = edac_device_create_block(edac_dev, instance, + &instance->blocks[i]); + if (err) { + /* If any fail, remove all previous ones */ + for (j = 0; j < i; j++) + edac_device_delete_block(edac_dev, + &instance->blocks[j]); + goto err_release_instance_kobj; + } + } + + debugf4("%s() Registered instance %d '%s' kobject\n", + __func__, idx, instance->name); + + return 0; + + /* error unwind stack */ +err_release_instance_kobj: + kobject_unregister(&instance->kobj); + +err_out: + return err; +} + +/* + * edac_device_remove_instance + * remove an edac_device instance + */ +static void edac_device_delete_instance(struct edac_device_ctl_info *edac_dev, + int idx) +{ + struct edac_device_instance *instance; + int i; + + instance = &edac_dev->instances[idx]; + + /* unregister all blocks in this instance */ + for (i = 0; i < instance->nr_blocks; i++) + edac_device_delete_block(edac_dev, &instance->blocks[i]); + + /* unregister this 
instance's kobject, SEE: + * edac_device_ctrl_instance_release() for callback operation + */ + kobject_unregister(&instance->kobj); +} + +/* + * edac_device_create_instances + * create the first level of 'instances' for this device + * (ie 'cache' might have 'cache0', 'cache1', 'cache2', etc + */ +static int edac_device_create_instances(struct edac_device_ctl_info *edac_dev) +{ + int i, j; + int err; + + debugf0("%s()\n", __func__); + + /* iterate over creation of the instances */ + for (i = 0; i < edac_dev->nr_instances; i++) { + err = edac_device_create_instance(edac_dev, i); + if (err) { + /* unwind previous instances on error */ + for (j = 0; j < i; j++) + edac_device_delete_instance(edac_dev, j); + return err; + } + } + + return 0; +} + +/* + * edac_device_delete_instances(edac_dev); + * unregister all the kobjects of the instances + */ +static void edac_device_delete_instances(struct edac_device_ctl_info *edac_dev) +{ + int i; + + /* iterate over creation of the instances */ + for (i = 0; i < edac_dev->nr_instances; i++) + edac_device_delete_instance(edac_dev, i); +} + +/* edac_dev sysfs ctor/dtor code */ + +/* + * edac_device_add_main_sysfs_attributes + * add some attributes to this instance's main kobject + */ +static int edac_device_add_main_sysfs_attributes( + struct edac_device_ctl_info *edac_dev) +{ + struct edac_dev_sysfs_attribute *sysfs_attrib; + int err = 0; + + sysfs_attrib = edac_dev->sysfs_attributes; + if (sysfs_attrib) { + /* iterate over the array and create an attribute for each + * entry in the list + */ + while (sysfs_attrib->attr.name != NULL) { + err = sysfs_create_file(&edac_dev->kobj, + (struct attribute*) sysfs_attrib); + if (err) + goto err_out; + + sysfs_attrib++; + } + } + +err_out: + return err; +} + +/* + * edac_device_remove_main_sysfs_attributes + * remove any attributes to this instance's main kobject + */ +static void edac_device_remove_main_sysfs_attributes( + struct edac_device_ctl_info *edac_dev) +{ + struct 
edac_dev_sysfs_attribute *sysfs_attrib; + + /* if there are main attributes, defined, remove them. First, + * point to the start of the array and iterate over it + * removing each attribute listed from this device's instance's kobject + */ + sysfs_attrib = edac_dev->sysfs_attributes; + if (sysfs_attrib) { + while (sysfs_attrib->attr.name != NULL) { + sysfs_remove_file(&edac_dev->kobj, + (struct attribute *) sysfs_attrib); + sysfs_attrib++; + } + } +} + +/* + * edac_device_create_sysfs() Constructor + * + * accept a created edac_device control structure + * and 'export' it to sysfs. The 'main' kobj should already have been + * created. 'instance' and 'block' kobjects should be registered + * along with any 'block' attributes from the low driver. In addition, + * the main attributes (if any) are connected to the main kobject of + * the control structure. + * + * Return: + * 0 Success + * !0 Failure + */ +int edac_device_create_sysfs(struct edac_device_ctl_info *edac_dev) +{ + int err; + struct kobject *edac_kobj = &edac_dev->kobj; + + debugf0("%s() idx=%d\n", __func__, edac_dev->dev_idx); + + /* go create any main attributes callers wants */ + err = edac_device_add_main_sysfs_attributes(edac_dev); + if (err) { + debugf0("%s() failed to add sysfs attribs\n", __func__); + goto err_out; + } + + /* create a symlink from the edac device + * to the platform 'device' being used for this + */ + err = sysfs_create_link(edac_kobj, + &edac_dev->dev->kobj, EDAC_DEVICE_SYMLINK); + if (err) { + debugf0("%s() sysfs_create_link() returned err= %d\n", + __func__, err); + goto err_remove_main_attribs; + } + + /* Create the first level instance directories + * In turn, the nested blocks beneath the instances will + * be registered as well + */ + err = edac_device_create_instances(edac_dev); + if (err) { + debugf0("%s() edac_device_create_instances() " + "returned err= %d\n", __func__, err); + goto err_remove_link; + } + + + debugf4("%s() create-instances done, idx=%d\n", + __func__, 
edac_dev->dev_idx); + + return 0; + + /* Error unwind stack */ +err_remove_link: + /* remove the sym link */ + sysfs_remove_link(&edac_dev->kobj, EDAC_DEVICE_SYMLINK); + +err_remove_main_attribs: + edac_device_remove_main_sysfs_attributes(edac_dev); + +err_out: + return err; +} + +/* + * edac_device_remove_sysfs() destructor + * + * given an edac_device struct, tear down the kobject resources + */ +void edac_device_remove_sysfs(struct edac_device_ctl_info *edac_dev) +{ + debugf0("%s()\n", __func__); + + /* remove any main attributes for this device */ + edac_device_remove_main_sysfs_attributes(edac_dev); + + /* remove the device sym link */ + sysfs_remove_link(&edac_dev->kobj, EDAC_DEVICE_SYMLINK); + + /* walk the instance/block kobject tree, deconstructing it */ + edac_device_delete_instances(edac_dev); +} diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 804875de580..4471be36259 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -27,1200 +27,20 @@ #include <linux/list.h> #include <linux/sysdev.h> #include <linux/ctype.h> -#include <linux/kthread.h> -#include <linux/freezer.h> +#include <linux/edac.h> #include <asm/uaccess.h> #include <asm/page.h> #include <asm/edac.h> -#include "edac_mc.h" - -#define EDAC_MC_VERSION "Ver: 2.0.1 " __DATE__ - - -#ifdef CONFIG_EDAC_DEBUG -/* Values of 0 to 4 will generate output */ -int edac_debug_level = 1; -EXPORT_SYMBOL_GPL(edac_debug_level); -#endif - -/* EDAC Controls, setable by module parameter, and sysfs */ -static int log_ue = 1; -static int log_ce = 1; -static int panic_on_ue; -static int poll_msec = 1000; +#include "edac_core.h" +#include "edac_module.h" /* lock to memory controller's control array */ -static DECLARE_MUTEX(mem_ctls_mutex); +static DEFINE_MUTEX(mem_ctls_mutex); static struct list_head mc_devices = LIST_HEAD_INIT(mc_devices); -static struct task_struct *edac_thread; - -#ifdef CONFIG_PCI -static int check_pci_parity = 0; /* default YES check PCI parity */ -static int 
panic_on_pci_parity; /* default no panic on PCI Parity */ -static atomic_t pci_parity_count = ATOMIC_INIT(0); - -static struct kobject edac_pci_kobj; /* /sys/devices/system/edac/pci */ -static struct completion edac_pci_kobj_complete; -#endif /* CONFIG_PCI */ - -/* START sysfs data and methods */ - - -static const char *mem_types[] = { - [MEM_EMPTY] = "Empty", - [MEM_RESERVED] = "Reserved", - [MEM_UNKNOWN] = "Unknown", - [MEM_FPM] = "FPM", - [MEM_EDO] = "EDO", - [MEM_BEDO] = "BEDO", - [MEM_SDR] = "Unbuffered-SDR", - [MEM_RDR] = "Registered-SDR", - [MEM_DDR] = "Unbuffered-DDR", - [MEM_RDDR] = "Registered-DDR", - [MEM_RMBS] = "RMBS" -}; - -static const char *dev_types[] = { - [DEV_UNKNOWN] = "Unknown", - [DEV_X1] = "x1", - [DEV_X2] = "x2", - [DEV_X4] = "x4", - [DEV_X8] = "x8", - [DEV_X16] = "x16", - [DEV_X32] = "x32", - [DEV_X64] = "x64" -}; - -static const char *edac_caps[] = { - [EDAC_UNKNOWN] = "Unknown", - [EDAC_NONE] = "None", - [EDAC_RESERVED] = "Reserved", - [EDAC_PARITY] = "PARITY", - [EDAC_EC] = "EC", - [EDAC_SECDED] = "SECDED", - [EDAC_S2ECD2ED] = "S2ECD2ED", - [EDAC_S4ECD4ED] = "S4ECD4ED", - [EDAC_S8ECD8ED] = "S8ECD8ED", - [EDAC_S16ECD16ED] = "S16ECD16ED" -}; - -/* sysfs object: /sys/devices/system/edac */ -static struct sysdev_class edac_class = { - set_kset_name("edac"), -}; - -/* sysfs object: - * /sys/devices/system/edac/mc - */ -static struct kobject edac_memctrl_kobj; - -/* We use these to wait for the reference counts on edac_memctrl_kobj and - * edac_pci_kobj to reach 0. 
- */ -static struct completion edac_memctrl_kobj_complete; - -/* - * /sys/devices/system/edac/mc; - * data structures and methods - */ -static ssize_t memctrl_int_show(void *ptr, char *buffer) -{ - int *value = (int*) ptr; - return sprintf(buffer, "%u\n", *value); -} - -static ssize_t memctrl_int_store(void *ptr, const char *buffer, size_t count) -{ - int *value = (int*) ptr; - - if (isdigit(*buffer)) - *value = simple_strtoul(buffer, NULL, 0); - - return count; -} - -struct memctrl_dev_attribute { - struct attribute attr; - void *value; - ssize_t (*show)(void *,char *); - ssize_t (*store)(void *, const char *, size_t); -}; - -/* Set of show/store abstract level functions for memory control object */ -static ssize_t memctrl_dev_show(struct kobject *kobj, - struct attribute *attr, char *buffer) -{ - struct memctrl_dev_attribute *memctrl_dev; - memctrl_dev = (struct memctrl_dev_attribute*)attr; - - if (memctrl_dev->show) - return memctrl_dev->show(memctrl_dev->value, buffer); - - return -EIO; -} - -static ssize_t memctrl_dev_store(struct kobject *kobj, struct attribute *attr, - const char *buffer, size_t count) -{ - struct memctrl_dev_attribute *memctrl_dev; - memctrl_dev = (struct memctrl_dev_attribute*)attr; - - if (memctrl_dev->store) - return memctrl_dev->store(memctrl_dev->value, buffer, count); - - return -EIO; -} - -static struct sysfs_ops memctrlfs_ops = { - .show = memctrl_dev_show, - .store = memctrl_dev_store -}; - -#define MEMCTRL_ATTR(_name,_mode,_show,_store) \ -struct memctrl_dev_attribute attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = _mode }, \ - .value = &_name, \ - .show = _show, \ - .store = _store, \ -}; - -#define MEMCTRL_STRING_ATTR(_name,_data,_mode,_show,_store) \ -struct memctrl_dev_attribute attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = _mode }, \ - .value = _data, \ - .show = _show, \ - .store = _store, \ -}; - -/* csrow<id> control files */ 
-MEMCTRL_ATTR(panic_on_ue,S_IRUGO|S_IWUSR,memctrl_int_show,memctrl_int_store); -MEMCTRL_ATTR(log_ue,S_IRUGO|S_IWUSR,memctrl_int_show,memctrl_int_store); -MEMCTRL_ATTR(log_ce,S_IRUGO|S_IWUSR,memctrl_int_show,memctrl_int_store); -MEMCTRL_ATTR(poll_msec,S_IRUGO|S_IWUSR,memctrl_int_show,memctrl_int_store); - -/* Base Attributes of the memory ECC object */ -static struct memctrl_dev_attribute *memctrl_attr[] = { - &attr_panic_on_ue, - &attr_log_ue, - &attr_log_ce, - &attr_poll_msec, - NULL, -}; - -/* Main MC kobject release() function */ -static void edac_memctrl_master_release(struct kobject *kobj) -{ - debugf1("%s()\n", __func__); - complete(&edac_memctrl_kobj_complete); -} - -static struct kobj_type ktype_memctrl = { - .release = edac_memctrl_master_release, - .sysfs_ops = &memctrlfs_ops, - .default_attrs = (struct attribute **) memctrl_attr, -}; - -/* Initialize the main sysfs entries for edac: - * /sys/devices/system/edac - * - * and children - * - * Return: 0 SUCCESS - * !0 FAILURE - */ -static int edac_sysfs_memctrl_setup(void) -{ - int err = 0; - - debugf1("%s()\n", __func__); - - /* create the /sys/devices/system/edac directory */ - err = sysdev_class_register(&edac_class); - - if (err) { - debugf1("%s() error=%d\n", __func__, err); - return err; - } - - /* Init the MC's kobject */ - memset(&edac_memctrl_kobj, 0, sizeof (edac_memctrl_kobj)); - edac_memctrl_kobj.parent = &edac_class.kset.kobj; - edac_memctrl_kobj.ktype = &ktype_memctrl; - - /* generate sysfs "..../edac/mc" */ - err = kobject_set_name(&edac_memctrl_kobj,"mc"); - - if (err) - goto fail; - - /* FIXME: maybe new sysdev_create_subdir() */ - err = kobject_register(&edac_memctrl_kobj); - - if (err) { - debugf1("Failed to register '.../edac/mc'\n"); - goto fail; - } - - debugf1("Registered '.../edac/mc' kobject\n"); - - return 0; - -fail: - sysdev_class_unregister(&edac_class); - return err; -} - -/* - * MC teardown: - * the '..../edac/mc' kobject followed by '..../edac' itself - */ -static void 
edac_sysfs_memctrl_teardown(void) -{ - debugf0("MC: " __FILE__ ": %s()\n", __func__); - - /* Unregister the MC's kobject and wait for reference count to reach - * 0. - */ - init_completion(&edac_memctrl_kobj_complete); - kobject_unregister(&edac_memctrl_kobj); - wait_for_completion(&edac_memctrl_kobj_complete); - - /* Unregister the 'edac' object */ - sysdev_class_unregister(&edac_class); -} - -#ifdef CONFIG_PCI -static ssize_t edac_pci_int_show(void *ptr, char *buffer) -{ - int *value = ptr; - return sprintf(buffer,"%d\n",*value); -} - -static ssize_t edac_pci_int_store(void *ptr, const char *buffer, size_t count) -{ - int *value = ptr; - - if (isdigit(*buffer)) - *value = simple_strtoul(buffer,NULL,0); - - return count; -} - -struct edac_pci_dev_attribute { - struct attribute attr; - void *value; - ssize_t (*show)(void *,char *); - ssize_t (*store)(void *, const char *,size_t); -}; - -/* Set of show/store abstract level functions for PCI Parity object */ -static ssize_t edac_pci_dev_show(struct kobject *kobj, struct attribute *attr, - char *buffer) -{ - struct edac_pci_dev_attribute *edac_pci_dev; - edac_pci_dev= (struct edac_pci_dev_attribute*)attr; - - if (edac_pci_dev->show) - return edac_pci_dev->show(edac_pci_dev->value, buffer); - return -EIO; -} - -static ssize_t edac_pci_dev_store(struct kobject *kobj, - struct attribute *attr, const char *buffer, size_t count) -{ - struct edac_pci_dev_attribute *edac_pci_dev; - edac_pci_dev= (struct edac_pci_dev_attribute*)attr; - - if (edac_pci_dev->show) - return edac_pci_dev->store(edac_pci_dev->value, buffer, count); - return -EIO; -} - -static struct sysfs_ops edac_pci_sysfs_ops = { - .show = edac_pci_dev_show, - .store = edac_pci_dev_store -}; - -#define EDAC_PCI_ATTR(_name,_mode,_show,_store) \ -struct edac_pci_dev_attribute edac_pci_attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = _mode }, \ - .value = &_name, \ - .show = _show, \ - .store = _store, \ -}; - -#define 
EDAC_PCI_STRING_ATTR(_name,_data,_mode,_show,_store) \ -struct edac_pci_dev_attribute edac_pci_attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = _mode }, \ - .value = _data, \ - .show = _show, \ - .store = _store, \ -}; - -/* PCI Parity control files */ -EDAC_PCI_ATTR(check_pci_parity, S_IRUGO|S_IWUSR, edac_pci_int_show, - edac_pci_int_store); -EDAC_PCI_ATTR(panic_on_pci_parity, S_IRUGO|S_IWUSR, edac_pci_int_show, - edac_pci_int_store); -EDAC_PCI_ATTR(pci_parity_count, S_IRUGO, edac_pci_int_show, NULL); - -/* Base Attributes of the memory ECC object */ -static struct edac_pci_dev_attribute *edac_pci_attr[] = { - &edac_pci_attr_check_pci_parity, - &edac_pci_attr_panic_on_pci_parity, - &edac_pci_attr_pci_parity_count, - NULL, -}; - -/* No memory to release */ -static void edac_pci_release(struct kobject *kobj) -{ - debugf1("%s()\n", __func__); - complete(&edac_pci_kobj_complete); -} - -static struct kobj_type ktype_edac_pci = { - .release = edac_pci_release, - .sysfs_ops = &edac_pci_sysfs_ops, - .default_attrs = (struct attribute **) edac_pci_attr, -}; - -/** - * edac_sysfs_pci_setup() - * - */ -static int edac_sysfs_pci_setup(void) -{ - int err; - - debugf1("%s()\n", __func__); - - memset(&edac_pci_kobj, 0, sizeof(edac_pci_kobj)); - edac_pci_kobj.parent = &edac_class.kset.kobj; - edac_pci_kobj.ktype = &ktype_edac_pci; - err = kobject_set_name(&edac_pci_kobj, "pci"); - - if (!err) { - /* Instanstiate the csrow object */ - /* FIXME: maybe new sysdev_create_subdir() */ - err = kobject_register(&edac_pci_kobj); - - if (err) - debugf1("Failed to register '.../edac/pci'\n"); - else - debugf1("Registered '.../edac/pci' kobject\n"); - } - - return err; -} - -static void edac_sysfs_pci_teardown(void) -{ - debugf0("%s()\n", __func__); - init_completion(&edac_pci_kobj_complete); - kobject_unregister(&edac_pci_kobj); - wait_for_completion(&edac_pci_kobj_complete); -} - - -static u16 get_pci_parity_status(struct pci_dev *dev, int secondary) -{ - int where; - u16 
status; - - where = secondary ? PCI_SEC_STATUS : PCI_STATUS; - pci_read_config_word(dev, where, &status); - - /* If we get back 0xFFFF then we must suspect that the card has been - * pulled but the Linux PCI layer has not yet finished cleaning up. - * We don't want to report on such devices - */ - - if (status == 0xFFFF) { - u32 sanity; - - pci_read_config_dword(dev, 0, &sanity); - - if (sanity == 0xFFFFFFFF) - return 0; - } - - status &= PCI_STATUS_DETECTED_PARITY | PCI_STATUS_SIG_SYSTEM_ERROR | - PCI_STATUS_PARITY; - - if (status) - /* reset only the bits we are interested in */ - pci_write_config_word(dev, where, status); - - return status; -} - -typedef void (*pci_parity_check_fn_t) (struct pci_dev *dev); - -/* Clear any PCI parity errors logged by this device. */ -static void edac_pci_dev_parity_clear(struct pci_dev *dev) -{ - u8 header_type; - - get_pci_parity_status(dev, 0); - - /* read the device TYPE, looking for bridges */ - pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type); - - if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) - get_pci_parity_status(dev, 1); -} - -/* - * PCI Parity polling - * - */ -static void edac_pci_dev_parity_test(struct pci_dev *dev) -{ - u16 status; - u8 header_type; - - /* read the STATUS register on this device - */ - status = get_pci_parity_status(dev, 0); - - debugf2("PCI STATUS= 0x%04x %s\n", status, dev->dev.bus_id ); - - /* check the status reg for errors */ - if (status) { - if (status & (PCI_STATUS_SIG_SYSTEM_ERROR)) - edac_printk(KERN_CRIT, EDAC_PCI, - "Signaled System Error on %s\n", - pci_name(dev)); - - if (status & (PCI_STATUS_PARITY)) { - edac_printk(KERN_CRIT, EDAC_PCI, - "Master Data Parity Error on %s\n", - pci_name(dev)); - - atomic_inc(&pci_parity_count); - } - - if (status & (PCI_STATUS_DETECTED_PARITY)) { - edac_printk(KERN_CRIT, EDAC_PCI, - "Detected Parity Error on %s\n", - pci_name(dev)); - - atomic_inc(&pci_parity_count); - } - } - - /* read the device TYPE, looking for bridges */ - 
pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type); - - debugf2("PCI HEADER TYPE= 0x%02x %s\n", header_type, dev->dev.bus_id ); - - if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) { - /* On bridges, need to examine secondary status register */ - status = get_pci_parity_status(dev, 1); - - debugf2("PCI SEC_STATUS= 0x%04x %s\n", - status, dev->dev.bus_id ); - - /* check the secondary status reg for errors */ - if (status) { - if (status & (PCI_STATUS_SIG_SYSTEM_ERROR)) - edac_printk(KERN_CRIT, EDAC_PCI, "Bridge " - "Signaled System Error on %s\n", - pci_name(dev)); - - if (status & (PCI_STATUS_PARITY)) { - edac_printk(KERN_CRIT, EDAC_PCI, "Bridge " - "Master Data Parity Error on " - "%s\n", pci_name(dev)); - - atomic_inc(&pci_parity_count); - } - - if (status & (PCI_STATUS_DETECTED_PARITY)) { - edac_printk(KERN_CRIT, EDAC_PCI, "Bridge " - "Detected Parity Error on %s\n", - pci_name(dev)); - - atomic_inc(&pci_parity_count); - } - } - } -} - -/* - * pci_dev parity list iterator - * Scan the PCI device list for one iteration, looking for SERRORs - * Master Parity ERRORS or Parity ERRORs on primary or secondary devices - */ -static inline void edac_pci_dev_parity_iterator(pci_parity_check_fn_t fn) -{ - struct pci_dev *dev = NULL; - - /* request for kernel access to the next PCI device, if any, - * and while we are looking at it have its reference count - * bumped until we are done with it - */ - while((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { - fn(dev); - } -} - -static void do_pci_parity_check(void) -{ - unsigned long flags; - int before_count; - - debugf3("%s()\n", __func__); - - if (!check_pci_parity) - return; - - before_count = atomic_read(&pci_parity_count); - - /* scan all PCI devices looking for a Parity Error on devices and - * bridges - */ - local_irq_save(flags); - edac_pci_dev_parity_iterator(edac_pci_dev_parity_test); - local_irq_restore(flags); - - /* Only if operator has selected panic on PCI Error */ - if 
(panic_on_pci_parity) { - /* If the count is different 'after' from 'before' */ - if (before_count != atomic_read(&pci_parity_count)) - panic("EDAC: PCI Parity Error"); - } -} - -static inline void clear_pci_parity_errors(void) -{ - /* Clear any PCI bus parity errors that devices initially have logged - * in their registers. - */ - edac_pci_dev_parity_iterator(edac_pci_dev_parity_clear); -} - -#else /* CONFIG_PCI */ - -/* pre-process these away */ -#define do_pci_parity_check() -#define clear_pci_parity_errors() -#define edac_sysfs_pci_teardown() -#define edac_sysfs_pci_setup() (0) - -#endif /* CONFIG_PCI */ - -/* EDAC sysfs CSROW data structures and methods - */ - -/* Set of more default csrow<id> attribute show/store functions */ -static ssize_t csrow_ue_count_show(struct csrow_info *csrow, char *data, int private) -{ - return sprintf(data,"%u\n", csrow->ue_count); -} - -static ssize_t csrow_ce_count_show(struct csrow_info *csrow, char *data, int private) -{ - return sprintf(data,"%u\n", csrow->ce_count); -} - -static ssize_t csrow_size_show(struct csrow_info *csrow, char *data, int private) -{ - return sprintf(data,"%u\n", PAGES_TO_MiB(csrow->nr_pages)); -} - -static ssize_t csrow_mem_type_show(struct csrow_info *csrow, char *data, int private) -{ - return sprintf(data,"%s\n", mem_types[csrow->mtype]); -} - -static ssize_t csrow_dev_type_show(struct csrow_info *csrow, char *data, int private) -{ - return sprintf(data,"%s\n", dev_types[csrow->dtype]); -} - -static ssize_t csrow_edac_mode_show(struct csrow_info *csrow, char *data, int private) -{ - return sprintf(data,"%s\n", edac_caps[csrow->edac_mode]); -} - -/* show/store functions for DIMM Label attributes */ -static ssize_t channel_dimm_label_show(struct csrow_info *csrow, - char *data, int channel) -{ - return snprintf(data, EDAC_MC_LABEL_LEN,"%s", - csrow->channels[channel].label); -} - -static ssize_t channel_dimm_label_store(struct csrow_info *csrow, - const char *data, - size_t count, - int channel) -{ - 
ssize_t max_size = 0; - - max_size = min((ssize_t)count,(ssize_t)EDAC_MC_LABEL_LEN-1); - strncpy(csrow->channels[channel].label, data, max_size); - csrow->channels[channel].label[max_size] = '\0'; - - return max_size; -} - -/* show function for dynamic chX_ce_count attribute */ -static ssize_t channel_ce_count_show(struct csrow_info *csrow, - char *data, - int channel) -{ - return sprintf(data, "%u\n", csrow->channels[channel].ce_count); -} - -/* csrow specific attribute structure */ -struct csrowdev_attribute { - struct attribute attr; - ssize_t (*show)(struct csrow_info *,char *,int); - ssize_t (*store)(struct csrow_info *, const char *,size_t,int); - int private; -}; - -#define to_csrow(k) container_of(k, struct csrow_info, kobj) -#define to_csrowdev_attr(a) container_of(a, struct csrowdev_attribute, attr) - -/* Set of show/store higher level functions for default csrow attributes */ -static ssize_t csrowdev_show(struct kobject *kobj, - struct attribute *attr, - char *buffer) -{ - struct csrow_info *csrow = to_csrow(kobj); - struct csrowdev_attribute *csrowdev_attr = to_csrowdev_attr(attr); - - if (csrowdev_attr->show) - return csrowdev_attr->show(csrow, - buffer, - csrowdev_attr->private); - return -EIO; -} - -static ssize_t csrowdev_store(struct kobject *kobj, struct attribute *attr, - const char *buffer, size_t count) -{ - struct csrow_info *csrow = to_csrow(kobj); - struct csrowdev_attribute * csrowdev_attr = to_csrowdev_attr(attr); - - if (csrowdev_attr->store) - return csrowdev_attr->store(csrow, - buffer, - count, - csrowdev_attr->private); - return -EIO; -} - -static struct sysfs_ops csrowfs_ops = { - .show = csrowdev_show, - .store = csrowdev_store -}; - -#define CSROWDEV_ATTR(_name,_mode,_show,_store,_private) \ -struct csrowdev_attribute attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = _mode }, \ - .show = _show, \ - .store = _store, \ - .private = _private, \ -}; - -/* default cwrow<id>/attribute files */ 
-CSROWDEV_ATTR(size_mb,S_IRUGO,csrow_size_show,NULL,0); -CSROWDEV_ATTR(dev_type,S_IRUGO,csrow_dev_type_show,NULL,0); -CSROWDEV_ATTR(mem_type,S_IRUGO,csrow_mem_type_show,NULL,0); -CSROWDEV_ATTR(edac_mode,S_IRUGO,csrow_edac_mode_show,NULL,0); -CSROWDEV_ATTR(ue_count,S_IRUGO,csrow_ue_count_show,NULL,0); -CSROWDEV_ATTR(ce_count,S_IRUGO,csrow_ce_count_show,NULL,0); - -/* default attributes of the CSROW<id> object */ -static struct csrowdev_attribute *default_csrow_attr[] = { - &attr_dev_type, - &attr_mem_type, - &attr_edac_mode, - &attr_size_mb, - &attr_ue_count, - &attr_ce_count, - NULL, -}; - - -/* possible dynamic channel DIMM Label attribute files */ -CSROWDEV_ATTR(ch0_dimm_label,S_IRUGO|S_IWUSR, - channel_dimm_label_show, - channel_dimm_label_store, - 0 ); -CSROWDEV_ATTR(ch1_dimm_label,S_IRUGO|S_IWUSR, - channel_dimm_label_show, - channel_dimm_label_store, - 1 ); -CSROWDEV_ATTR(ch2_dimm_label,S_IRUGO|S_IWUSR, - channel_dimm_label_show, - channel_dimm_label_store, - 2 ); -CSROWDEV_ATTR(ch3_dimm_label,S_IRUGO|S_IWUSR, - channel_dimm_label_show, - channel_dimm_label_store, - 3 ); -CSROWDEV_ATTR(ch4_dimm_label,S_IRUGO|S_IWUSR, - channel_dimm_label_show, - channel_dimm_label_store, - 4 ); -CSROWDEV_ATTR(ch5_dimm_label,S_IRUGO|S_IWUSR, - channel_dimm_label_show, - channel_dimm_label_store, - 5 ); - -/* Total possible dynamic DIMM Label attribute file table */ -static struct csrowdev_attribute *dynamic_csrow_dimm_attr[] = { - &attr_ch0_dimm_label, - &attr_ch1_dimm_label, - &attr_ch2_dimm_label, - &attr_ch3_dimm_label, - &attr_ch4_dimm_label, - &attr_ch5_dimm_label -}; - -/* possible dynamic channel ce_count attribute files */ -CSROWDEV_ATTR(ch0_ce_count,S_IRUGO|S_IWUSR, - channel_ce_count_show, - NULL, - 0 ); -CSROWDEV_ATTR(ch1_ce_count,S_IRUGO|S_IWUSR, - channel_ce_count_show, - NULL, - 1 ); -CSROWDEV_ATTR(ch2_ce_count,S_IRUGO|S_IWUSR, - channel_ce_count_show, - NULL, - 2 ); -CSROWDEV_ATTR(ch3_ce_count,S_IRUGO|S_IWUSR, - channel_ce_count_show, - NULL, - 3 ); 
-CSROWDEV_ATTR(ch4_ce_count,S_IRUGO|S_IWUSR, - channel_ce_count_show, - NULL, - 4 ); -CSROWDEV_ATTR(ch5_ce_count,S_IRUGO|S_IWUSR, - channel_ce_count_show, - NULL, - 5 ); - -/* Total possible dynamic ce_count attribute file table */ -static struct csrowdev_attribute *dynamic_csrow_ce_count_attr[] = { - &attr_ch0_ce_count, - &attr_ch1_ce_count, - &attr_ch2_ce_count, - &attr_ch3_ce_count, - &attr_ch4_ce_count, - &attr_ch5_ce_count -}; - - -#define EDAC_NR_CHANNELS 6 - -/* Create dynamic CHANNEL files, indexed by 'chan', under specifed CSROW */ -static int edac_create_channel_files(struct kobject *kobj, int chan) -{ - int err=-ENODEV; - - if (chan >= EDAC_NR_CHANNELS) - return err; - - /* create the DIMM label attribute file */ - err = sysfs_create_file(kobj, - (struct attribute *) dynamic_csrow_dimm_attr[chan]); - - if (!err) { - /* create the CE Count attribute file */ - err = sysfs_create_file(kobj, - (struct attribute *) dynamic_csrow_ce_count_attr[chan]); - } else { - debugf1("%s() dimm labels and ce_count files created", __func__); - } - - return err; -} - -/* No memory to release for this kobj */ -static void edac_csrow_instance_release(struct kobject *kobj) -{ - struct csrow_info *cs; - - cs = container_of(kobj, struct csrow_info, kobj); - complete(&cs->kobj_complete); -} - -/* the kobj_type instance for a CSROW */ -static struct kobj_type ktype_csrow = { - .release = edac_csrow_instance_release, - .sysfs_ops = &csrowfs_ops, - .default_attrs = (struct attribute **) default_csrow_attr, -}; - -/* Create a CSROW object under specifed edac_mc_device */ -static int edac_create_csrow_object( - struct kobject *edac_mci_kobj, - struct csrow_info *csrow, - int index) -{ - int err = 0; - int chan; - - memset(&csrow->kobj, 0, sizeof(csrow->kobj)); - - /* generate ..../edac/mc/mc<id>/csrow<index> */ - - csrow->kobj.parent = edac_mci_kobj; - csrow->kobj.ktype = &ktype_csrow; - - /* name this instance of csrow<id> */ - err = kobject_set_name(&csrow->kobj,"csrow%d",index); - 
if (err) - goto error_exit; - - /* Instanstiate the csrow object */ - err = kobject_register(&csrow->kobj); - if (!err) { - /* Create the dyanmic attribute files on this csrow, - * namely, the DIMM labels and the channel ce_count - */ - for (chan = 0; chan < csrow->nr_channels; chan++) { - err = edac_create_channel_files(&csrow->kobj,chan); - if (err) - break; - } - } - -error_exit: - return err; -} - -/* default sysfs methods and data structures for the main MCI kobject */ - -static ssize_t mci_reset_counters_store(struct mem_ctl_info *mci, - const char *data, size_t count) -{ - int row, chan; - - mci->ue_noinfo_count = 0; - mci->ce_noinfo_count = 0; - mci->ue_count = 0; - mci->ce_count = 0; - - for (row = 0; row < mci->nr_csrows; row++) { - struct csrow_info *ri = &mci->csrows[row]; - - ri->ue_count = 0; - ri->ce_count = 0; - - for (chan = 0; chan < ri->nr_channels; chan++) - ri->channels[chan].ce_count = 0; - } - - mci->start_time = jiffies; - return count; -} - -/* memory scrubbing */ -static ssize_t mci_sdram_scrub_rate_store(struct mem_ctl_info *mci, - const char *data, size_t count) -{ - u32 bandwidth = -1; - - if (mci->set_sdram_scrub_rate) { - - memctrl_int_store(&bandwidth, data, count); - - if (!(*mci->set_sdram_scrub_rate)(mci, &bandwidth)) { - edac_printk(KERN_DEBUG, EDAC_MC, - "Scrub rate set successfully, applied: %d\n", - bandwidth); - } else { - /* FIXME: error codes maybe? */ - edac_printk(KERN_DEBUG, EDAC_MC, - "Scrub rate set FAILED, could not apply: %d\n", - bandwidth); - } - } else { - /* FIXME: produce "not implemented" ERROR for user-side. 
*/ - edac_printk(KERN_WARNING, EDAC_MC, - "Memory scrubbing 'set'control is not implemented!\n"); - } - return count; -} - -static ssize_t mci_sdram_scrub_rate_show(struct mem_ctl_info *mci, char *data) -{ - u32 bandwidth = -1; - - if (mci->get_sdram_scrub_rate) { - if (!(*mci->get_sdram_scrub_rate)(mci, &bandwidth)) { - edac_printk(KERN_DEBUG, EDAC_MC, - "Scrub rate successfully, fetched: %d\n", - bandwidth); - } else { - /* FIXME: error codes maybe? */ - edac_printk(KERN_DEBUG, EDAC_MC, - "Scrub rate fetch FAILED, got: %d\n", - bandwidth); - } - } else { - /* FIXME: produce "not implemented" ERROR for user-side. */ - edac_printk(KERN_WARNING, EDAC_MC, - "Memory scrubbing 'get' control is not implemented!\n"); - } - return sprintf(data, "%d\n", bandwidth); -} - -/* default attribute files for the MCI object */ -static ssize_t mci_ue_count_show(struct mem_ctl_info *mci, char *data) -{ - return sprintf(data,"%d\n", mci->ue_count); -} - -static ssize_t mci_ce_count_show(struct mem_ctl_info *mci, char *data) -{ - return sprintf(data,"%d\n", mci->ce_count); -} - -static ssize_t mci_ce_noinfo_show(struct mem_ctl_info *mci, char *data) -{ - return sprintf(data,"%d\n", mci->ce_noinfo_count); -} - -static ssize_t mci_ue_noinfo_show(struct mem_ctl_info *mci, char *data) -{ - return sprintf(data,"%d\n", mci->ue_noinfo_count); -} - -static ssize_t mci_seconds_show(struct mem_ctl_info *mci, char *data) -{ - return sprintf(data,"%ld\n", (jiffies - mci->start_time) / HZ); -} - -static ssize_t mci_ctl_name_show(struct mem_ctl_info *mci, char *data) -{ - return sprintf(data,"%s\n", mci->ctl_name); -} - -static ssize_t mci_size_mb_show(struct mem_ctl_info *mci, char *data) -{ - int total_pages, csrow_idx; - - for (total_pages = csrow_idx = 0; csrow_idx < mci->nr_csrows; - csrow_idx++) { - struct csrow_info *csrow = &mci->csrows[csrow_idx]; - - if (!csrow->nr_pages) - continue; - - total_pages += csrow->nr_pages; - } - - return sprintf(data,"%u\n", PAGES_TO_MiB(total_pages)); -} - 
-struct mcidev_attribute { - struct attribute attr; - ssize_t (*show)(struct mem_ctl_info *,char *); - ssize_t (*store)(struct mem_ctl_info *, const char *,size_t); -}; - -#define to_mci(k) container_of(k, struct mem_ctl_info, edac_mci_kobj) -#define to_mcidev_attr(a) container_of(a, struct mcidev_attribute, attr) - -/* MCI show/store functions for top most object */ -static ssize_t mcidev_show(struct kobject *kobj, struct attribute *attr, - char *buffer) -{ - struct mem_ctl_info *mem_ctl_info = to_mci(kobj); - struct mcidev_attribute * mcidev_attr = to_mcidev_attr(attr); - - if (mcidev_attr->show) - return mcidev_attr->show(mem_ctl_info, buffer); - - return -EIO; -} - -static ssize_t mcidev_store(struct kobject *kobj, struct attribute *attr, - const char *buffer, size_t count) -{ - struct mem_ctl_info *mem_ctl_info = to_mci(kobj); - struct mcidev_attribute * mcidev_attr = to_mcidev_attr(attr); - - if (mcidev_attr->store) - return mcidev_attr->store(mem_ctl_info, buffer, count); - - return -EIO; -} - -static struct sysfs_ops mci_ops = { - .show = mcidev_show, - .store = mcidev_store -}; - -#define MCIDEV_ATTR(_name,_mode,_show,_store) \ -struct mcidev_attribute mci_attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = _mode }, \ - .show = _show, \ - .store = _store, \ -}; - -/* default Control file */ -MCIDEV_ATTR(reset_counters,S_IWUSR,NULL,mci_reset_counters_store); - -/* default Attribute files */ -MCIDEV_ATTR(mc_name,S_IRUGO,mci_ctl_name_show,NULL); -MCIDEV_ATTR(size_mb,S_IRUGO,mci_size_mb_show,NULL); -MCIDEV_ATTR(seconds_since_reset,S_IRUGO,mci_seconds_show,NULL); -MCIDEV_ATTR(ue_noinfo_count,S_IRUGO,mci_ue_noinfo_show,NULL); -MCIDEV_ATTR(ce_noinfo_count,S_IRUGO,mci_ce_noinfo_show,NULL); -MCIDEV_ATTR(ue_count,S_IRUGO,mci_ue_count_show,NULL); -MCIDEV_ATTR(ce_count,S_IRUGO,mci_ce_count_show,NULL); - -/* memory scrubber attribute file */ -MCIDEV_ATTR(sdram_scrub_rate,S_IRUGO|S_IWUSR,mci_sdram_scrub_rate_show,mci_sdram_scrub_rate_store); - -static 
struct mcidev_attribute *mci_attr[] = { - &mci_attr_reset_counters, - &mci_attr_mc_name, - &mci_attr_size_mb, - &mci_attr_seconds_since_reset, - &mci_attr_ue_noinfo_count, - &mci_attr_ce_noinfo_count, - &mci_attr_ue_count, - &mci_attr_ce_count, - &mci_attr_sdram_scrub_rate, - NULL -}; - -/* - * Release of a MC controlling instance - */ -static void edac_mci_instance_release(struct kobject *kobj) -{ - struct mem_ctl_info *mci; - - mci = to_mci(kobj); - debugf0("%s() idx=%d\n", __func__, mci->mc_idx); - complete(&mci->kobj_complete); -} - -static struct kobj_type ktype_mci = { - .release = edac_mci_instance_release, - .sysfs_ops = &mci_ops, - .default_attrs = (struct attribute **) mci_attr, -}; - - -#define EDAC_DEVICE_SYMLINK "device" - -/* - * Create a new Memory Controller kobject instance, - * mc<id> under the 'mc' directory - * - * Return: - * 0 Success - * !0 Failure - */ -static int edac_create_sysfs_mci_device(struct mem_ctl_info *mci) -{ - int i; - int err; - struct csrow_info *csrow; - struct kobject *edac_mci_kobj=&mci->edac_mci_kobj; - - debugf0("%s() idx=%d\n", __func__, mci->mc_idx); - memset(edac_mci_kobj, 0, sizeof(*edac_mci_kobj)); - - /* set the name of the mc<id> object */ - err = kobject_set_name(edac_mci_kobj,"mc%d",mci->mc_idx); - if (err) - return err; - - /* link to our parent the '..../edac/mc' object */ - edac_mci_kobj->parent = &edac_memctrl_kobj; - edac_mci_kobj->ktype = &ktype_mci; - - /* register the mc<id> kobject */ - err = kobject_register(edac_mci_kobj); - if (err) - return err; - - /* create a symlink for the device */ - err = sysfs_create_link(edac_mci_kobj, &mci->dev->kobj, - EDAC_DEVICE_SYMLINK); - if (err) - goto fail0; - - /* Make directories for each CSROW object - * under the mc<id> kobject - */ - for (i = 0; i < mci->nr_csrows; i++) { - csrow = &mci->csrows[i]; - - /* Only expose populated CSROWs */ - if (csrow->nr_pages > 0) { - err = edac_create_csrow_object(edac_mci_kobj,csrow,i); - if (err) - goto fail1; - } - } - - 
return 0; - - /* CSROW error: backout what has already been registered, */ -fail1: - for ( i--; i >= 0; i--) { - if (csrow->nr_pages > 0) { - init_completion(&csrow->kobj_complete); - kobject_unregister(&mci->csrows[i].kobj); - wait_for_completion(&csrow->kobj_complete); - } - } - -fail0: - init_completion(&mci->kobj_complete); - kobject_unregister(edac_mci_kobj); - wait_for_completion(&mci->kobj_complete); - return err; -} - -/* - * remove a Memory Controller instance - */ -static void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci) -{ - int i; - - debugf0("%s()\n", __func__); - - /* remove all csrow kobjects */ - for (i = 0; i < mci->nr_csrows; i++) { - if (mci->csrows[i].nr_pages > 0) { - init_completion(&mci->csrows[i].kobj_complete); - kobject_unregister(&mci->csrows[i].kobj); - wait_for_completion(&mci->csrows[i].kobj_complete); - } - } - - sysfs_remove_link(&mci->edac_mci_kobj, EDAC_DEVICE_SYMLINK); - init_completion(&mci->kobj_complete); - kobject_unregister(&mci->edac_mci_kobj); - wait_for_completion(&mci->kobj_complete); -} - -/* END OF sysfs data and methods */ - #ifdef CONFIG_EDAC_DEBUG -void edac_mc_dump_channel(struct channel_info *chan) +static void edac_mc_dump_channel(struct channel_info *chan) { debugf4("\tchannel = %p\n", chan); debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx); @@ -1228,25 +48,21 @@ void edac_mc_dump_channel(struct channel_info *chan) debugf4("\tchannel->label = '%s'\n", chan->label); debugf4("\tchannel->csrow = %p\n\n", chan->csrow); } -EXPORT_SYMBOL_GPL(edac_mc_dump_channel); -void edac_mc_dump_csrow(struct csrow_info *csrow) +static void edac_mc_dump_csrow(struct csrow_info *csrow) { debugf4("\tcsrow = %p\n", csrow); debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx); - debugf4("\tcsrow->first_page = 0x%lx\n", - csrow->first_page); + debugf4("\tcsrow->first_page = 0x%lx\n", csrow->first_page); debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page); debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask); 
debugf4("\tcsrow->nr_pages = 0x%x\n", csrow->nr_pages); - debugf4("\tcsrow->nr_channels = %d\n", - csrow->nr_channels); + debugf4("\tcsrow->nr_channels = %d\n", csrow->nr_channels); debugf4("\tcsrow->channels = %p\n", csrow->channels); debugf4("\tcsrow->mci = %p\n\n", csrow->mci); } -EXPORT_SYMBOL_GPL(edac_mc_dump_csrow); -void edac_mc_dump_mci(struct mem_ctl_info *mci) +static void edac_mc_dump_mci(struct mem_ctl_info *mci) { debugf3("\tmci = %p\n", mci); debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap); @@ -1256,13 +72,11 @@ void edac_mc_dump_mci(struct mem_ctl_info *mci) debugf3("\tmci->nr_csrows = %d, csrows = %p\n", mci->nr_csrows, mci->csrows); debugf3("\tdev = %p\n", mci->dev); - debugf3("\tmod_name:ctl_name = %s:%s\n", - mci->mod_name, mci->ctl_name); + debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name); debugf3("\tpvt_info = %p\n\n", mci->pvt_info); } -EXPORT_SYMBOL_GPL(edac_mc_dump_mci); -#endif /* CONFIG_EDAC_DEBUG */ +#endif /* CONFIG_EDAC_DEBUG */ /* 'ptr' points to a possibly unaligned item X such that sizeof(X) is 'size'. * Adjust 'ptr' so that its alignment is at least as stringent as what the @@ -1271,7 +85,7 @@ EXPORT_SYMBOL_GPL(edac_mc_dump_mci); * If 'size' is a constant, the compiler will optimize this whole function * down to either a no-op or the addition of a constant to the value of 'ptr'. 
*/ -static inline char * align_ptr(void *ptr, unsigned size) +void *edac_align_ptr(void *ptr, unsigned size) { unsigned align, r; @@ -1288,14 +102,14 @@ static inline char * align_ptr(void *ptr, unsigned size) else if (size > sizeof(char)) align = sizeof(short); else - return (char *) ptr; + return (char *)ptr; r = size % align; if (r == 0) - return (char *) ptr; + return (char *)ptr; - return (char *) (((unsigned long) ptr) + align - r); + return (void *)(((unsigned long)ptr) + align - r); } /** @@ -1315,7 +129,7 @@ static inline char * align_ptr(void *ptr, unsigned size) * struct mem_ctl_info pointer */ struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows, - unsigned nr_chans) + unsigned nr_chans, int edac_index) { struct mem_ctl_info *mci; struct csrow_info *csi, *csrow; @@ -1323,30 +137,32 @@ struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows, void *pvt; unsigned size; int row, chn; + int err; /* Figure out the offsets of the various items from the start of an mc * structure. We want the alignment of each item to be at least as * stringent as what the compiler would provide if we could simply * hardcode everything into a single struct. */ - mci = (struct mem_ctl_info *) 0; - csi = (struct csrow_info *)align_ptr(&mci[1], sizeof(*csi)); - chi = (struct channel_info *) - align_ptr(&csi[nr_csrows], sizeof(*chi)); - pvt = align_ptr(&chi[nr_chans * nr_csrows], sz_pvt); - size = ((unsigned long) pvt) + sz_pvt; - - if ((mci = kmalloc(size, GFP_KERNEL)) == NULL) + mci = (struct mem_ctl_info *)0; + csi = edac_align_ptr(&mci[1], sizeof(*csi)); + chi = edac_align_ptr(&csi[nr_csrows], sizeof(*chi)); + pvt = edac_align_ptr(&chi[nr_chans * nr_csrows], sz_pvt); + size = ((unsigned long)pvt) + sz_pvt; + + mci = kzalloc(size, GFP_KERNEL); + if (mci == NULL) return NULL; /* Adjust pointers so they point within the memory we just allocated * rather than an imaginary chunk of memory located at address 0. 
*/ - csi = (struct csrow_info *) (((char *) mci) + ((unsigned long) csi)); - chi = (struct channel_info *) (((char *) mci) + ((unsigned long) chi)); - pvt = sz_pvt ? (((char *) mci) + ((unsigned long) pvt)) : NULL; + csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi)); + chi = (struct channel_info *)(((char *)mci) + ((unsigned long)chi)); + pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL; - memset(mci, 0, size); /* clear all fields */ + /* setup index and various internal pointers */ + mci->mc_idx = edac_index; mci->csrows = csi; mci->pvt_info = pvt; mci->nr_csrows = nr_csrows; @@ -1366,17 +182,35 @@ struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows, } } + mci->op_state = OP_ALLOC; + + /* + * Initialize the 'root' kobj for the edac_mc controller + */ + err = edac_mc_register_sysfs_main_kobj(mci); + if (err) { + kfree(mci); + return NULL; + } + + /* at this point, the root kobj is valid, and in order to + * 'free' the object, then the function: + * edac_mc_unregister_sysfs_main_kobj() must be called + * which will perform kobj unregistration and the actual free + * will occur during the kobject callback operation + */ return mci; } EXPORT_SYMBOL_GPL(edac_mc_alloc); /** - * edac_mc_free: Free a previously allocated 'mci' structure + * edac_mc_free + * 'Free' a previously allocated 'mci' structure * @mci: pointer to a struct mem_ctl_info structure */ void edac_mc_free(struct mem_ctl_info *mci) { - kfree(mci); + edac_mc_unregister_sysfs_main_kobj(mci); } EXPORT_SYMBOL_GPL(edac_mc_free); @@ -1397,18 +231,136 @@ static struct mem_ctl_info *find_mci_by_dev(struct device *dev) return NULL; } +/* + * handler for EDAC to check if NMI type handler has asserted interrupt + */ +static int edac_mc_assert_error_check_and_clear(void) +{ + int old_state; + + if (edac_op_state == EDAC_OPSTATE_POLL) + return 1; + + old_state = edac_err_assert; + edac_err_assert = 0; + + return old_state; +} + +/* + * edac_mc_workq_function + * performs 
the operation scheduled by a workq request + */ +static void edac_mc_workq_function(struct work_struct *work_req) +{ + struct delayed_work *d_work = (struct delayed_work *)work_req; + struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work); + + mutex_lock(&mem_ctls_mutex); + + /* if this control struct has movd to offline state, we are done */ + if (mci->op_state == OP_OFFLINE) { + mutex_unlock(&mem_ctls_mutex); + return; + } + + /* Only poll controllers that are running polled and have a check */ + if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL)) + mci->edac_check(mci); + + /* + * FIXME: temp place holder for PCI checks, + * goes away when we break out PCI + */ + edac_pci_do_parity_check(); + + mutex_unlock(&mem_ctls_mutex); + + /* Reschedule */ + queue_delayed_work(edac_workqueue, &mci->work, + msecs_to_jiffies(edac_mc_get_poll_msec())); +} + +/* + * edac_mc_workq_setup + * initialize a workq item for this mci + * passing in the new delay period in msec + * + * locking model: + * + * called with the mem_ctls_mutex held + */ +static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec) +{ + debugf0("%s()\n", __func__); + + /* if this instance is not in the POLL state, then simply return */ + if (mci->op_state != OP_RUNNING_POLL) + return; + + INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function); + queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec)); +} + +/* + * edac_mc_workq_teardown + * stop the workq processing on this mci + * + * locking model: + * + * called WITHOUT lock held + */ +static void edac_mc_workq_teardown(struct mem_ctl_info *mci) +{ + int status; + + /* if not running POLL, leave now */ + if (mci->op_state == OP_RUNNING_POLL) { + status = cancel_delayed_work(&mci->work); + if (status == 0) { + debugf0("%s() not canceled, flush the queue\n", + __func__); + + /* workq instance might be running, wait for it */ + flush_workqueue(edac_workqueue); + } + } +} + +/* + * edac_reset_delay_period + */ 
+static void edac_reset_delay_period(struct mem_ctl_info *mci, unsigned long value) +{ + /* cancel the current workq request */ + edac_mc_workq_teardown(mci); + + /* lock the list of devices for the new setup */ + mutex_lock(&mem_ctls_mutex); + + /* restart the workq request, with new delay value */ + edac_mc_workq_setup(mci, value); + + mutex_unlock(&mem_ctls_mutex); +} + /* Return 0 on success, 1 on failure. * Before calling this function, caller must * assign a unique value to mci->mc_idx. + * + * locking model: + * + * called with the mem_ctls_mutex lock held */ -static int add_mc_to_global_list (struct mem_ctl_info *mci) +static int add_mc_to_global_list(struct mem_ctl_info *mci) { struct list_head *item, *insert_before; struct mem_ctl_info *p; insert_before = &mc_devices; - if (unlikely((p = find_mci_by_dev(mci->dev)) != NULL)) + p = find_mci_by_dev(mci->dev); + if (unlikely(p != NULL)) goto fail0; list_for_each(item, &mc_devices) { @@ -1424,18 +376,19 @@ static int add_mc_to_global_list (struct mem_ctl_info *mci) } list_add_tail_rcu(&mci->link, insert_before); + atomic_inc(&edac_handlers); return 0; fail0: edac_printk(KERN_WARNING, EDAC_MC, - "%s (%s) %s %s already assigned %d\n", p->dev->bus_id, - dev_name(p->dev), p->mod_name, p->ctl_name, p->mc_idx); + "%s (%s) %s %s already assigned %d\n", p->dev->bus_id, + dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx); return 1; fail1: edac_printk(KERN_WARNING, EDAC_MC, - "bug in low-level driver: attempt to assign\n" - " duplicate mc_idx %d in %s()\n", p->mc_idx, __func__); + "bug in low-level driver: attempt to assign\n" + " duplicate mc_idx %d in %s()\n", p->mc_idx, __func__); return 1; } @@ -1450,6 +403,7 @@ static void complete_mc_list_del(struct rcu_head *head) static void del_mc_from_global_list(struct mem_ctl_info *mci) { + atomic_dec(&edac_handlers); list_del_rcu(&mci->link); init_completion(&mci->complete); call_rcu(&mci->rcu, complete_mc_list_del); @@ -1457,6 +411,34 @@ static void 
del_mc_from_global_list(struct mem_ctl_info *mci) } /** + * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'. + * + * If found, return a pointer to the structure. + * Else return NULL. + * + * Caller must hold mem_ctls_mutex. + */ +struct mem_ctl_info *edac_mc_find(int idx) +{ + struct list_head *item; + struct mem_ctl_info *mci; + + list_for_each(item, &mc_devices) { + mci = list_entry(item, struct mem_ctl_info, link); + + if (mci->mc_idx >= idx) { + if (mci->mc_idx == idx) + return mci; + + break; + } + } + + return NULL; +} +EXPORT_SYMBOL(edac_mc_find); + +/** * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and * create sysfs entries associated with mci structure * @mci: pointer to the mci structure to be added to the list @@ -1468,10 +450,10 @@ static void del_mc_from_global_list(struct mem_ctl_info *mci) */ /* FIXME - should a warning be printed if no error detection? correction? */ -int edac_mc_add_mc(struct mem_ctl_info *mci, int mc_idx) +int edac_mc_add_mc(struct mem_ctl_info *mci) { debugf0("%s()\n", __func__); - mci->mc_idx = mc_idx; + #ifdef CONFIG_EDAC_DEBUG if (edac_debug_level >= 3) edac_mc_dump_mci(mci); @@ -1484,12 +466,12 @@ int edac_mc_add_mc(struct mem_ctl_info *mci, int mc_idx) edac_mc_dump_csrow(&mci->csrows[i]); for (j = 0; j < mci->csrows[i].nr_channels; j++) - edac_mc_dump_channel( - &mci->csrows[i].channels[j]); + edac_mc_dump_channel(&mci->csrows[i]. 
+ channels[j]); } } #endif - down(&mem_ctls_mutex); + mutex_lock(&mem_ctls_mutex); if (add_mc_to_global_list(mci)) goto fail0; @@ -1503,18 +485,28 @@ int edac_mc_add_mc(struct mem_ctl_info *mci, int mc_idx) goto fail1; } + /* If there IS a check routine, then we are running POLLED */ + if (mci->edac_check != NULL) { + /* This instance is NOW RUNNING */ + mci->op_state = OP_RUNNING_POLL; + + edac_mc_workq_setup(mci, edac_mc_get_poll_msec()); + } else { + mci->op_state = OP_RUNNING_INTERRUPT; + } + /* Report action taken */ - edac_mc_printk(mci, KERN_INFO, "Giving out device to %s %s: DEV %s\n", - mci->mod_name, mci->ctl_name, dev_name(mci->dev)); + edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':" + " DEV %s\n", mci->mod_name, mci->ctl_name, dev_name(mci)); - up(&mem_ctls_mutex); + mutex_unlock(&mem_ctls_mutex); return 0; fail1: del_mc_from_global_list(mci); fail0: - up(&mem_ctls_mutex); + mutex_unlock(&mem_ctls_mutex); return 1; } EXPORT_SYMBOL_GPL(edac_mc_add_mc); @@ -1526,29 +518,41 @@ EXPORT_SYMBOL_GPL(edac_mc_add_mc); * * Return pointer to removed mci structure, or NULL if device not found. 
*/ -struct mem_ctl_info * edac_mc_del_mc(struct device *dev) +struct mem_ctl_info *edac_mc_del_mc(struct device *dev) { struct mem_ctl_info *mci; - debugf0("MC: %s()\n", __func__); - down(&mem_ctls_mutex); + debugf0("%s()\n", __func__); + + mutex_lock(&mem_ctls_mutex); - if ((mci = find_mci_by_dev(dev)) == NULL) { - up(&mem_ctls_mutex); + /* find the requested mci struct in the global list */ + mci = find_mci_by_dev(dev); + if (mci == NULL) { + mutex_unlock(&mem_ctls_mutex); return NULL; } - edac_remove_sysfs_mci_device(mci); + /* marking MCI offline */ + mci->op_state = OP_OFFLINE; + del_mc_from_global_list(mci); - up(&mem_ctls_mutex); + mutex_unlock(&mem_ctls_mutex); + + /* flush workq processes and remove sysfs */ + edac_mc_workq_teardown(mci); + edac_remove_sysfs_mci_device(mci); + edac_printk(KERN_INFO, EDAC_MC, "Removed device %d for %s %s: DEV %s\n", mci->mc_idx, - mci->mod_name, mci->ctl_name, dev_name(mci->dev)); + mci->mod_name, mci->ctl_name, dev_name(mci)); + return mci; } EXPORT_SYMBOL_GPL(edac_mc_del_mc); -void edac_mc_scrub_block(unsigned long page, unsigned long offset, u32 size) +static void edac_mc_scrub_block(unsigned long page, unsigned long offset, + u32 size) { struct page *pg; void *virt_addr; @@ -1557,7 +561,7 @@ void edac_mc_scrub_block(unsigned long page, unsigned long offset, u32 size) debugf3("%s()\n", __func__); /* ECC error page was not in our memory. Ignore it. 
*/ - if(!pfn_valid(page)) + if (!pfn_valid(page)) return; /* Find the actual page structure then map it and fix */ @@ -1577,7 +581,6 @@ void edac_mc_scrub_block(unsigned long page, unsigned long offset, u32 size) if (PageHighMem(pg)) local_irq_restore(flags); } -EXPORT_SYMBOL_GPL(edac_mc_scrub_block); /* FIXME - should return -1 */ int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page) @@ -1611,7 +614,7 @@ int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page) if (row == -1) edac_mc_printk(mci, KERN_ERR, "could not look up page error address %lx\n", - (unsigned long) page); + (unsigned long)page); return row; } @@ -1620,8 +623,9 @@ EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page); /* FIXME - setable log (warning/emerg) levels */ /* FIXME - integrate with evlog: http://evlog.sourceforge.net/ */ void edac_mc_handle_ce(struct mem_ctl_info *mci, - unsigned long page_frame_number, unsigned long offset_in_page, - unsigned long syndrome, int row, int channel, const char *msg) + unsigned long page_frame_number, + unsigned long offset_in_page, unsigned long syndrome, + int row, int channel, const char *msg) { unsigned long remapped_page; @@ -1647,7 +651,7 @@ void edac_mc_handle_ce(struct mem_ctl_info *mci, return; } - if (log_ce) + if (edac_mc_get_log_ce()) /* FIXME - put in DIMM location */ edac_mc_printk(mci, KERN_WARNING, "CE page 0x%lx, offset 0x%lx, grain %d, syndrome " @@ -1671,18 +675,18 @@ void edac_mc_handle_ce(struct mem_ctl_info *mci, * page - which can then be scrubbed. */ remapped_page = mci->ctl_page_to_phys ? 
- mci->ctl_page_to_phys(mci, page_frame_number) : - page_frame_number; + mci->ctl_page_to_phys(mci, page_frame_number) : + page_frame_number; edac_mc_scrub_block(remapped_page, offset_in_page, - mci->csrows[row].grain); + mci->csrows[row].grain); } } EXPORT_SYMBOL_GPL(edac_mc_handle_ce); void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci, const char *msg) { - if (log_ce) + if (edac_mc_get_log_ce()) edac_mc_printk(mci, KERN_WARNING, "CE - no information available: %s\n", msg); @@ -1692,8 +696,8 @@ void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci, const char *msg) EXPORT_SYMBOL_GPL(edac_mc_handle_ce_no_info); void edac_mc_handle_ue(struct mem_ctl_info *mci, - unsigned long page_frame_number, unsigned long offset_in_page, - int row, const char *msg) + unsigned long page_frame_number, + unsigned long offset_in_page, int row, const char *msg) { int len = EDAC_MC_LABEL_LEN * 4; char labels[len + 1]; @@ -1714,26 +718,26 @@ void edac_mc_handle_ue(struct mem_ctl_info *mci, } chars = snprintf(pos, len + 1, "%s", - mci->csrows[row].channels[0].label); + mci->csrows[row].channels[0].label); len -= chars; pos += chars; for (chan = 1; (chan < mci->csrows[row].nr_channels) && (len > 0); - chan++) { + chan++) { chars = snprintf(pos, len + 1, ":%s", - mci->csrows[row].channels[chan].label); + mci->csrows[row].channels[chan].label); len -= chars; pos += chars; } - if (log_ue) + if (edac_mc_get_log_ue()) edac_mc_printk(mci, KERN_EMERG, "UE page 0x%lx, offset 0x%lx, grain %d, row %d, " "labels \"%s\": %s\n", page_frame_number, - offset_in_page, mci->csrows[row].grain, row, labels, - msg); + offset_in_page, mci->csrows[row].grain, row, + labels, msg); - if (panic_on_ue) + if (edac_mc_get_panic_on_ue()) panic("EDAC MC%d: UE page 0x%lx, offset 0x%lx, grain %d, " "row %d, labels \"%s\": %s\n", mci->mc_idx, page_frame_number, offset_in_page, @@ -1746,10 +750,10 @@ EXPORT_SYMBOL_GPL(edac_mc_handle_ue); void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci, const char *msg) { - 
if (panic_on_ue) + if (edac_mc_get_panic_on_ue()) panic("EDAC MC%d: Uncorrected Error", mci->mc_idx); - if (log_ue) + if (edac_mc_get_log_ue()) edac_mc_printk(mci, KERN_WARNING, "UE - no information available: %s\n", msg); mci->ue_noinfo_count++; @@ -1757,16 +761,14 @@ void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci, const char *msg) } EXPORT_SYMBOL_GPL(edac_mc_handle_ue_no_info); - /************************************************************* * On Fully Buffered DIMM modules, this help function is * called to process UE events */ void edac_mc_handle_fbd_ue(struct mem_ctl_info *mci, - unsigned int csrow, - unsigned int channela, - unsigned int channelb, - char *msg) + unsigned int csrow, + unsigned int channela, + unsigned int channelb, char *msg) { int len = EDAC_MC_LABEL_LEN * 4; char labels[len + 1]; @@ -1808,20 +810,21 @@ void edac_mc_handle_fbd_ue(struct mem_ctl_info *mci, /* Generate the DIMM labels from the specified channels */ chars = snprintf(pos, len + 1, "%s", mci->csrows[csrow].channels[channela].label); - len -= chars; pos += chars; + len -= chars; + pos += chars; chars = snprintf(pos, len + 1, "-%s", mci->csrows[csrow].channels[channelb].label); - if (log_ue) + if (edac_mc_get_log_ue()) edac_mc_printk(mci, KERN_EMERG, "UE row %d, channel-a= %d channel-b= %d " "labels \"%s\": %s\n", csrow, channela, channelb, labels, msg); - if (panic_on_ue) + if (edac_mc_get_panic_on_ue()) panic("UE row %d, channel-a= %d channel-b= %d " - "labels \"%s\": %s\n", csrow, channela, - channelb, labels, msg); + "labels \"%s\": %s\n", csrow, channela, + channelb, labels, msg); } EXPORT_SYMBOL(edac_mc_handle_fbd_ue); @@ -1830,9 +833,7 @@ EXPORT_SYMBOL(edac_mc_handle_fbd_ue); * called to process CE events */ void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci, - unsigned int csrow, - unsigned int channel, - char *msg) + unsigned int csrow, unsigned int channel, char *msg) { /* Ensure boundary values */ @@ -1853,13 +854,12 @@ void edac_mc_handle_fbd_ce(struct 
mem_ctl_info *mci, return; } - if (log_ce) + if (edac_mc_get_log_ce()) /* FIXME - put in DIMM location */ edac_mc_printk(mci, KERN_WARNING, "CE row %d, channel %d, label \"%s\": %s\n", csrow, channel, - mci->csrows[csrow].channels[channel].label, - msg); + mci->csrows[csrow].channels[channel].label, msg); mci->ce_count++; mci->csrows[csrow].ce_count++; @@ -1867,17 +867,16 @@ void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci, } EXPORT_SYMBOL(edac_mc_handle_fbd_ce); - /* * Iterate over all MC instances and check for ECC, et al, errors */ -static inline void check_mc_devices(void) +void edac_check_mc_devices(void) { struct list_head *item; struct mem_ctl_info *mci; debugf3("%s()\n", __func__); - down(&mem_ctls_mutex); + mutex_lock(&mem_ctls_mutex); list_for_each(item, &mc_devices) { mci = list_entry(item, struct mem_ctl_info, link); @@ -1886,120 +885,5 @@ static inline void check_mc_devices(void) mci->edac_check(mci); } - up(&mem_ctls_mutex); -} - -/* - * Check MC status every poll_msec. - * Check PCI status every poll_msec as well. - * - * This where the work gets done for edac. - * - * SMP safe, doesn't use NMI, and auto-rate-limits. - */ -static void do_edac_check(void) -{ - debugf3("%s()\n", __func__); - check_mc_devices(); - do_pci_parity_check(); -} - -static int edac_kernel_thread(void *arg) -{ - set_freezable(); - while (!kthread_should_stop()) { - do_edac_check(); - - /* goto sleep for the interval */ - schedule_timeout_interruptible((HZ * poll_msec) / 1000); - try_to_freeze(); - } - - return 0; + mutex_unlock(&mem_ctls_mutex); } - -/* - * edac_mc_init - * module initialization entry point - */ -static int __init edac_mc_init(void) -{ - edac_printk(KERN_INFO, EDAC_MC, EDAC_MC_VERSION "\n"); - - /* - * Harvest and clear any boot/initialization PCI parity errors - * - * FIXME: This only clears errors logged by devices present at time of - * module initialization. We should also do an initial clear - * of each newly hotplugged device. 
- */ - clear_pci_parity_errors(); - - /* Create the MC sysfs entries */ - if (edac_sysfs_memctrl_setup()) { - edac_printk(KERN_ERR, EDAC_MC, - "Error initializing sysfs code\n"); - return -ENODEV; - } - - /* Create the PCI parity sysfs entries */ - if (edac_sysfs_pci_setup()) { - edac_sysfs_memctrl_teardown(); - edac_printk(KERN_ERR, EDAC_MC, - "EDAC PCI: Error initializing sysfs code\n"); - return -ENODEV; - } - - /* create our kernel thread */ - edac_thread = kthread_run(edac_kernel_thread, NULL, "kedac"); - - if (IS_ERR(edac_thread)) { - /* remove the sysfs entries */ - edac_sysfs_memctrl_teardown(); - edac_sysfs_pci_teardown(); - return PTR_ERR(edac_thread); - } - - return 0; -} - -/* - * edac_mc_exit() - * module exit/termination functioni - */ -static void __exit edac_mc_exit(void) -{ - debugf0("%s()\n", __func__); - kthread_stop(edac_thread); - - /* tear down the sysfs device */ - edac_sysfs_memctrl_teardown(); - edac_sysfs_pci_teardown(); -} - -module_init(edac_mc_init); -module_exit(edac_mc_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Linux Networx (http://lnxi.com) Thayne Harbaugh et al\n" - "Based on work by Dan Hollis et al"); -MODULE_DESCRIPTION("Core library routines for MC reporting"); - -module_param(panic_on_ue, int, 0644); -MODULE_PARM_DESC(panic_on_ue, "Panic on uncorrected error: 0=off 1=on"); -#ifdef CONFIG_PCI -module_param(check_pci_parity, int, 0644); -MODULE_PARM_DESC(check_pci_parity, "Check for PCI bus parity errors: 0=off 1=on"); -module_param(panic_on_pci_parity, int, 0644); -MODULE_PARM_DESC(panic_on_pci_parity, "Panic on PCI Bus Parity error: 0=off 1=on"); -#endif -module_param(log_ue, int, 0644); -MODULE_PARM_DESC(log_ue, "Log uncorrectable error to console: 0=off 1=on"); -module_param(log_ce, int, 0644); -MODULE_PARM_DESC(log_ce, "Log correctable error to console: 0=off 1=on"); -module_param(poll_msec, int, 0644); -MODULE_PARM_DESC(poll_msec, "Polling period in milliseconds"); -#ifdef CONFIG_EDAC_DEBUG 
-module_param(edac_debug_level, int, 0644); -MODULE_PARM_DESC(edac_debug_level, "Debug level"); -#endif diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c new file mode 100644 index 00000000000..cd090b0677a --- /dev/null +++ b/drivers/edac/edac_mc_sysfs.c @@ -0,0 +1,1024 @@ +/* + * edac_mc kernel module + * (C) 2005-2007 Linux Networx (http://lnxi.com) + * + * This file may be distributed under the terms of the + * GNU General Public License. + * + * Written Doug Thompson <norsk5@xmission.com> www.softwarebitmaker.com + * + */ + +#include <linux/ctype.h> +#include <linux/bug.h> + +#include "edac_core.h" +#include "edac_module.h" + + +/* MC EDAC Controls, setable by module parameter, and sysfs */ +static int edac_mc_log_ue = 1; +static int edac_mc_log_ce = 1; +static int edac_mc_panic_on_ue; +static int edac_mc_poll_msec = 1000; + +/* Getter functions for above */ +int edac_mc_get_log_ue(void) +{ + return edac_mc_log_ue; +} + +int edac_mc_get_log_ce(void) +{ + return edac_mc_log_ce; +} + +int edac_mc_get_panic_on_ue(void) +{ + return edac_mc_panic_on_ue; +} + +/* this is temporary */ +int edac_mc_get_poll_msec(void) +{ + return edac_mc_poll_msec; +} + +/* Parameter declarations for above */ +module_param(edac_mc_panic_on_ue, int, 0644); +MODULE_PARM_DESC(edac_mc_panic_on_ue, "Panic on uncorrected error: 0=off 1=on"); +module_param(edac_mc_log_ue, int, 0644); +MODULE_PARM_DESC(edac_mc_log_ue, + "Log uncorrectable error to console: 0=off 1=on"); +module_param(edac_mc_log_ce, int, 0644); +MODULE_PARM_DESC(edac_mc_log_ce, + "Log correctable error to console: 0=off 1=on"); +module_param(edac_mc_poll_msec, int, 0644); +MODULE_PARM_DESC(edac_mc_poll_msec, "Polling period in milliseconds"); + +/* + * various constants for Memory Controllers + */ +static const char *mem_types[] = { + [MEM_EMPTY] = "Empty", + [MEM_RESERVED] = "Reserved", + [MEM_UNKNOWN] = "Unknown", + [MEM_FPM] = "FPM", + [MEM_EDO] = "EDO", + [MEM_BEDO] = "BEDO", + [MEM_SDR] = 
"Unbuffered-SDR", + [MEM_RDR] = "Registered-SDR", + [MEM_DDR] = "Unbuffered-DDR", + [MEM_RDDR] = "Registered-DDR", + [MEM_RMBS] = "RMBS", + [MEM_DDR2] = "Unbuffered-DDR2", + [MEM_FB_DDR2] = "FullyBuffered-DDR2", + [MEM_RDDR2] = "Registered-DDR2" +}; + +static const char *dev_types[] = { + [DEV_UNKNOWN] = "Unknown", + [DEV_X1] = "x1", + [DEV_X2] = "x2", + [DEV_X4] = "x4", + [DEV_X8] = "x8", + [DEV_X16] = "x16", + [DEV_X32] = "x32", + [DEV_X64] = "x64" +}; + +static const char *edac_caps[] = { + [EDAC_UNKNOWN] = "Unknown", + [EDAC_NONE] = "None", + [EDAC_RESERVED] = "Reserved", + [EDAC_PARITY] = "PARITY", + [EDAC_EC] = "EC", + [EDAC_SECDED] = "SECDED", + [EDAC_S2ECD2ED] = "S2ECD2ED", + [EDAC_S4ECD4ED] = "S4ECD4ED", + [EDAC_S8ECD8ED] = "S8ECD8ED", + [EDAC_S16ECD16ED] = "S16ECD16ED" +}; + + + +/* + * /sys/devices/system/edac/mc; + * data structures and methods + */ +static ssize_t memctrl_int_show(void *ptr, char *buffer) +{ + int *value = (int *)ptr; + return sprintf(buffer, "%u\n", *value); +} + +static ssize_t memctrl_int_store(void *ptr, const char *buffer, size_t count) +{ + int *value = (int *)ptr; + + if (isdigit(*buffer)) + *value = simple_strtoul(buffer, NULL, 0); + + return count; +} + + +/* EDAC sysfs CSROW data structures and methods + */ + +/* Set of more default csrow<id> attribute show/store functions */ +static ssize_t csrow_ue_count_show(struct csrow_info *csrow, char *data, + int private) +{ + return sprintf(data, "%u\n", csrow->ue_count); +} + +static ssize_t csrow_ce_count_show(struct csrow_info *csrow, char *data, + int private) +{ + return sprintf(data, "%u\n", csrow->ce_count); +} + +static ssize_t csrow_size_show(struct csrow_info *csrow, char *data, + int private) +{ + return sprintf(data, "%u\n", PAGES_TO_MiB(csrow->nr_pages)); +} + +static ssize_t csrow_mem_type_show(struct csrow_info *csrow, char *data, + int private) +{ + return sprintf(data, "%s\n", mem_types[csrow->mtype]); +} + +static ssize_t csrow_dev_type_show(struct csrow_info 
*csrow, char *data, + int private) +{ + return sprintf(data, "%s\n", dev_types[csrow->dtype]); +} + +static ssize_t csrow_edac_mode_show(struct csrow_info *csrow, char *data, + int private) +{ + return sprintf(data, "%s\n", edac_caps[csrow->edac_mode]); +} + +/* show/store functions for DIMM Label attributes */ +static ssize_t channel_dimm_label_show(struct csrow_info *csrow, + char *data, int channel) +{ + return snprintf(data, EDAC_MC_LABEL_LEN, "%s", + csrow->channels[channel].label); +} + +static ssize_t channel_dimm_label_store(struct csrow_info *csrow, + const char *data, + size_t count, int channel) +{ + ssize_t max_size = 0; + + max_size = min((ssize_t) count, (ssize_t) EDAC_MC_LABEL_LEN - 1); + strncpy(csrow->channels[channel].label, data, max_size); + csrow->channels[channel].label[max_size] = '\0'; + + return max_size; +} + +/* show function for dynamic chX_ce_count attribute */ +static ssize_t channel_ce_count_show(struct csrow_info *csrow, + char *data, int channel) +{ + return sprintf(data, "%u\n", csrow->channels[channel].ce_count); +} + +/* csrow specific attribute structure */ +struct csrowdev_attribute { + struct attribute attr; + ssize_t(*show) (struct csrow_info *, char *, int); + ssize_t(*store) (struct csrow_info *, const char *, size_t, int); + int private; +}; + +#define to_csrow(k) container_of(k, struct csrow_info, kobj) +#define to_csrowdev_attr(a) container_of(a, struct csrowdev_attribute, attr) + +/* Set of show/store higher level functions for default csrow attributes */ +static ssize_t csrowdev_show(struct kobject *kobj, + struct attribute *attr, char *buffer) +{ + struct csrow_info *csrow = to_csrow(kobj); + struct csrowdev_attribute *csrowdev_attr = to_csrowdev_attr(attr); + + if (csrowdev_attr->show) + return csrowdev_attr->show(csrow, + buffer, csrowdev_attr->private); + return -EIO; +} + +static ssize_t csrowdev_store(struct kobject *kobj, struct attribute *attr, + const char *buffer, size_t count) +{ + struct csrow_info *csrow = 
to_csrow(kobj); + struct csrowdev_attribute *csrowdev_attr = to_csrowdev_attr(attr); + + if (csrowdev_attr->store) + return csrowdev_attr->store(csrow, + buffer, + count, csrowdev_attr->private); + return -EIO; +} + +static struct sysfs_ops csrowfs_ops = { + .show = csrowdev_show, + .store = csrowdev_store +}; + +#define CSROWDEV_ATTR(_name,_mode,_show,_store,_private) \ +static struct csrowdev_attribute attr_##_name = { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .show = _show, \ + .store = _store, \ + .private = _private, \ +}; + +/* default cwrow<id>/attribute files */ +CSROWDEV_ATTR(size_mb, S_IRUGO, csrow_size_show, NULL, 0); +CSROWDEV_ATTR(dev_type, S_IRUGO, csrow_dev_type_show, NULL, 0); +CSROWDEV_ATTR(mem_type, S_IRUGO, csrow_mem_type_show, NULL, 0); +CSROWDEV_ATTR(edac_mode, S_IRUGO, csrow_edac_mode_show, NULL, 0); +CSROWDEV_ATTR(ue_count, S_IRUGO, csrow_ue_count_show, NULL, 0); +CSROWDEV_ATTR(ce_count, S_IRUGO, csrow_ce_count_show, NULL, 0); + +/* default attributes of the CSROW<id> object */ +static struct csrowdev_attribute *default_csrow_attr[] = { + &attr_dev_type, + &attr_mem_type, + &attr_edac_mode, + &attr_size_mb, + &attr_ue_count, + &attr_ce_count, + NULL, +}; + +/* possible dynamic channel DIMM Label attribute files */ +CSROWDEV_ATTR(ch0_dimm_label, S_IRUGO | S_IWUSR, + channel_dimm_label_show, channel_dimm_label_store, 0); +CSROWDEV_ATTR(ch1_dimm_label, S_IRUGO | S_IWUSR, + channel_dimm_label_show, channel_dimm_label_store, 1); +CSROWDEV_ATTR(ch2_dimm_label, S_IRUGO | S_IWUSR, + channel_dimm_label_show, channel_dimm_label_store, 2); +CSROWDEV_ATTR(ch3_dimm_label, S_IRUGO | S_IWUSR, + channel_dimm_label_show, channel_dimm_label_store, 3); +CSROWDEV_ATTR(ch4_dimm_label, S_IRUGO | S_IWUSR, + channel_dimm_label_show, channel_dimm_label_store, 4); +CSROWDEV_ATTR(ch5_dimm_label, S_IRUGO | S_IWUSR, + channel_dimm_label_show, channel_dimm_label_store, 5); + +/* Total possible dynamic DIMM Label attribute file table */ +static struct 
csrowdev_attribute *dynamic_csrow_dimm_attr[] = { + &attr_ch0_dimm_label, + &attr_ch1_dimm_label, + &attr_ch2_dimm_label, + &attr_ch3_dimm_label, + &attr_ch4_dimm_label, + &attr_ch5_dimm_label +}; + +/* possible dynamic channel ce_count attribute files */ +CSROWDEV_ATTR(ch0_ce_count, S_IRUGO | S_IWUSR, channel_ce_count_show, NULL, 0); +CSROWDEV_ATTR(ch1_ce_count, S_IRUGO | S_IWUSR, channel_ce_count_show, NULL, 1); +CSROWDEV_ATTR(ch2_ce_count, S_IRUGO | S_IWUSR, channel_ce_count_show, NULL, 2); +CSROWDEV_ATTR(ch3_ce_count, S_IRUGO | S_IWUSR, channel_ce_count_show, NULL, 3); +CSROWDEV_ATTR(ch4_ce_count, S_IRUGO | S_IWUSR, channel_ce_count_show, NULL, 4); +CSROWDEV_ATTR(ch5_ce_count, S_IRUGO | S_IWUSR, channel_ce_count_show, NULL, 5); + +/* Total possible dynamic ce_count attribute file table */ +static struct csrowdev_attribute *dynamic_csrow_ce_count_attr[] = { + &attr_ch0_ce_count, + &attr_ch1_ce_count, + &attr_ch2_ce_count, + &attr_ch3_ce_count, + &attr_ch4_ce_count, + &attr_ch5_ce_count +}; + +#define EDAC_NR_CHANNELS 6 + +/* Create dynamic CHANNEL files, indexed by 'chan', under specifed CSROW */ +static int edac_create_channel_files(struct kobject *kobj, int chan) +{ + int err = -ENODEV; + + if (chan >= EDAC_NR_CHANNELS) + return err; + + /* create the DIMM label attribute file */ + err = sysfs_create_file(kobj, + (struct attribute *) + dynamic_csrow_dimm_attr[chan]); + + if (!err) { + /* create the CE Count attribute file */ + err = sysfs_create_file(kobj, + (struct attribute *) + dynamic_csrow_ce_count_attr[chan]); + } else { + debugf1("%s() dimm labels and ce_count files created", + __func__); + } + + return err; +} + +/* No memory to release for this kobj */ +static void edac_csrow_instance_release(struct kobject *kobj) +{ + struct mem_ctl_info *mci; + struct csrow_info *cs; + + debugf1("%s()\n", __func__); + + cs = container_of(kobj, struct csrow_info, kobj); + mci = cs->mci; + + kobject_put(&mci->edac_mci_kobj); +} + +/* the kobj_type instance for a CSROW 
*/ +static struct kobj_type ktype_csrow = { + .release = edac_csrow_instance_release, + .sysfs_ops = &csrowfs_ops, + .default_attrs = (struct attribute **)default_csrow_attr, +}; + +/* Create a CSROW object under specifed edac_mc_device */ +static int edac_create_csrow_object(struct mem_ctl_info *mci, + struct csrow_info *csrow, int index) +{ + struct kobject *kobj_mci = &mci->edac_mci_kobj; + struct kobject *kobj; + int chan; + int err; + + /* generate ..../edac/mc/mc<id>/csrow<index> */ + memset(&csrow->kobj, 0, sizeof(csrow->kobj)); + csrow->mci = mci; /* include container up link */ + csrow->kobj.parent = kobj_mci; + csrow->kobj.ktype = &ktype_csrow; + + /* name this instance of csrow<id> */ + err = kobject_set_name(&csrow->kobj, "csrow%d", index); + if (err) + goto err_out; + + /* bump the mci instance's kobject's ref count */ + kobj = kobject_get(&mci->edac_mci_kobj); + if (!kobj) { + err = -ENODEV; + goto err_out; + } + + /* Instanstiate the csrow object */ + err = kobject_register(&csrow->kobj); + if (err) + goto err_release_top_kobj; + + /* At this point, to release a csrow kobj, one must + * call the kobject_unregister and allow that tear down + * to work the releasing + */ + + /* Create the dyanmic attribute files on this csrow, + * namely, the DIMM labels and the channel ce_count + */ + for (chan = 0; chan < csrow->nr_channels; chan++) { + err = edac_create_channel_files(&csrow->kobj, chan); + if (err) { + /* special case the unregister here */ + kobject_unregister(&csrow->kobj); + goto err_out; + } + } + + return 0; + + /* error unwind stack */ +err_release_top_kobj: + kobject_put(&mci->edac_mci_kobj); + +err_out: + return err; +} + +/* default sysfs methods and data structures for the main MCI kobject */ + +static ssize_t mci_reset_counters_store(struct mem_ctl_info *mci, + const char *data, size_t count) +{ + int row, chan; + + mci->ue_noinfo_count = 0; + mci->ce_noinfo_count = 0; + mci->ue_count = 0; + mci->ce_count = 0; + + for (row = 0; row < 
mci->nr_csrows; row++) { + struct csrow_info *ri = &mci->csrows[row]; + + ri->ue_count = 0; + ri->ce_count = 0; + + for (chan = 0; chan < ri->nr_channels; chan++) + ri->channels[chan].ce_count = 0; + } + + mci->start_time = jiffies; + return count; +} + +/* memory scrubbing */ +static ssize_t mci_sdram_scrub_rate_store(struct mem_ctl_info *mci, + const char *data, size_t count) +{ + u32 bandwidth = -1; + + if (mci->set_sdram_scrub_rate) { + + memctrl_int_store(&bandwidth, data, count); + + if (!(*mci->set_sdram_scrub_rate) (mci, &bandwidth)) { + edac_printk(KERN_DEBUG, EDAC_MC, + "Scrub rate set successfully, applied: %d\n", + bandwidth); + } else { + /* FIXME: error codes maybe? */ + edac_printk(KERN_DEBUG, EDAC_MC, + "Scrub rate set FAILED, could not apply: %d\n", + bandwidth); + } + } else { + /* FIXME: produce "not implemented" ERROR for user-side. */ + edac_printk(KERN_WARNING, EDAC_MC, + "Memory scrubbing 'set'control is not implemented!\n"); + } + return count; +} + +static ssize_t mci_sdram_scrub_rate_show(struct mem_ctl_info *mci, char *data) +{ + u32 bandwidth = -1; + + if (mci->get_sdram_scrub_rate) { + if (!(*mci->get_sdram_scrub_rate) (mci, &bandwidth)) { + edac_printk(KERN_DEBUG, EDAC_MC, + "Scrub rate successfully, fetched: %d\n", + bandwidth); + } else { + /* FIXME: error codes maybe? */ + edac_printk(KERN_DEBUG, EDAC_MC, + "Scrub rate fetch FAILED, got: %d\n", + bandwidth); + } + } else { + /* FIXME: produce "not implemented" ERROR for user-side. 
*/ + edac_printk(KERN_WARNING, EDAC_MC, + "Memory scrubbing 'get' control is not implemented\n"); + } + return sprintf(data, "%d\n", bandwidth); +} + +/* default attribute files for the MCI object */ +static ssize_t mci_ue_count_show(struct mem_ctl_info *mci, char *data) +{ + return sprintf(data, "%d\n", mci->ue_count); +} + +static ssize_t mci_ce_count_show(struct mem_ctl_info *mci, char *data) +{ + return sprintf(data, "%d\n", mci->ce_count); +} + +static ssize_t mci_ce_noinfo_show(struct mem_ctl_info *mci, char *data) +{ + return sprintf(data, "%d\n", mci->ce_noinfo_count); +} + +static ssize_t mci_ue_noinfo_show(struct mem_ctl_info *mci, char *data) +{ + return sprintf(data, "%d\n", mci->ue_noinfo_count); +} + +static ssize_t mci_seconds_show(struct mem_ctl_info *mci, char *data) +{ + return sprintf(data, "%ld\n", (jiffies - mci->start_time) / HZ); +} + +static ssize_t mci_ctl_name_show(struct mem_ctl_info *mci, char *data) +{ + return sprintf(data, "%s\n", mci->ctl_name); +} + +static ssize_t mci_size_mb_show(struct mem_ctl_info *mci, char *data) +{ + int total_pages, csrow_idx; + + for (total_pages = csrow_idx = 0; csrow_idx < mci->nr_csrows; + csrow_idx++) { + struct csrow_info *csrow = &mci->csrows[csrow_idx]; + + if (!csrow->nr_pages) + continue; + + total_pages += csrow->nr_pages; + } + + return sprintf(data, "%u\n", PAGES_TO_MiB(total_pages)); +} + +#define to_mci(k) container_of(k, struct mem_ctl_info, edac_mci_kobj) +#define to_mcidev_attr(a) container_of(a,struct mcidev_sysfs_attribute,attr) + +/* MCI show/store functions for top most object */ +static ssize_t mcidev_show(struct kobject *kobj, struct attribute *attr, + char *buffer) +{ + struct mem_ctl_info *mem_ctl_info = to_mci(kobj); + struct mcidev_sysfs_attribute *mcidev_attr = to_mcidev_attr(attr); + + if (mcidev_attr->show) + return mcidev_attr->show(mem_ctl_info, buffer); + + return -EIO; +} + +static ssize_t mcidev_store(struct kobject *kobj, struct attribute *attr, + const char *buffer, 
size_t count) +{ + struct mem_ctl_info *mem_ctl_info = to_mci(kobj); + struct mcidev_sysfs_attribute *mcidev_attr = to_mcidev_attr(attr); + + if (mcidev_attr->store) + return mcidev_attr->store(mem_ctl_info, buffer, count); + + return -EIO; +} + +/* Intermediate show/store table */ +static struct sysfs_ops mci_ops = { + .show = mcidev_show, + .store = mcidev_store +}; + +#define MCIDEV_ATTR(_name,_mode,_show,_store) \ +static struct mcidev_sysfs_attribute mci_attr_##_name = { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .show = _show, \ + .store = _store, \ +}; + +/* default Control file */ +MCIDEV_ATTR(reset_counters, S_IWUSR, NULL, mci_reset_counters_store); + +/* default Attribute files */ +MCIDEV_ATTR(mc_name, S_IRUGO, mci_ctl_name_show, NULL); +MCIDEV_ATTR(size_mb, S_IRUGO, mci_size_mb_show, NULL); +MCIDEV_ATTR(seconds_since_reset, S_IRUGO, mci_seconds_show, NULL); +MCIDEV_ATTR(ue_noinfo_count, S_IRUGO, mci_ue_noinfo_show, NULL); +MCIDEV_ATTR(ce_noinfo_count, S_IRUGO, mci_ce_noinfo_show, NULL); +MCIDEV_ATTR(ue_count, S_IRUGO, mci_ue_count_show, NULL); +MCIDEV_ATTR(ce_count, S_IRUGO, mci_ce_count_show, NULL); + +/* memory scrubber attribute file */ +MCIDEV_ATTR(sdram_scrub_rate, S_IRUGO | S_IWUSR, mci_sdram_scrub_rate_show, + mci_sdram_scrub_rate_store); + +static struct mcidev_sysfs_attribute *mci_attr[] = { + &mci_attr_reset_counters, + &mci_attr_mc_name, + &mci_attr_size_mb, + &mci_attr_seconds_since_reset, + &mci_attr_ue_noinfo_count, + &mci_attr_ce_noinfo_count, + &mci_attr_ue_count, + &mci_attr_ce_count, + &mci_attr_sdram_scrub_rate, + NULL +}; + + +/* + * Release of a MC controlling instance + * + * each MC control instance has the following resources upon entry: + * a) a ref count on the top memctl kobj + * b) a ref count on this module + * + * this function must decrement those ref counts and then + * issue a free on the instance's memory + */ +static void edac_mci_control_release(struct kobject *kobj) +{ + struct mem_ctl_info *mci; + 
+ mci = to_mci(kobj); + + debugf0("%s() mci instance idx=%d releasing\n", __func__, mci->mc_idx); + + /* decrement the module ref count */ + module_put(mci->owner); + + /* free the mci instance memory here */ + kfree(mci); +} + +static struct kobj_type ktype_mci = { + .release = edac_mci_control_release, + .sysfs_ops = &mci_ops, + .default_attrs = (struct attribute **)mci_attr, +}; + +/* show/store, tables, etc for the MC kset */ + + +struct memctrl_dev_attribute { + struct attribute attr; + void *value; + ssize_t(*show) (void *, char *); + ssize_t(*store) (void *, const char *, size_t); +}; + +/* Set of show/store abstract level functions for memory control object */ +static ssize_t memctrl_dev_show(struct kobject *kobj, + struct attribute *attr, char *buffer) +{ + struct memctrl_dev_attribute *memctrl_dev; + memctrl_dev = (struct memctrl_dev_attribute *)attr; + + if (memctrl_dev->show) + return memctrl_dev->show(memctrl_dev->value, buffer); + + return -EIO; +} + +static ssize_t memctrl_dev_store(struct kobject *kobj, struct attribute *attr, + const char *buffer, size_t count) +{ + struct memctrl_dev_attribute *memctrl_dev; + memctrl_dev = (struct memctrl_dev_attribute *)attr; + + if (memctrl_dev->store) + return memctrl_dev->store(memctrl_dev->value, buffer, count); + + return -EIO; +} + +static struct sysfs_ops memctrlfs_ops = { + .show = memctrl_dev_show, + .store = memctrl_dev_store +}; + +#define MEMCTRL_ATTR(_name, _mode, _show, _store) \ +static struct memctrl_dev_attribute attr_##_name = { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .value = &_name, \ + .show = _show, \ + .store = _store, \ +}; + +#define MEMCTRL_STRING_ATTR(_name, _data, _mode, _show, _store) \ +static struct memctrl_dev_attribute attr_##_name = { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .value = _data, \ + .show = _show, \ + .store = _store, \ +}; + +/* csrow<id> control files */ +MEMCTRL_ATTR(edac_mc_panic_on_ue, + S_IRUGO | S_IWUSR, 
memctrl_int_show, memctrl_int_store); + +MEMCTRL_ATTR(edac_mc_log_ue, + S_IRUGO | S_IWUSR, memctrl_int_show, memctrl_int_store); + +MEMCTRL_ATTR(edac_mc_log_ce, + S_IRUGO | S_IWUSR, memctrl_int_show, memctrl_int_store); + +MEMCTRL_ATTR(edac_mc_poll_msec, + S_IRUGO | S_IWUSR, memctrl_int_show, memctrl_int_store); + +/* Base Attributes of the memory ECC object */ +static struct memctrl_dev_attribute *memctrl_attr[] = { + &attr_edac_mc_panic_on_ue, + &attr_edac_mc_log_ue, + &attr_edac_mc_log_ce, + &attr_edac_mc_poll_msec, + NULL, +}; + + +/* the ktype for the mc_kset internal kobj */ +static struct kobj_type ktype_mc_set_attribs = { + .sysfs_ops = &memctrlfs_ops, + .default_attrs = (struct attribute **)memctrl_attr, +}; + +/* EDAC memory controller sysfs kset: + * /sys/devices/system/edac/mc + */ +static struct kset mc_kset = { + .kobj = {.name = "mc", .ktype = &ktype_mc_set_attribs }, + .ktype = &ktype_mci, +}; + + +/* + * edac_mc_register_sysfs_main_kobj + * + * setups and registers the main kobject for each mci + */ +int edac_mc_register_sysfs_main_kobj(struct mem_ctl_info *mci) +{ + struct kobject *kobj_mci; + int err; + + debugf1("%s()\n", __func__); + + kobj_mci = &mci->edac_mci_kobj; + + /* Init the mci's kobject */ + memset(kobj_mci, 0, sizeof(*kobj_mci)); + + /* this instance become part of the mc_kset */ + kobj_mci->kset = &mc_kset; + + /* set the name of the mc<id> object */ + err = kobject_set_name(kobj_mci, "mc%d", mci->mc_idx); + if (err) + goto fail_out; + + /* Record which module 'owns' this control structure + * and bump the ref count of the module + */ + mci->owner = THIS_MODULE; + + /* bump ref count on this module */ + if (!try_module_get(mci->owner)) { + err = -ENODEV; + goto fail_out; + } + + /* register the mc<id> kobject to the mc_kset */ + err = kobject_register(kobj_mci); + if (err) { + debugf1("%s()Failed to register '.../edac/mc%d'\n", + __func__, mci->mc_idx); + goto kobj_reg_fail; + } + + /* At this point, to 'free' the control struct, + 
* edac_mc_unregister_sysfs_main_kobj() must be used + */ + + debugf1("%s() Registered '.../edac/mc%d' kobject\n", + __func__, mci->mc_idx); + + return 0; + + /* Error exit stack */ + +kobj_reg_fail: + module_put(mci->owner); + +fail_out: + return err; +} + +/* + * edac_mc_register_sysfs_main_kobj + * + * tears down and the main mci kobject from the mc_kset + */ +void edac_mc_unregister_sysfs_main_kobj(struct mem_ctl_info *mci) +{ + /* delete the kobj from the mc_kset */ + kobject_unregister(&mci->edac_mci_kobj); +} + +#define EDAC_DEVICE_SYMLINK "device" + +/* + * edac_create_mci_instance_attributes + * create MC driver specific attributes at the topmost level + * directory of this mci instance. + */ +static int edac_create_mci_instance_attributes(struct mem_ctl_info *mci) +{ + int err; + struct mcidev_sysfs_attribute *sysfs_attrib; + + /* point to the start of the array and iterate over it + * adding each attribute listed to this mci instance's kobject + */ + sysfs_attrib = mci->mc_driver_sysfs_attributes; + + while (sysfs_attrib && sysfs_attrib->attr.name) { + err = sysfs_create_file(&mci->edac_mci_kobj, + (struct attribute*) sysfs_attrib); + if (err) { + return err; + } + + sysfs_attrib++; + } + + return 0; +} + +/* + * edac_remove_mci_instance_attributes + * remove MC driver specific attributes at the topmost level + * directory of this mci instance. 
+ */ +static void edac_remove_mci_instance_attributes(struct mem_ctl_info *mci) +{ + struct mcidev_sysfs_attribute *sysfs_attrib; + + /* point to the start of the array and iterate over it + * adding each attribute listed to this mci instance's kobject + */ + sysfs_attrib = mci->mc_driver_sysfs_attributes; + + /* loop if there are attributes and until we hit a NULL entry */ + while (sysfs_attrib && sysfs_attrib->attr.name) { + sysfs_remove_file(&mci->edac_mci_kobj, + (struct attribute *) sysfs_attrib); + sysfs_attrib++; + } +} + + +/* + * Create a new Memory Controller kobject instance, + * mc<id> under the 'mc' directory + * + * Return: + * 0 Success + * !0 Failure + */ +int edac_create_sysfs_mci_device(struct mem_ctl_info *mci) +{ + int i; + int err; + struct csrow_info *csrow; + struct kobject *kobj_mci = &mci->edac_mci_kobj; + + debugf0("%s() idx=%d\n", __func__, mci->mc_idx); + + /* create a symlink for the device */ + err = sysfs_create_link(kobj_mci, &mci->dev->kobj, + EDAC_DEVICE_SYMLINK); + if (err) { + debugf1("%s() failure to create symlink\n", __func__); + goto fail0; + } + + /* If the low level driver desires some attributes, + * then create them now for the driver. 
+ */ + if (mci->mc_driver_sysfs_attributes) { + err = edac_create_mci_instance_attributes(mci); + if (err) { + debugf1("%s() failure to create mci attributes\n", + __func__); + goto fail0; + } + } + + /* Make directories for each CSROW object under the mc<id> kobject + */ + for (i = 0; i < mci->nr_csrows; i++) { + csrow = &mci->csrows[i]; + + /* Only expose populated CSROWs */ + if (csrow->nr_pages > 0) { + err = edac_create_csrow_object(mci, csrow, i); + if (err) { + debugf1("%s() failure: create csrow %d obj\n", + __func__, i); + goto fail1; + } + } + } + + return 0; + + /* CSROW error: backout what has already been registered, */ +fail1: + for (i--; i >= 0; i--) { + if (csrow->nr_pages > 0) { + kobject_unregister(&mci->csrows[i].kobj); + } + } + + /* remove the mci instance's attributes, if any */ + edac_remove_mci_instance_attributes(mci); + + /* remove the symlink */ + sysfs_remove_link(kobj_mci, EDAC_DEVICE_SYMLINK); + +fail0: + return err; +} + +/* + * remove a Memory Controller instance + */ +void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci) +{ + int i; + + debugf0("%s()\n", __func__); + + /* remove all csrow kobjects */ + for (i = 0; i < mci->nr_csrows; i++) { + if (mci->csrows[i].nr_pages > 0) { + debugf0("%s() unreg csrow-%d\n", __func__, i); + kobject_unregister(&mci->csrows[i].kobj); + } + } + + debugf0("%s() remove_link\n", __func__); + + /* remove the symlink */ + sysfs_remove_link(&mci->edac_mci_kobj, EDAC_DEVICE_SYMLINK); + + debugf0("%s() remove_mci_instance\n", __func__); + + /* remove this mci instance's attribtes */ + edac_remove_mci_instance_attributes(mci); + + debugf0("%s() unregister this mci kobj\n", __func__); + + /* unregister this instance's kobject */ + kobject_unregister(&mci->edac_mci_kobj); +} + + + + +/* + * edac_setup_sysfs_mc_kset(void) + * + * Initialize the mc_kset for the 'mc' entry + * This requires creating the top 'mc' directory with a kset + * and its controls/attributes. 
+ * + * To this 'mc' kset, instance 'mci' will be grouped as children. + * + * Return: 0 SUCCESS + * !0 FAILURE error code + */ +int edac_sysfs_setup_mc_kset(void) +{ + int err = 0; + struct sysdev_class *edac_class; + + debugf1("%s()\n", __func__); + + /* get the /sys/devices/system/edac class reference */ + edac_class = edac_get_edac_class(); + if (edac_class == NULL) { + debugf1("%s() no edac_class error=%d\n", __func__, err); + goto fail_out; + } + + /* Init the MC's kobject */ + mc_kset.kobj.parent = &edac_class->kset.kobj; + + /* register the mc_kset */ + err = kset_register(&mc_kset); + if (err) { + debugf1("%s() Failed to register '.../edac/mc'\n", __func__); + goto fail_out; + } + + debugf1("%s() Registered '.../edac/mc' kobject\n", __func__); + + return 0; + + + /* error unwind stack */ +fail_out: + return err; +} + +/* + * edac_sysfs_teardown_mc_kset + * + * deconstruct the mc_ket for memory controllers + */ +void edac_sysfs_teardown_mc_kset(void) +{ + kset_unregister(&mc_kset); +} + diff --git a/drivers/edac/edac_module.c b/drivers/edac/edac_module.c new file mode 100644 index 00000000000..e0c4a408605 --- /dev/null +++ b/drivers/edac/edac_module.c @@ -0,0 +1,222 @@ +/* + * edac_module.c + * + * (C) 2007 www.softwarebitmaker.com + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. 
+ * + * Author: Doug Thompson <dougthompson@xmission.com> + * + */ +#include <linux/edac.h> + +#include "edac_core.h" +#include "edac_module.h" + +#define EDAC_VERSION "Ver: 2.1.0 " __DATE__ + +#ifdef CONFIG_EDAC_DEBUG +/* Values of 0 to 4 will generate output */ +int edac_debug_level = 2; +EXPORT_SYMBOL_GPL(edac_debug_level); +#endif + +/* scope is to module level only */ +struct workqueue_struct *edac_workqueue; + +/* + * sysfs object: /sys/devices/system/edac + * need to export to other files in this modules + */ +static struct sysdev_class edac_class = { + set_kset_name("edac"), +}; +static int edac_class_valid; + +/* + * edac_op_state_to_string() + */ +char *edac_op_state_to_string(int opstate) +{ + if (opstate == OP_RUNNING_POLL) + return "POLLED"; + else if (opstate == OP_RUNNING_INTERRUPT) + return "INTERRUPT"; + else if (opstate == OP_RUNNING_POLL_INTR) + return "POLL-INTR"; + else if (opstate == OP_ALLOC) + return "ALLOC"; + else if (opstate == OP_OFFLINE) + return "OFFLINE"; + + return "UNKNOWN"; +} + +/* + * edac_get_edac_class() + * + * return pointer to the edac class of 'edac' + */ +struct sysdev_class *edac_get_edac_class(void) +{ + struct sysdev_class *classptr = NULL; + + if (edac_class_valid) + classptr = &edac_class; + + return classptr; +} + +/* + * edac_register_sysfs_edac_name() + * + * register the 'edac' into /sys/devices/system + * + * return: + * 0 success + * !0 error + */ +static int edac_register_sysfs_edac_name(void) +{ + int err; + + /* create the /sys/devices/system/edac directory */ + err = sysdev_class_register(&edac_class); + + if (err) { + debugf1("%s() error=%d\n", __func__, err); + return err; + } + + edac_class_valid = 1; + return 0; +} + +/* + * sysdev_class_unregister() + * + * unregister the 'edac' from /sys/devices/system + */ +static void edac_unregister_sysfs_edac_name(void) +{ + /* only if currently registered, then unregister it */ + if (edac_class_valid) + sysdev_class_unregister(&edac_class); + + edac_class_valid = 
0; +} + +/* + * edac_workqueue_setup + * initialize the edac work queue for polling operations + */ +static int edac_workqueue_setup(void) +{ + edac_workqueue = create_singlethread_workqueue("edac-poller"); + if (edac_workqueue == NULL) + return -ENODEV; + else + return 0; +} + +/* + * edac_workqueue_teardown + * teardown the edac workqueue + */ +static void edac_workqueue_teardown(void) +{ + if (edac_workqueue) { + flush_workqueue(edac_workqueue); + destroy_workqueue(edac_workqueue); + edac_workqueue = NULL; + } +} + +/* + * edac_init + * module initialization entry point + */ +static int __init edac_init(void) +{ + int err = 0; + + edac_printk(KERN_INFO, EDAC_MC, EDAC_VERSION "\n"); + + /* + * Harvest and clear any boot/initialization PCI parity errors + * + * FIXME: This only clears errors logged by devices present at time of + * module initialization. We should also do an initial clear + * of each newly hotplugged device. + */ + edac_pci_clear_parity_errors(); + + /* + * perform the registration of the /sys/devices/system/edac class object + */ + if (edac_register_sysfs_edac_name()) { + edac_printk(KERN_ERR, EDAC_MC, + "Error initializing 'edac' kobject\n"); + err = -ENODEV; + goto error; + } + + /* + * now set up the mc_kset under the edac class object + */ + err = edac_sysfs_setup_mc_kset(); + if (err) + goto sysfs_setup_fail; + + /* Setup/Initialize the workq for this core */ + err = edac_workqueue_setup(); + if (err) { + edac_printk(KERN_ERR, EDAC_MC, "init WorkQueue failure\n"); + goto workq_fail; + } + + return 0; + + /* Error teardown stack */ +workq_fail: + edac_sysfs_teardown_mc_kset(); + +sysfs_setup_fail: + edac_unregister_sysfs_edac_name(); + +error: + return err; +} + +/* + * edac_exit() + * module exit/termination function + */ +static void __exit edac_exit(void) +{ + debugf0("%s()\n", __func__); + + /* tear down the various subsystems */ + edac_workqueue_teardown(); + edac_sysfs_teardown_mc_kset(); + edac_unregister_sysfs_edac_name(); +} + +/* + 
* Inform the kernel of our entry and exit points + */ +module_init(edac_init); +module_exit(edac_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Doug Thompson www.softwarebitmaker.com, et al"); +MODULE_DESCRIPTION("Core library routines for EDAC reporting"); + +/* refer to *_sysfs.c files for parameters that are exported via sysfs */ + +#ifdef CONFIG_EDAC_DEBUG +module_param(edac_debug_level, int, 0644); +MODULE_PARM_DESC(edac_debug_level, "Debug level"); +#endif diff --git a/drivers/edac/edac_module.h b/drivers/edac/edac_module.h new file mode 100644 index 00000000000..a2134dfc3cc --- /dev/null +++ b/drivers/edac/edac_module.h @@ -0,0 +1,77 @@ + +/* + * edac_module.h + * + * For defining functions/data for within the EDAC_CORE module only + * + * written by doug thompson <norsk5@xmission.h> + */ + +#ifndef __EDAC_MODULE_H__ +#define __EDAC_MODULE_H__ + +#include <linux/sysdev.h> + +#include "edac_core.h" + +/* + * INTERNAL EDAC MODULE: + * EDAC memory controller sysfs create/remove functions + * and setup/teardown functions + * + * edac_mc objects + */ +extern int edac_sysfs_setup_mc_kset(void); +extern void edac_sysfs_teardown_mc_kset(void); +extern int edac_mc_register_sysfs_main_kobj(struct mem_ctl_info *mci); +extern void edac_mc_unregister_sysfs_main_kobj(struct mem_ctl_info *mci); +extern int edac_create_sysfs_mci_device(struct mem_ctl_info *mci); +extern void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci); +extern void edac_check_mc_devices(void); +extern int edac_get_log_ue(void); +extern int edac_get_log_ce(void); +extern int edac_get_panic_on_ue(void); +extern int edac_mc_get_log_ue(void); +extern int edac_mc_get_log_ce(void); +extern int edac_mc_get_panic_on_ue(void); +extern int edac_get_poll_msec(void); +extern int edac_mc_get_poll_msec(void); + +extern int edac_device_register_sysfs_main_kobj( + struct edac_device_ctl_info *edac_dev); +extern void edac_device_unregister_sysfs_main_kobj( + struct edac_device_ctl_info *edac_dev); +extern int 
/*
 * EDAC PCI functions
 *
 * With CONFIG_PCI these are real functions.  Without it they are
 * pre-processed away; the stubs standing in for functions that return
 * an int expand to (0) so that they remain usable inside expressions
 * and as function arguments.
 */
#ifdef CONFIG_PCI
extern void edac_pci_do_parity_check(void);
extern void edac_pci_clear_parity_errors(void);
extern int edac_sysfs_pci_setup(void);
extern void edac_sysfs_pci_teardown(void);
extern int edac_pci_get_check_errors(void);
extern int edac_pci_get_poll_msec(void);
#else				/* CONFIG_PCI */
/* pre-process these away */
#define edac_pci_do_parity_check()
#define edac_pci_clear_parity_errors()
#define edac_sysfs_pci_setup()		(0)
#define edac_sysfs_pci_teardown()
#define edac_pci_get_check_errors()	(0)
#define edac_pci_get_poll_msec()	(0)
#endif				/* CONFIG_PCI */
+ * + */ +#include <linux/module.h> +#include <linux/types.h> +#include <linux/smp.h> +#include <linux/init.h> +#include <linux/sysctl.h> +#include <linux/highmem.h> +#include <linux/timer.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/list.h> +#include <linux/sysdev.h> +#include <linux/ctype.h> +#include <linux/workqueue.h> +#include <asm/uaccess.h> +#include <asm/page.h> + +#include "edac_core.h" +#include "edac_module.h" + +static DEFINE_MUTEX(edac_pci_ctls_mutex); +static struct list_head edac_pci_list = LIST_HEAD_INIT(edac_pci_list); + +static inline void edac_lock_pci_list(void) +{ + mutex_lock(&edac_pci_ctls_mutex); +} + +static inline void edac_unlock_pci_list(void) +{ + mutex_unlock(&edac_pci_ctls_mutex); +} + +/* + * The alloc() and free() functions for the 'edac_pci' control info + * structure. The chip driver will allocate one of these for each + * edac_pci it is going to control/register with the EDAC CORE. + */ +struct edac_pci_ctl_info *edac_pci_alloc_ctl_info(unsigned int sz_pvt, + const char *edac_pci_name) +{ + struct edac_pci_ctl_info *pci; + void *pvt; + unsigned int size; + + pci = (struct edac_pci_ctl_info *)0; + pvt = edac_align_ptr(&pci[1], sz_pvt); + size = ((unsigned long)pvt) + sz_pvt; + + if ((pci = kzalloc(size, GFP_KERNEL)) == NULL) + return NULL; + + pvt = sz_pvt ? 
((char *)pci) + ((unsigned long)pvt) : NULL; + + pci->pvt_info = pvt; + + pci->op_state = OP_ALLOC; + + snprintf(pci->name, strlen(edac_pci_name) + 1, "%s", edac_pci_name); + + return pci; +} + +EXPORT_SYMBOL_GPL(edac_pci_alloc_ctl_info); + +/* + * edac_pci_free_ctl_info() + * frees the memory allocated by edac_pci_alloc_ctl_info() function + */ +void edac_pci_free_ctl_info(struct edac_pci_ctl_info *pci) +{ + kfree(pci); +} + +EXPORT_SYMBOL_GPL(edac_pci_free_ctl_info); + +/* + * find_edac_pci_by_dev() + * scans the edac_pci list for a specific 'struct device *' + */ +static struct edac_pci_ctl_info *find_edac_pci_by_dev(struct device *dev) +{ + struct edac_pci_ctl_info *pci; + struct list_head *item; + + debugf3("%s()\n", __func__); + + list_for_each(item, &edac_pci_list) { + pci = list_entry(item, struct edac_pci_ctl_info, link); + + if (pci->dev == dev) + return pci; + } + + return NULL; +} + +/* + * add_edac_pci_to_global_list + * Before calling this function, caller must assign a unique value to + * edac_dev->pci_idx. 
+ * Return: + * 0 on success + * 1 on failure + */ +static int add_edac_pci_to_global_list(struct edac_pci_ctl_info *pci) +{ + struct list_head *item, *insert_before; + struct edac_pci_ctl_info *rover; + + insert_before = &edac_pci_list; + + /* Determine if already on the list */ + if (unlikely((rover = find_edac_pci_by_dev(pci->dev)) != NULL)) + goto fail0; + + /* Insert in ascending order by 'pci_idx', so find position */ + list_for_each(item, &edac_pci_list) { + rover = list_entry(item, struct edac_pci_ctl_info, link); + + if (rover->pci_idx >= pci->pci_idx) { + if (unlikely(rover->pci_idx == pci->pci_idx)) + goto fail1; + + insert_before = item; + break; + } + } + + list_add_tail_rcu(&pci->link, insert_before); + return 0; + +fail0: + edac_printk(KERN_WARNING, EDAC_PCI, + "%s (%s) %s %s already assigned %d\n", + rover->dev->bus_id, dev_name(rover), + rover->mod_name, rover->ctl_name, rover->pci_idx); + return 1; + +fail1: + edac_printk(KERN_WARNING, EDAC_PCI, + "but in low-level driver: attempt to assign\n" + "\tduplicate pci_idx %d in %s()\n", rover->pci_idx, + __func__); + return 1; +} + +/* + * complete_edac_pci_list_del + */ +static void complete_edac_pci_list_del(struct rcu_head *head) +{ + struct edac_pci_ctl_info *pci; + + pci = container_of(head, struct edac_pci_ctl_info, rcu); + INIT_LIST_HEAD(&pci->link); + complete(&pci->complete); +} + +/* + * del_edac_pci_from_global_list + */ +static void del_edac_pci_from_global_list(struct edac_pci_ctl_info *pci) +{ + list_del_rcu(&pci->link); + init_completion(&pci->complete); + call_rcu(&pci->rcu, complete_edac_pci_list_del); + wait_for_completion(&pci->complete); +} + +/* + * edac_pci_find() + * Search for an edac_pci_ctl_info structure whose index is 'idx' + * + * If found, return a pointer to the structure + * Else return NULL. + * + * Caller must hold pci_ctls_mutex. 
+ */ +struct edac_pci_ctl_info *edac_pci_find(int idx) +{ + struct list_head *item; + struct edac_pci_ctl_info *pci; + + /* Iterage over list, looking for exact match of ID */ + list_for_each(item, &edac_pci_list) { + pci = list_entry(item, struct edac_pci_ctl_info, link); + + if (pci->pci_idx >= idx) { + if (pci->pci_idx == idx) + return pci; + + /* not on list, so terminate early */ + break; + } + } + + return NULL; +} + +EXPORT_SYMBOL_GPL(edac_pci_find); + +/* + * edac_pci_workq_function() + * performs the operation scheduled by a workq request + */ +static void edac_pci_workq_function(struct work_struct *work_req) +{ + struct delayed_work *d_work = (struct delayed_work *)work_req; + struct edac_pci_ctl_info *pci = to_edac_pci_ctl_work(d_work); + + edac_lock_pci_list(); + + if ((pci->op_state == OP_RUNNING_POLL) && + (pci->edac_check != NULL) && (edac_pci_get_check_errors())) + pci->edac_check(pci); + + edac_unlock_pci_list(); + + /* Reschedule */ + queue_delayed_work(edac_workqueue, &pci->work, + msecs_to_jiffies(edac_pci_get_poll_msec())); +} + +/* + * edac_pci_workq_setup() + * initialize a workq item for this edac_pci instance + * passing in the new delay period in msec + */ +static void edac_pci_workq_setup(struct edac_pci_ctl_info *pci, + unsigned int msec) +{ + debugf0("%s()\n", __func__); + + INIT_DELAYED_WORK(&pci->work, edac_pci_workq_function); + queue_delayed_work(edac_workqueue, &pci->work, + msecs_to_jiffies(edac_pci_get_poll_msec())); +} + +/* + * edac_pci_workq_teardown() + * stop the workq processing on this edac_pci instance + */ +static void edac_pci_workq_teardown(struct edac_pci_ctl_info *pci) +{ + int status; + + status = cancel_delayed_work(&pci->work); + if (status == 0) + flush_workqueue(edac_workqueue); +} + +/* + * edac_pci_reset_delay_period + */ +void edac_pci_reset_delay_period(struct edac_pci_ctl_info *pci, + unsigned long value) +{ + edac_lock_pci_list(); + + edac_pci_workq_teardown(pci); + + edac_pci_workq_setup(pci, value); + + 
edac_unlock_pci_list(); +} + +EXPORT_SYMBOL_GPL(edac_pci_reset_delay_period); + +/* + * edac_pci_add_device: Insert the 'edac_dev' structure into the + * edac_pci global list and create sysfs entries associated with + * edac_pci structure. + * @pci: pointer to the edac_device structure to be added to the list + * @edac_idx: A unique numeric identifier to be assigned to the + * 'edac_pci' structure. + * + * Return: + * 0 Success + * !0 Failure + */ +int edac_pci_add_device(struct edac_pci_ctl_info *pci, int edac_idx) +{ + debugf0("%s()\n", __func__); + + pci->pci_idx = edac_idx; + + edac_lock_pci_list(); + + if (add_edac_pci_to_global_list(pci)) + goto fail0; + + pci->start_time = jiffies; + + if (edac_pci_create_sysfs(pci)) { + edac_pci_printk(pci, KERN_WARNING, + "failed to create sysfs pci\n"); + goto fail1; + } + + if (pci->edac_check != NULL) { + pci->op_state = OP_RUNNING_POLL; + + edac_pci_workq_setup(pci, 1000); + } else { + pci->op_state = OP_RUNNING_INTERRUPT; + } + + edac_pci_printk(pci, KERN_INFO, + "Giving out device to module '%s' controller '%s':" + " DEV '%s' (%s)\n", + pci->mod_name, + pci->ctl_name, + dev_name(pci), edac_op_state_to_string(pci->op_state)); + + edac_unlock_pci_list(); + return 0; + +fail1: + del_edac_pci_from_global_list(pci); +fail0: + edac_unlock_pci_list(); + return 1; +} + +EXPORT_SYMBOL_GPL(edac_pci_add_device); + +/* + * edac_pci_del_device() + * Remove sysfs entries for specified edac_pci structure and + * then remove edac_pci structure from global list + * + * @dev: + * Pointer to 'struct device' representing edac_pci structure + * to remove + * + * Return: + * Pointer to removed edac_pci structure, + * or NULL if device not found + */ +struct edac_pci_ctl_info *edac_pci_del_device(struct device *dev) +{ + struct edac_pci_ctl_info *pci; + + debugf0("%s()\n", __func__); + + edac_lock_pci_list(); + + if ((pci = find_edac_pci_by_dev(dev)) == NULL) { + edac_unlock_pci_list(); + return NULL; + } + + pci->op_state = OP_OFFLINE; + 
+ edac_pci_workq_teardown(pci); + + edac_pci_remove_sysfs(pci); + + del_edac_pci_from_global_list(pci); + + edac_unlock_pci_list(); + + edac_printk(KERN_INFO, EDAC_PCI, + "Removed device %d for %s %s: DEV %s\n", + pci->pci_idx, pci->mod_name, pci->ctl_name, dev_name(pci)); + + return pci; +} + +EXPORT_SYMBOL_GPL(edac_pci_del_device); + +void edac_pci_generic_check(struct edac_pci_ctl_info *pci) +{ + edac_pci_do_parity_check(); +} + +static int edac_pci_idx; +#define EDAC_PCI_GENCTL_NAME "EDAC PCI controller" + +struct edac_pci_gen_data { + int edac_idx; +}; + +struct edac_pci_ctl_info *edac_pci_create_generic_ctl(struct device *dev, + const char *mod_name) +{ + struct edac_pci_ctl_info *pci; + struct edac_pci_gen_data *pdata; + + pci = edac_pci_alloc_ctl_info(sizeof(*pdata), EDAC_PCI_GENCTL_NAME); + if (!pci) + return NULL; + + pdata = pci->pvt_info; + pci->dev = dev; + dev_set_drvdata(pci->dev, pci); + pci->dev_name = pci_name(to_pci_dev(dev)); + + pci->mod_name = mod_name; + pci->ctl_name = EDAC_PCI_GENCTL_NAME; + pci->edac_check = edac_pci_generic_check; + + pdata->edac_idx = edac_pci_idx++; + + if (edac_pci_add_device(pci, pdata->edac_idx) > 0) { + debugf3("%s(): failed edac_pci_add_device()\n", __func__); + edac_pci_free_ctl_info(pci); + return NULL; + } + + return pci; +} + +EXPORT_SYMBOL_GPL(edac_pci_create_generic_ctl); + +void edac_pci_release_generic_ctl(struct edac_pci_ctl_info *pci) +{ + edac_pci_del_device(pci->dev); + edac_pci_free_ctl_info(pci); +} + +EXPORT_SYMBOL_GPL(edac_pci_release_generic_ctl); diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c new file mode 100644 index 00000000000..fac94cae2c3 --- /dev/null +++ b/drivers/edac/edac_pci_sysfs.c @@ -0,0 +1,620 @@ +/* + * (C) 2005, 2006 Linux Networx (http://lnxi.com) + * This file may be distributed under the terms of the + * GNU General Public License. 
+ * + * Written Doug Thompson <norsk5@xmission.com> + * + */ +#include <linux/module.h> +#include <linux/sysdev.h> +#include <linux/ctype.h> + +#include "edac_core.h" +#include "edac_module.h" + +#ifdef CONFIG_PCI + +#define EDAC_PCI_SYMLINK "device" + +static int check_pci_errors; /* default YES check PCI parity */ +static int edac_pci_panic_on_pe; /* default no panic on PCI Parity */ +static int edac_pci_log_pe = 1; /* log PCI parity errors */ +static int edac_pci_log_npe = 1; /* log PCI non-parity error errors */ +static atomic_t pci_parity_count = ATOMIC_INIT(0); +static atomic_t pci_nonparity_count = ATOMIC_INIT(0); +static int edac_pci_poll_msec = 1000; + +static struct kobject edac_pci_kobj; /* /sys/devices/system/edac/pci */ +static struct completion edac_pci_kobj_complete; +static atomic_t edac_pci_sysfs_refcount = ATOMIC_INIT(0); + +int edac_pci_get_check_errors(void) +{ + return check_pci_errors; +} + +int edac_pci_get_log_pe(void) +{ + return edac_pci_log_pe; +} + +int edac_pci_get_log_npe(void) +{ + return edac_pci_log_npe; +} + +int edac_pci_get_panic_on_pe(void) +{ + return edac_pci_panic_on_pe; +} + +int edac_pci_get_poll_msec(void) +{ + return edac_pci_poll_msec; +} + +/**************************** EDAC PCI sysfs instance *******************/ +static ssize_t instance_pe_count_show(struct edac_pci_ctl_info *pci, char *data) +{ + return sprintf(data, "%u\n", atomic_read(&pci->counters.pe_count)); +} + +static ssize_t instance_npe_count_show(struct edac_pci_ctl_info *pci, + char *data) +{ + return sprintf(data, "%u\n", atomic_read(&pci->counters.npe_count)); +} + +#define to_instance(k) container_of(k, struct edac_pci_ctl_info, kobj) +#define to_instance_attr(a) container_of(a, struct instance_attribute, attr) + +/* DEVICE instance kobject release() function */ +static void edac_pci_instance_release(struct kobject *kobj) +{ + struct edac_pci_ctl_info *pci; + + debugf1("%s()\n", __func__); + + pci = to_instance(kobj); + complete(&pci->kobj_complete); 
+} + +/* instance specific attribute structure */ +struct instance_attribute { + struct attribute attr; + ssize_t(*show) (struct edac_pci_ctl_info *, char *); + ssize_t(*store) (struct edac_pci_ctl_info *, const char *, size_t); +}; + +/* Function to 'show' fields from the edac_pci 'instance' structure */ +static ssize_t edac_pci_instance_show(struct kobject *kobj, + struct attribute *attr, char *buffer) +{ + struct edac_pci_ctl_info *pci = to_instance(kobj); + struct instance_attribute *instance_attr = to_instance_attr(attr); + + if (instance_attr->show) + return instance_attr->show(pci, buffer); + return -EIO; +} + +/* Function to 'store' fields into the edac_pci 'instance' structure */ +static ssize_t edac_pci_instance_store(struct kobject *kobj, + struct attribute *attr, + const char *buffer, size_t count) +{ + struct edac_pci_ctl_info *pci = to_instance(kobj); + struct instance_attribute *instance_attr = to_instance_attr(attr); + + if (instance_attr->store) + return instance_attr->store(pci, buffer, count); + return -EIO; +} + +static struct sysfs_ops pci_instance_ops = { + .show = edac_pci_instance_show, + .store = edac_pci_instance_store +}; + +#define INSTANCE_ATTR(_name, _mode, _show, _store) \ +static struct instance_attribute attr_instance_##_name = { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .show = _show, \ + .store = _store, \ +}; + +INSTANCE_ATTR(pe_count, S_IRUGO, instance_pe_count_show, NULL); +INSTANCE_ATTR(npe_count, S_IRUGO, instance_npe_count_show, NULL); + +/* pci instance attributes */ +static struct instance_attribute *pci_instance_attr[] = { + &attr_instance_pe_count, + &attr_instance_npe_count, + NULL +}; + +/* the ktype for pci instance */ +static struct kobj_type ktype_pci_instance = { + .release = edac_pci_instance_release, + .sysfs_ops = &pci_instance_ops, + .default_attrs = (struct attribute **)pci_instance_attr, +}; + +static int edac_pci_create_instance_kobj(struct edac_pci_ctl_info *pci, int idx) +{ + int err; + 
+ pci->kobj.parent = &edac_pci_kobj; + pci->kobj.ktype = &ktype_pci_instance; + + err = kobject_set_name(&pci->kobj, "pci%d", idx); + if (err) + return err; + + err = kobject_register(&pci->kobj); + if (err != 0) { + debugf2("%s() failed to register instance pci%d\n", + __func__, idx); + return err; + } + + debugf1("%s() Register instance 'pci%d' kobject\n", __func__, idx); + + return 0; +} + +static void +edac_pci_delete_instance_kobj(struct edac_pci_ctl_info *pci, int idx) +{ + init_completion(&pci->kobj_complete); + kobject_unregister(&pci->kobj); + wait_for_completion(&pci->kobj_complete); +} + +/***************************** EDAC PCI sysfs root **********************/ +#define to_edacpci(k) container_of(k, struct edac_pci_ctl_info, kobj) +#define to_edacpci_attr(a) container_of(a, struct edac_pci_attr, attr) + +static ssize_t edac_pci_int_show(void *ptr, char *buffer) +{ + int *value = ptr; + return sprintf(buffer, "%d\n", *value); +} + +static ssize_t edac_pci_int_store(void *ptr, const char *buffer, size_t count) +{ + int *value = ptr; + + if (isdigit(*buffer)) + *value = simple_strtoul(buffer, NULL, 0); + + return count; +} + +struct edac_pci_dev_attribute { + struct attribute attr; + void *value; + ssize_t(*show) (void *, char *); + ssize_t(*store) (void *, const char *, size_t); +}; + +/* Set of show/store abstract level functions for PCI Parity object */ +static ssize_t edac_pci_dev_show(struct kobject *kobj, struct attribute *attr, + char *buffer) +{ + struct edac_pci_dev_attribute *edac_pci_dev; + edac_pci_dev = (struct edac_pci_dev_attribute *)attr; + + if (edac_pci_dev->show) + return edac_pci_dev->show(edac_pci_dev->value, buffer); + return -EIO; +} + +static ssize_t edac_pci_dev_store(struct kobject *kobj, + struct attribute *attr, const char *buffer, + size_t count) +{ + struct edac_pci_dev_attribute *edac_pci_dev; + edac_pci_dev = (struct edac_pci_dev_attribute *)attr; + + if (edac_pci_dev->show) + return edac_pci_dev->store(edac_pci_dev->value, 
buffer, count); + return -EIO; +} + +static struct sysfs_ops edac_pci_sysfs_ops = { + .show = edac_pci_dev_show, + .store = edac_pci_dev_store +}; + +#define EDAC_PCI_ATTR(_name,_mode,_show,_store) \ +static struct edac_pci_dev_attribute edac_pci_attr_##_name = { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .value = &_name, \ + .show = _show, \ + .store = _store, \ +}; + +#define EDAC_PCI_STRING_ATTR(_name,_data,_mode,_show,_store) \ +static struct edac_pci_dev_attribute edac_pci_attr_##_name = { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .value = _data, \ + .show = _show, \ + .store = _store, \ +}; + +/* PCI Parity control files */ +EDAC_PCI_ATTR(check_pci_errors, S_IRUGO | S_IWUSR, edac_pci_int_show, + edac_pci_int_store); +EDAC_PCI_ATTR(edac_pci_log_pe, S_IRUGO | S_IWUSR, edac_pci_int_show, + edac_pci_int_store); +EDAC_PCI_ATTR(edac_pci_log_npe, S_IRUGO | S_IWUSR, edac_pci_int_show, + edac_pci_int_store); +EDAC_PCI_ATTR(edac_pci_panic_on_pe, S_IRUGO | S_IWUSR, edac_pci_int_show, + edac_pci_int_store); +EDAC_PCI_ATTR(pci_parity_count, S_IRUGO, edac_pci_int_show, NULL); +EDAC_PCI_ATTR(pci_nonparity_count, S_IRUGO, edac_pci_int_show, NULL); + +/* Base Attributes of the memory ECC object */ +static struct edac_pci_dev_attribute *edac_pci_attr[] = { + &edac_pci_attr_check_pci_errors, + &edac_pci_attr_edac_pci_log_pe, + &edac_pci_attr_edac_pci_log_npe, + &edac_pci_attr_edac_pci_panic_on_pe, + &edac_pci_attr_pci_parity_count, + &edac_pci_attr_pci_nonparity_count, + NULL, +}; + +/* No memory to release */ +static void edac_pci_release(struct kobject *kobj) +{ + struct edac_pci_ctl_info *pci; + + pci = to_edacpci(kobj); + + debugf1("%s()\n", __func__); + complete(&pci->kobj_complete); +} + +static struct kobj_type ktype_edac_pci = { + .release = edac_pci_release, + .sysfs_ops = &edac_pci_sysfs_ops, + .default_attrs = (struct attribute **)edac_pci_attr, +}; + +/** + * edac_sysfs_pci_setup() + * + * setup the sysfs for EDAC PCI 
attributes
+ *	assumes edac_class has already been initialized
+ */
+int edac_pci_register_main_kobj(void)
+{
+	int err;
+	struct sysdev_class *edac_class;
+
+	debugf1("%s()\n", __func__);
+
+	edac_class = edac_get_edac_class();
+	if (edac_class == NULL) {
+		debugf1("%s() no edac_class\n", __func__);
+		return -ENODEV;
+	}
+
+	edac_pci_kobj.ktype = &ktype_edac_pci;
+
+	/* hang the 'pci' directory below the edac class directory */
+	edac_pci_kobj.parent = &edac_class->kset.kobj;
+
+	err = kobject_set_name(&edac_pci_kobj, "pci");
+	if (err)
+		return err;
+
+	/* Instantiate the pci object */
+	/* FIXME: maybe new sysdev_create_subdir() */
+	err = kobject_register(&edac_pci_kobj);
+
+	if (err) {
+		debugf1("Failed to register '.../edac/pci'\n");
+		return err;
+	}
+
+	debugf1("Registered '.../edac/pci' kobject\n");
+
+	return 0;
+}
+
+/*
+ * edac_pci_unregister_main_kobj()
+ *
+ *	perform the sysfs teardown for the PCI attributes
+ *	blocks on edac_pci_kobj_complete after unregistering the kobject
+ */
+void edac_pci_unregister_main_kobj(void)
+{
+	debugf0("%s()\n", __func__);
+	init_completion(&edac_pci_kobj_complete);
+	kobject_unregister(&edac_pci_kobj);
+	wait_for_completion(&edac_pci_kobj_complete);
+}
+
+/*
+ * edac_pci_create_sysfs()
+ *
+ *	create the sysfs entries for one EDAC PCI control instance:
+ *	the shared main 'pci' kobject (registered by the first user only,
+ *	tracked through edac_pci_sysfs_refcount), the per-instance kobject
+ *	and the EDAC_PCI_SYMLINK link to the device
+ *
+ *	returns 0 on success; on failure every step already taken is
+ *	undone and the error code is returned
+ */
+int edac_pci_create_sysfs(struct edac_pci_ctl_info *pci)
+{
+	int err;
+	struct kobject *edac_kobj = &pci->kobj;
+
+	/* first user registers the main kobject */
+	if (atomic_inc_return(&edac_pci_sysfs_refcount) == 1) {
+		err = edac_pci_register_main_kobj();
+		if (err) {
+			atomic_dec(&edac_pci_sysfs_refcount);
+			return err;
+		}
+	}
+
+	err = edac_pci_create_instance_kobj(pci, pci->pci_idx);
+	if (err)
+		goto put_main_kobj;
+
+	debugf0("%s() idx=%d\n", __func__, pci->pci_idx);
+
+	err = sysfs_create_link(edac_kobj, &pci->dev->kobj, EDAC_PCI_SYMLINK);
+	if (err) {
+		debugf0("%s() sysfs_create_link() returned err= %d\n",
+			__func__, err);
+		goto del_instance_kobj;
+	}
+
+	return 0;
+
+	/* error unwind, in reverse order of setup */
+del_instance_kobj:
+	edac_pci_delete_instance_kobj(pci, pci->pci_idx);
+put_main_kobj:
+	/* drop our reference; the last user tears the main kobject down */
+	if (atomic_dec_return(&edac_pci_sysfs_refcount) == 0)
+		edac_pci_unregister_main_kobj();
+
+	return err;
+}
+
+/*
+ * edac_pci_remove_sysfs()
+ *
+ *	remove the sysfs entries of one EDAC PCI control instance and
+ *	unregister the main kobject when the last instance goes away
+ */
+void edac_pci_remove_sysfs(struct edac_pci_ctl_info *pci)
+{
+	debugf0("%s()\n", __func__);
+
+	edac_pci_delete_instance_kobj(pci, pci->pci_idx);
+
+	sysfs_remove_link(&pci->kobj, EDAC_PCI_SYMLINK);
+
+	if (atomic_dec_return(&edac_pci_sysfs_refcount) == 0)
+		edac_pci_unregister_main_kobj();
+}
+
+/************************ PCI error handling *************************/
+/*
+ * get_pci_parity_status()
+ *
+ *	read the primary (secondary == 0) or secondary status register of
+ *	'dev', reduce it to the parity/SERR bits and write those bits back
+ *	to reset them
+ *
+ *	returns the masked status, or 0 when the device looks unplugged
+ */
+static u16 get_pci_parity_status(struct pci_dev *dev, int secondary)
+{
+	int where;
+	u16 status;
+
+	where = secondary ? PCI_SEC_STATUS : PCI_STATUS;
+	pci_read_config_word(dev, where, &status);
+
+	/* If we get back 0xFFFF then we must suspect that the card has been
+	 * pulled but the Linux PCI layer has not yet finished cleaning up.
+	 * We don't want to report on such devices
+	 */
+
+	if (status == 0xFFFF) {
+		u32 sanity;
+
+		/* offset 0 of config space reading all ones as well confirms it */
+		pci_read_config_dword(dev, 0, &sanity);
+
+		if (sanity == 0xFFFFFFFF)
+			return 0;
+	}
+
+	status &= PCI_STATUS_DETECTED_PARITY | PCI_STATUS_SIG_SYSTEM_ERROR |
+		PCI_STATUS_PARITY;
+
+	if (status)
+		/* reset only the bits we are interested in */
+		pci_write_config_word(dev, where, status);
+
+	return status;
+}
+
+typedef void (*pci_parity_check_fn_t) (struct pci_dev * dev);
+
+/* Clear any PCI parity errors logged by this device.
*/
+static void edac_pci_dev_parity_clear(struct pci_dev *dev)
+{
+	u8 header_type;
+
+	/* result is ignored: the helper writes back whatever bits were set */
+	get_pci_parity_status(dev, 0);
+
+	/* read the device TYPE, looking for bridges */
+	pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type);
+
+	/* bit 7 of the header type is masked off before the comparison */
+	if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE)
+		get_pci_parity_status(dev, 1);
+}
+
+/*
+ *  PCI Parity polling
+ *
+ *	check the status register of one device (and the secondary status
+ *	register when it is a bridge), log every error bit found and bump
+ *	pci_parity_count / pci_nonparity_count accordingly
+ */
+static void edac_pci_dev_parity_test(struct pci_dev *dev)
+{
+	u16 status;
+	u8 header_type;
+
+	/* read the STATUS register on this device
+	 */
+	status = get_pci_parity_status(dev, 0);
+
+	debugf2("PCI STATUS= 0x%04x %s\n", status, dev->dev.bus_id);
+
+	/* check the status reg for errors */
+	if (status) {
+		/* system errors are counted as non-parity errors */
+		if (status & (PCI_STATUS_SIG_SYSTEM_ERROR)) {
+			edac_printk(KERN_CRIT, EDAC_PCI,
+				"Signaled System Error on %s\n",
+				pci_name(dev));
+			atomic_inc(&pci_nonparity_count);
+		}
+
+		if (status & (PCI_STATUS_PARITY)) {
+			edac_printk(KERN_CRIT, EDAC_PCI,
+				"Master Data Parity Error on %s\n",
+				pci_name(dev));
+
+			atomic_inc(&pci_parity_count);
+		}
+
+		if (status & (PCI_STATUS_DETECTED_PARITY)) {
+			edac_printk(KERN_CRIT, EDAC_PCI,
+				"Detected Parity Error on %s\n",
+				pci_name(dev));
+
+			atomic_inc(&pci_parity_count);
+		}
+	}
+
+	/* read the device TYPE, looking for bridges */
+	pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type);
+
+	debugf2("PCI HEADER TYPE= 0x%02x %s\n", header_type, dev->dev.bus_id);
+
+	if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) {
+		/* On bridges, need to examine secondary status register  */
+		status = get_pci_parity_status(dev, 1);
+
+		debugf2("PCI SEC_STATUS= 0x%04x %s\n", status, dev->dev.bus_id);
+
+		/* check the secondary status reg for errors */
+		if (status) {
+			if (status & (PCI_STATUS_SIG_SYSTEM_ERROR)) {
+				edac_printk(KERN_CRIT, EDAC_PCI, "Bridge "
+					"Signaled System Error on %s\n",
+					pci_name(dev));
+				atomic_inc(&pci_nonparity_count);
+			}
+
+			if (status & (PCI_STATUS_PARITY)) {
+				edac_printk(KERN_CRIT, EDAC_PCI, "Bridge "
+					"Master Data Parity Error on "
+					"%s\n", pci_name(dev));
+
+				atomic_inc(&pci_parity_count);
+			}
+
+			if (status & (PCI_STATUS_DETECTED_PARITY)) {
+				edac_printk(KERN_CRIT, EDAC_PCI, "Bridge "
+					"Detected Parity Error on %s\n",
+					pci_name(dev));
+
+				atomic_inc(&pci_parity_count);
+			}
+		}
+	}
+}
+
+/*
+ * pci_dev parity list iterator
+ *	Scan the PCI device list for one iteration, looking for SERRORs
+ *	Master Parity ERRORS or Parity ERRORs on primary or secondary devices
+ *
+ *	'fn' is called once for every device returned by pci_get_device()
+ */
+static inline void edac_pci_dev_parity_iterator(pci_parity_check_fn_t fn)
+{
+	struct pci_dev *dev = NULL;
+
+	/* request for kernel access to the next PCI device, if any,
+	 * and while we are looking at it have its reference count
+	 * bumped until we are done with it
+	 */
+	while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
+		fn(dev);
+	}
+}
+
+/*
+ * edac_pci_do_parity_check
+ *
+ *	performs the actual PCI parity check operation
+ *	does nothing unless check_pci_errors is set; panics when the
+ *	parity counter moved and panic-on-PE is selected
+ */
+void edac_pci_do_parity_check(void)
+{
+	unsigned long flags;
+	int before_count;
+
+	debugf3("%s()\n", __func__);
+
+	if (!check_pci_errors)
+		return;
+
+	/* snapshot, compared after the scan to detect new parity errors */
+	before_count = atomic_read(&pci_parity_count);
+
+	/* scan all PCI devices looking for a Parity Error on devices and
+	 * bridges
+	 */
+	local_irq_save(flags);
+	edac_pci_dev_parity_iterator(edac_pci_dev_parity_test);
+	local_irq_restore(flags);
+
+	/* Only if operator has selected panic on PCI Error */
+	if (edac_pci_get_panic_on_pe()) {
+		/* If the count is different 'after' from 'before' */
+		if (before_count != atomic_read(&pci_parity_count))
+			panic("EDAC: PCI Parity Error");
+	}
+}
+
+/* run edac_pci_dev_parity_clear() over every PCI device */
+void edac_pci_clear_parity_errors(void)
+{
+	/* Clear any PCI bus parity errors that devices initially have logged
+	 * in their registers.
+	 */
+	edac_pci_dev_parity_iterator(edac_pci_dev_parity_clear);
+}
+
+/*
+ * edac_pci_handle_pe
+ *
+ *	called by a chipset driver on a PCI parity error: counts it on the
+ *	control instance, optionally logs 'msg', then polls all devices
+ */
+void edac_pci_handle_pe(struct edac_pci_ctl_info *pci, const char *msg)
+{
+
+	/* global PE counter incremented by edac_pci_do_parity_check() */
+	atomic_inc(&pci->counters.pe_count);
+
+	if (edac_pci_get_log_pe())
+		edac_pci_printk(pci, KERN_WARNING,
+				"Parity Error ctl: %s %d: %s\n",
+				pci->ctl_name, pci->pci_idx, msg);
+
+	/*
+	 * poke all PCI devices and see which one is the troublemaker
+	 * panic() is called if set
+	 */
+	edac_pci_do_parity_check();
+}
+
+EXPORT_SYMBOL_GPL(edac_pci_handle_pe);
+
+/*
+ * edac_pci_handle_npe
+ *
+ *	same as edac_pci_handle_pe(), for non-parity errors
+ */
+void edac_pci_handle_npe(struct edac_pci_ctl_info *pci, const char *msg)
+{
+
+	/* global NPE counter incremented by edac_pci_do_parity_check() */
+	atomic_inc(&pci->counters.npe_count);
+
+	if (edac_pci_get_log_npe())
+		edac_pci_printk(pci, KERN_WARNING,
+				"Non-Parity Error ctl: %s %d: %s\n",
+				pci->ctl_name, pci->pci_idx, msg);
+
+	/*
+	 * poke all PCI devices and see which one is the troublemaker
+	 * panic() is called if set
+	 */
+	edac_pci_do_parity_check();
+}
+
+EXPORT_SYMBOL_GPL(edac_pci_handle_npe);
+
+/*
+ * Define the PCI parameter to the module
+ */
+module_param(check_pci_errors, int, 0644);
+MODULE_PARM_DESC(check_pci_errors,
+		 "Check for PCI bus parity errors: 0=off 1=on");
+module_param(edac_pci_panic_on_pe, int, 0644);
+MODULE_PARM_DESC(edac_pci_panic_on_pe,
+		 "Panic on PCI Bus Parity error: 0=off 1=on");
+
+#endif				/* CONFIG_PCI */
diff --git a/drivers/edac/edac_stub.c b/drivers/edac/edac_stub.c
new file mode 100644
index 00000000000..20b428aa155
--- /dev/null
+++ b/drivers/edac/edac_stub.c
@@ -0,0 +1,46 @@
+/*
+ * common EDAC components that must be in kernel
+ *
+ * Author: Dave Jiang <djiang@mvista.com>
+ *
+ * 2007 (c) MontaVista Software, Inc. This file is licensed under
+ * the terms of the GNU General Public License version 2. This program
+ * is licensed "as is" without any warranty of any kind, whether express
+ * or implied.
+ *
+ */
+#include <linux/module.h>
+#include <linux/edac.h>
+#include <asm/atomic.h>
+#include <asm/edac.h>
+
+int edac_op_state = EDAC_OPSTATE_INVAL;
+EXPORT_SYMBOL_GPL(edac_op_state);
+
+atomic_t edac_handlers = ATOMIC_INIT(0);
+EXPORT_SYMBOL_GPL(edac_handlers);
+
+int edac_err_assert = 0;
+EXPORT_SYMBOL_GPL(edac_err_assert);
+
+/*
+ * tell the caller whether some EDAC driver wants to hear about an
+ * event (an NMI for instance); always 0 while EDAC runs in poll mode
+ */
+int edac_handler_set(void)
+{
+	int polling = (edac_op_state == EDAC_OPSTATE_POLL);
+
+	/* the handler count is only looked at when not polling */
+	return polling ? 0 : atomic_read(&edac_handlers);
+}
+EXPORT_SYMBOL_GPL(edac_handler_set);
+
+/*
+ * NMI-type interrupt hook: note that an error has been asserted
+ */
+void edac_atomic_assert_error(void)
+{
+	edac_err_assert += 1;
+}
+EXPORT_SYMBOL_GPL(edac_atomic_assert_error);
diff --git a/drivers/edac/i3000_edac.c b/drivers/edac/i3000_edac.c
new file mode 100644
index 00000000000..0ecfdc432f8
--- /dev/null
+++ b/drivers/edac/i3000_edac.c
@@ -0,0 +1,506 @@
+/*
+ * Intel 3000/3010 Memory Controller kernel module
+ * Copyright (C) 2007 Akamai Technologies, Inc.
+ * Shamelessly copied from:
+ * 	Intel D82875P Memory Controller kernel module
+ * 	(C) 2003 Linux Networx (http://lnxi.com)
+ *
+ * This file may be distributed under the terms of the
+ * GNU General Public License.
+ */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/pci.h> +#include <linux/pci_ids.h> +#include <linux/slab.h> +#include "edac_core.h" + +#define I3000_REVISION "1.1" + +#define EDAC_MOD_STR "i3000_edac" + +#define I3000_RANKS 8 +#define I3000_RANKS_PER_CHANNEL 4 +#define I3000_CHANNELS 2 + +/* Intel 3000 register addresses - device 0 function 0 - DRAM Controller */ + +#define I3000_MCHBAR 0x44 /* MCH Memory Mapped Register BAR */ +#define I3000_MCHBAR_MASK 0xffffc000 +#define I3000_MMR_WINDOW_SIZE 16384 + +#define I3000_EDEAP 0x70 /* Extended DRAM Error Address Pointer (8b) + * + * 7:1 reserved + * 0 bit 32 of address + */ +#define I3000_DEAP 0x58 /* DRAM Error Address Pointer (32b) + * + * 31:7 address + * 6:1 reserved + * 0 Error channel 0/1 + */ +#define I3000_DEAP_GRAIN (1 << 7) +#define I3000_DEAP_PFN(edeap, deap) ((((edeap) & 1) << (32 - PAGE_SHIFT)) | \ + ((deap) >> PAGE_SHIFT)) +#define I3000_DEAP_OFFSET(deap) ((deap) & ~(I3000_DEAP_GRAIN-1) & ~PAGE_MASK) +#define I3000_DEAP_CHANNEL(deap) ((deap) & 1) + +#define I3000_DERRSYN 0x5c /* DRAM Error Syndrome (8b) + * + * 7:0 DRAM ECC Syndrome + */ + +#define I3000_ERRSTS 0xc8 /* Error Status Register (16b) + * + * 15:12 reserved + * 11 MCH Thermal Sensor Event for SMI/SCI/SERR + * 10 reserved + * 9 LOCK to non-DRAM Memory Flag (LCKF) + * 8 Received Refresh Timeout Flag (RRTOF) + * 7:2 reserved + * 1 Multiple-bit DRAM ECC Error Flag (DMERR) + * 0 Single-bit DRAM ECC Error Flag (DSERR) + */ +#define I3000_ERRSTS_BITS 0x0b03 /* bits which indicate errors */ +#define I3000_ERRSTS_UE 0x0002 +#define I3000_ERRSTS_CE 0x0001 + +#define I3000_ERRCMD 0xca /* Error Command (16b) + * + * 15:12 reserved + * 11 SERR on MCH Thermal Sensor Event (TSESERR) + * 10 reserved + * 9 SERR on LOCK to non-DRAM Memory (LCKERR) + * 8 SERR on DRAM Refresh Timeout (DRTOERR) + * 7:2 reserved + * 1 SERR Multiple-Bit DRAM ECC Error (DMERR) + * 0 SERR on Single-Bit ECC Error (DSERR) + */ + +/* Intel MMIO register space 
- device 0 function 0 - MMR space */
+
+#define I3000_DRB_SHIFT 25	/* 32MiB grain */
+
+#define I3000_C0DRB	0x100	/* Channel 0 DRAM Rank Boundary (8b x 4)
+				 *
+				 * 7:0   Channel 0 DRAM Rank Boundary Address
+				 */
+#define I3000_C1DRB	0x180	/* Channel 1 DRAM Rank Boundary (8b x 4)
+				 *
+				 * 7:0   Channel 1 DRAM Rank Boundary Address
+				 */
+
+#define I3000_C0DRA	0x108	/* Channel 0 DRAM Rank Attribute (8b x 2)
+				 *
+				 * 7     reserved
+				 * 6:4   DRAM odd Rank Attribute
+				 * 3     reserved
+				 * 2:0   DRAM even Rank Attribute
+				 *
+				 * Each attribute defines the page
+				 * size of the corresponding rank:
+				 *     000: unpopulated
+				 *     001: reserved
+				 *     010: 4 KB
+				 *     011: 8 KB
+				 *     100: 16 KB
+				 *     Others: reserved
+				 */
+#define I3000_C1DRA	0x188	/* Channel 1 DRAM Rank Attribute (8b x 2) */
+/* extract the odd (bits 6:4) / even (bits 2:0) rank attribute of a DRA byte */
+#define ODD_RANK_ATTRIB(dra) (((dra) & 0x70) >> 4)
+#define EVEN_RANK_ATTRIB(dra) ((dra) & 0x07)
+
+#define I3000_C0DRC0	0x120	/* DRAM Controller Mode 0 (32b)
+				 *
+				 * 31:30 reserved
+				 * 29    Initialization Complete (IC)
+				 * 28:11 reserved
+				 * 10:8  Refresh Mode Select (RMS)
+				 * 7     reserved
+				 * 6:4   Mode Select (SMS)
+				 * 3:2   reserved
+				 * 1:0   DRAM Type (DT)
+				 */
+
+#define I3000_C0DRC1	0x124	/* DRAM Controller Mode 1 (32b)
+				 *
+				 * 31    Enhanced Addressing Enable (ENHADE)
+				 * 30:0  reserved
+				 */
+
+/* index into i3000_devs[] */
+enum i3000p_chips {
+	I3000 = 0,
+};
+
+struct i3000_dev_info {
+	const char *ctl_name;
+};
+
+/* snapshot of the error registers taken by i3000_get_error_info() */
+struct i3000_error_info {
+	u16 errsts;	/* ERRSTS, first read */
+	u8 derrsyn;	/* DERRSYN */
+	u8 edeap;	/* EDEAP */
+	u32 deap;	/* DEAP */
+	u16 errsts2;	/* ERRSTS, read again after the address/syndrome */
+};
+
+static const struct i3000_dev_info i3000_devs[] = {
+	[I3000] = {
+		.ctl_name = "i3000"},
+};
+
+static struct pci_dev *mci_pdev;
+/* cleared by i3000_init() when it has to probe the device by hand */
+static int i3000_registered = 1;
+static struct edac_pci_ctl_info *i3000_pci;
+
+/*
+ * Read the error registers of the controller behind 'mci' into 'info'
+ * and reset the error status bits.  When the first ERRSTS read shows no
+ * error bit the function returns early and only info->errsts is valid.
+ */
+static void i3000_get_error_info(struct mem_ctl_info *mci,
+				 struct i3000_error_info *info)
+{
+	struct pci_dev *pdev;
+
+	pdev = to_pci_dev(mci->dev);
+
+	/*
+	 * This is a mess because there is no atomic way to read all the
+	 * registers at once and the registers can transition from CE being
+	 * overwritten by UE.
+	 */
+	pci_read_config_word(pdev, I3000_ERRSTS, &info->errsts);
+	if (!(info->errsts & I3000_ERRSTS_BITS))
+		return;
+	pci_read_config_byte(pdev, I3000_EDEAP, &info->edeap);
+	pci_read_config_dword(pdev, I3000_DEAP, &info->deap);
+	pci_read_config_byte(pdev, I3000_DERRSYN, &info->derrsyn);
+	pci_read_config_word(pdev, I3000_ERRSTS, &info->errsts2);
+
+	/*
+	 * If the error is the same for both reads then the first set
+	 * of reads is valid.  If there is a change then there is a CE
+	 * with no info and the second set of reads is valid and
+	 * should be UE info.
+	 */
+	if ((info->errsts ^ info->errsts2) & I3000_ERRSTS_BITS) {
+		pci_read_config_byte(pdev, I3000_EDEAP, &info->edeap);
+		pci_read_config_dword(pdev, I3000_DEAP, &info->deap);
+		pci_read_config_byte(pdev, I3000_DERRSYN, &info->derrsyn);
+	}
+
+	/* Clear any error bits.
+	 * (Yes, we really clear bits by writing 1 to them.)
+	 */
+	pci_write_bits16(pdev, I3000_ERRSTS, I3000_ERRSTS_BITS,
+			 I3000_ERRSTS_BITS);
+}
+
+/*
+ * Decode a snapshot taken by i3000_get_error_info() and report it to the
+ * EDAC core.  Returns 0 when no error bit is set, 1 otherwise; with
+ * handle_errors == 0 nothing is reported.
+ */
+static int i3000_process_error_info(struct mem_ctl_info *mci,
+				struct i3000_error_info *info,
+				int handle_errors)
+{
+	int row, multi_chan;
+	int pfn, offset, channel;
+
+	/* non-zero when the csrows have more than one channel */
+	multi_chan = mci->csrows[0].nr_channels - 1;
+
+	if (!(info->errsts & I3000_ERRSTS_BITS))
+		return 0;
+
+	if (!handle_errors)
+		return 1;
+
+	/* status changed between the two reads: see i3000_get_error_info() */
+	if ((info->errsts ^ info->errsts2) & I3000_ERRSTS_BITS) {
+		edac_mc_handle_ce_no_info(mci, "UE overwrote CE");
+		info->errsts = info->errsts2;
+	}
+
+	pfn = I3000_DEAP_PFN(info->edeap, info->deap);
+	offset = I3000_DEAP_OFFSET(info->deap);
+	channel = I3000_DEAP_CHANNEL(info->deap);
+
+	row = edac_mc_find_csrow_by_page(mci, pfn);
+
+	if (info->errsts & I3000_ERRSTS_UE)
+		edac_mc_handle_ue(mci, pfn, offset, row, "i3000 UE");
+	else
+		edac_mc_handle_ce(mci, pfn, offset, info->derrsyn, row,
+				multi_chan ?
channel : 0, "i3000 CE");
+
+	return 1;
+}
+
+/* EDAC polling callback: fetch the error registers and report them */
+static void i3000_check(struct mem_ctl_info *mci)
+{
+	struct i3000_error_info info;
+
+	debugf1("MC%d: %s()\n", mci->mc_idx, __func__);
+	i3000_get_error_info(mci, &info);
+	i3000_process_error_info(mci, &info, 1);
+}
+
+/*
+ * Returns 1 when both channels have identical rank attributes and rank
+ * boundaries, 0 otherwise.  c0dra/c1dra hold I3000_RANKS_PER_CHANNEL / 2
+ * bytes each, c0drb/c1drb hold I3000_RANKS_PER_CHANNEL bytes each.
+ */
+static int i3000_is_interleaved(const unsigned char *c0dra,
+				const unsigned char *c1dra,
+				const unsigned char *c0drb,
+				const unsigned char *c1drb)
+{
+	int i;
+
+	/* If the channels aren't populated identically then
+	 * we're not interleaved.
+	 */
+	for (i = 0; i < I3000_RANKS_PER_CHANNEL / 2; i++)
+		if (ODD_RANK_ATTRIB(c0dra[i]) != ODD_RANK_ATTRIB(c1dra[i]) ||
+			EVEN_RANK_ATTRIB(c0dra[i]) !=
+						EVEN_RANK_ATTRIB(c1dra[i]))
+			return 0;
+
+	/* If the rank boundaries for the two channels are different
+	 * then we're not interleaved.
+	 */
+	for (i = 0; i < I3000_RANKS_PER_CHANNEL; i++)
+		if (c0drb[i] != c1drb[i])
+			return 0;
+
+	return 1;
+}
+
+/*
+ * Set up one memory controller: read the rank attribute/boundary
+ * registers through the MCHBAR window, allocate and fill the
+ * mem_ctl_info, register it with the EDAC core and create the generic
+ * EDAC PCI control.
+ *
+ * Returns 0 on success, -ENODEV when the MMIO window cannot be mapped or
+ * edac_mc_add_mc() fails, -ENOMEM when edac_mc_alloc() fails.
+ */
+static int i3000_probe1(struct pci_dev *pdev, int dev_idx)
+{
+	int rc;
+	int i;
+	struct mem_ctl_info *mci = NULL;
+	unsigned long last_cumul_size;
+	int interleaved, nr_channels;
+	unsigned char dra[I3000_RANKS / 2], drb[I3000_RANKS];
+	unsigned char *c0dra = dra, *c1dra = &dra[I3000_RANKS_PER_CHANNEL / 2];
+	unsigned char *c0drb = drb, *c1drb = &drb[I3000_RANKS_PER_CHANNEL];
+	u32 mchbar;
+	void *window;
+
+	debugf0("MC: %s()\n", __func__);
+
+	/*
+	 * MCHBAR is a 32-bit register: read it into a u32 rather than
+	 * through a cast into a wider variable, which would leave the
+	 * upper half of that variable uninitialized.
+	 */
+	pci_read_config_dword(pdev, I3000_MCHBAR, &mchbar);
+	mchbar &= I3000_MCHBAR_MASK;
+	window = ioremap_nocache(mchbar, I3000_MMR_WINDOW_SIZE);
+	if (!window) {
+		printk(KERN_ERR "i3000: cannot map mmio space at 0x%lx\n",
+			(unsigned long)mchbar);
+		return -ENODEV;
+	}
+
+	c0dra[0] = readb(window + I3000_C0DRA + 0);	/* ranks 0,1 */
+	c0dra[1] = readb(window + I3000_C0DRA + 1);	/* ranks 2,3 */
+	c1dra[0] = readb(window + I3000_C1DRA + 0);	/* ranks 0,1 */
+	c1dra[1] = readb(window + I3000_C1DRA + 1);	/* ranks 2,3 */
+
+	for (i = 0; i < I3000_RANKS_PER_CHANNEL; i++) {
+		c0drb[i] = readb(window + I3000_C0DRB + i);
+		c1drb[i] = readb(window + I3000_C1DRB + i);
+	}
+
+	/* everything needed has been copied out; drop the mapping */
+	iounmap(window);
+
+	/* Figure out how many channels we have.
+	 *
+	 * If we have what the datasheet calls "asymmetric channels"
+	 * (essentially the same as what was called "virtual single
+	 * channel mode" in the i82875) then it's a single channel as
+	 * far as EDAC is concerned.
+	 */
+	interleaved = i3000_is_interleaved(c0dra, c1dra, c0drb, c1drb);
+	nr_channels = interleaved ? 2 : 1;
+	mci = edac_mc_alloc(0, I3000_RANKS / nr_channels, nr_channels, 0);
+	if (!mci)
+		return -ENOMEM;
+
+	debugf3("MC: %s(): init mci\n", __func__);
+
+	mci->dev = &pdev->dev;
+	mci->mtype_cap = MEM_FLAG_DDR2;
+
+	mci->edac_ctl_cap = EDAC_FLAG_SECDED;
+	mci->edac_cap = EDAC_FLAG_SECDED;
+
+	mci->mod_name = EDAC_MOD_STR;
+	mci->mod_ver = I3000_REVISION;
+	mci->ctl_name = i3000_devs[dev_idx].ctl_name;
+	mci->dev_name = pci_name(pdev);
+	mci->edac_check = i3000_check;
+	mci->ctl_page_to_phys = NULL;
+
+	/*
+	 * The dram rank boundary (DRB) reg values are boundary addresses
+	 * for each DRAM rank with a granularity of 32MB.  DRB regs are
+	 * cumulative; the last one will contain the total memory
+	 * contained in all ranks.
+	 *
+	 * If we're in interleaved mode then we're only walking through
+	 * the ranks of controller 0, so we double all the values we see.
+	 */
+	for (last_cumul_size = i = 0; i < mci->nr_csrows; i++) {
+		u8 value;
+		u32 cumul_size;
+		struct csrow_info *csrow = &mci->csrows[i];
+
+		value = drb[i];
+		cumul_size = value << (I3000_DRB_SHIFT - PAGE_SHIFT);
+		if (interleaved)
+			cumul_size <<= 1;
+		debugf3("MC: %s(): (%d) cumul_size 0x%x\n",
+			__func__, i, cumul_size);
+		/* boundary did not move: this rank holds no memory */
+		if (cumul_size == last_cumul_size) {
+			csrow->mtype = MEM_EMPTY;
+			continue;
+		}
+
+		csrow->first_page = last_cumul_size;
+		csrow->last_page = cumul_size - 1;
+		csrow->nr_pages = cumul_size - last_cumul_size;
+		last_cumul_size = cumul_size;
+		csrow->grain = I3000_DEAP_GRAIN;
+		csrow->mtype = MEM_DDR2;
+		csrow->dtype = DEV_UNKNOWN;
+		csrow->edac_mode = EDAC_UNKNOWN;
+	}
+
+	/* Clear any error bits.
+	 * (Yes, we really clear bits by writing 1 to them.)
+	 */
+	pci_write_bits16(pdev, I3000_ERRSTS, I3000_ERRSTS_BITS,
+			 I3000_ERRSTS_BITS);
+
+	rc = -ENODEV;
+	if (edac_mc_add_mc(mci)) {
+		debugf3("MC: %s(): failed edac_mc_add_mc()\n", __func__);
+		goto fail;
+	}
+
+	/* allocating generic PCI control info */
+	i3000_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR);
+	if (!i3000_pci) {
+		/* not fatal: memory error reporting still works */
+		printk(KERN_WARNING
+			"%s(): Unable to create PCI control\n",
+			__func__);
+		printk(KERN_WARNING
+			"%s(): PCI error report via EDAC not setup\n",
+			__func__);
+	}
+
+	/* get this far and it's successful */
+	debugf3("MC: %s(): success\n", __func__);
+	return 0;
+
+      fail:
+	if (mci)
+		edac_mc_free(mci);
+
+	return rc;
+}
+
+/* returns count (>= 0), or negative on error */
+static int __devinit i3000_init_one(struct pci_dev *pdev,
+				const struct pci_device_id *ent)
+{
+	int rc;
+
+	debugf0("MC: %s()\n", __func__);
+
+	if (pci_enable_device(pdev) < 0)
+		return -EIO;
+
+	rc = i3000_probe1(pdev, ent->driver_data);
+	/* remember the first device seen, holding a reference on it */
+	if (mci_pdev == NULL)
+		mci_pdev = pci_dev_get(pdev);
+
+	return rc;
+}
+
+/* undo i3000_probe1(): release the PCI control, then the mem_ctl_info */
+static void __devexit i3000_remove_one(struct pci_dev *pdev)
+{
+	struct mem_ctl_info *mci;
+
+	debugf0("%s()\n", __func__);
+
+	if (i3000_pci)
+
edac_pci_release_generic_ctl(i3000_pci);
+
+	/* nothing left to free when no controller was registered for pdev */
+	if ((mci = edac_mc_del_mc(&pdev->dev)) == NULL)
+		return;
+
+	edac_mc_free(mci);
+}
+
+static const struct pci_device_id i3000_pci_tbl[] __devinitdata = {
+	{
+	 PCI_VEND_DEV(INTEL, 3000_HB), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+	 I3000},
+	{
+	 0,
+	 }			/* 0 terminated list. */
+};
+
+MODULE_DEVICE_TABLE(pci, i3000_pci_tbl);
+
+static struct pci_driver i3000_driver = {
+	.name = EDAC_MOD_STR,
+	.probe = i3000_init_one,
+	.remove = __devexit_p(i3000_remove_one),
+	.id_table = i3000_pci_tbl,
+};
+
+/*
+ * Module entry point: register the PCI driver and, when that did not
+ * lead to a probe (mci_pdev still NULL), look the device up and probe
+ * it by hand.  Returns 0 on success or a negative error code.
+ */
+static int __init i3000_init(void)
+{
+	int pci_rc;
+
+	debugf3("MC: %s()\n", __func__);
+	pci_rc = pci_register_driver(&i3000_driver);
+	if (pci_rc < 0)
+		goto fail0;
+
+	if (mci_pdev == NULL) {
+		/* remembered so that i3000_exit() removes the device itself */
+		i3000_registered = 0;
+		mci_pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
+					PCI_DEVICE_ID_INTEL_3000_HB, NULL);
+		if (!mci_pdev) {
+			debugf0("i3000 pci_get_device fail\n");
+			pci_rc = -ENODEV;
+			goto fail1;
+		}
+
+		/* first table entry is the only supported device */
+		pci_rc = i3000_init_one(mci_pdev, i3000_pci_tbl);
+		if (pci_rc < 0) {
+			debugf0("i3000 init fail\n");
+			pci_rc = -ENODEV;
+			goto fail1;
+		}
+	}
+
+	return 0;
+
+fail1:
+	pci_unregister_driver(&i3000_driver);
+
+fail0:
+	if (mci_pdev)
+		pci_dev_put(mci_pdev);
+
+	return pci_rc;
+}
+
+/*
+ * Module exit: unregister the driver; a device that was probed by hand
+ * in i3000_init() is removed and its reference dropped here.
+ */
+static void __exit i3000_exit(void)
+{
+	debugf3("MC: %s()\n", __func__);
+
+	pci_unregister_driver(&i3000_driver);
+	if (!i3000_registered) {
+		i3000_remove_one(mci_pdev);
+		pci_dev_put(mci_pdev);
+	}
+}
+
+module_init(i3000_init);
+module_exit(i3000_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Akamai Technologies Arthur Ulfeldt/Jason Uhlenkott");
+MODULE_DESCRIPTION("MC support for Intel 3000 memory hub controllers");
diff --git a/drivers/edac/i5000_edac.c b/drivers/edac/i5000_edac.c
new file mode 100644
index 00000000000..96f7e63e399
--- /dev/null
+++ b/drivers/edac/i5000_edac.c
@@ -0,0 +1,1505 @@
+/*
+ * Intel 5000(P/V/X) class Memory Controllers kernel module
+ *
+ * This file may be distributed under the terms of the
+ * GNU General Public License.
+ * + * Written by Douglas Thompson Linux Networx (http://lnxi.com) + * norsk5@xmission.com + * + * This module is based on the following document: + * + * Intel 5000X Chipset Memory Controller Hub (MCH) - Datasheet + * http://developer.intel.com/design/chipsets/datashts/313070.htm + * + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/pci.h> +#include <linux/pci_ids.h> +#include <linux/slab.h> +#include <linux/edac.h> +#include <asm/mmzone.h> + +#include "edac_core.h" + +/* + * Alter this version for the I5000 module when modifications are made + */ +#define I5000_REVISION " Ver: 2.0.12 " __DATE__ +#define EDAC_MOD_STR "i5000_edac" + +#define i5000_printk(level, fmt, arg...) \ + edac_printk(level, "i5000", fmt, ##arg) + +#define i5000_mc_printk(mci, level, fmt, arg...) \ + edac_mc_chipset_printk(mci, level, "i5000", fmt, ##arg) + +#ifndef PCI_DEVICE_ID_INTEL_FBD_0 +#define PCI_DEVICE_ID_INTEL_FBD_0 0x25F5 +#endif +#ifndef PCI_DEVICE_ID_INTEL_FBD_1 +#define PCI_DEVICE_ID_INTEL_FBD_1 0x25F6 +#endif + +/* Device 16, + * Function 0: System Address + * Function 1: Memory Branch Map, Control, Errors Register + * Function 2: FSB Error Registers + * + * All 3 functions of Device 16 (0,1,2) share the SAME DID + */ +#define PCI_DEVICE_ID_INTEL_I5000_DEV16 0x25F0 + +/* OFFSETS for Function 0 */ + +/* OFFSETS for Function 1 */ +#define AMBASE 0x48 +#define MAXCH 0x56 +#define MAXDIMMPERCH 0x57 +#define TOLM 0x6C +#define REDMEMB 0x7C +#define RED_ECC_LOCATOR(x) ((x) & 0x3FFFF) +#define REC_ECC_LOCATOR_EVEN(x) ((x) & 0x001FF) +#define REC_ECC_LOCATOR_ODD(x) ((x) & 0x3FE00) +#define MIR0 0x80 +#define MIR1 0x84 +#define MIR2 0x88 +#define AMIR0 0x8C +#define AMIR1 0x90 +#define AMIR2 0x94 + +#define FERR_FAT_FBD 0x98 +#define NERR_FAT_FBD 0x9C +#define EXTRACT_FBDCHAN_INDX(x) (((x)>>28) & 0x3) +#define FERR_FAT_FBDCHAN 0x30000000 +#define FERR_FAT_M3ERR 0x00000004 +#define FERR_FAT_M2ERR 0x00000002 +#define FERR_FAT_M1ERR 0x00000001 +#define 
FERR_FAT_MASK (FERR_FAT_M1ERR | \ + FERR_FAT_M2ERR | \ + FERR_FAT_M3ERR) + +#define FERR_NF_FBD 0xA0 + +/* Thermal and SPD or BFD errors */ +#define FERR_NF_M28ERR 0x01000000 +#define FERR_NF_M27ERR 0x00800000 +#define FERR_NF_M26ERR 0x00400000 +#define FERR_NF_M25ERR 0x00200000 +#define FERR_NF_M24ERR 0x00100000 +#define FERR_NF_M23ERR 0x00080000 +#define FERR_NF_M22ERR 0x00040000 +#define FERR_NF_M21ERR 0x00020000 + +/* Correctable errors */ +#define FERR_NF_M20ERR 0x00010000 +#define FERR_NF_M19ERR 0x00008000 +#define FERR_NF_M18ERR 0x00004000 +#define FERR_NF_M17ERR 0x00002000 + +/* Non-Retry or redundant Retry errors */ +#define FERR_NF_M16ERR 0x00001000 +#define FERR_NF_M15ERR 0x00000800 +#define FERR_NF_M14ERR 0x00000400 +#define FERR_NF_M13ERR 0x00000200 + +/* Uncorrectable errors */ +#define FERR_NF_M12ERR 0x00000100 +#define FERR_NF_M11ERR 0x00000080 +#define FERR_NF_M10ERR 0x00000040 +#define FERR_NF_M9ERR 0x00000020 +#define FERR_NF_M8ERR 0x00000010 +#define FERR_NF_M7ERR 0x00000008 +#define FERR_NF_M6ERR 0x00000004 +#define FERR_NF_M5ERR 0x00000002 +#define FERR_NF_M4ERR 0x00000001 + +#define FERR_NF_UNCORRECTABLE (FERR_NF_M12ERR | \ + FERR_NF_M11ERR | \ + FERR_NF_M10ERR | \ + FERR_NF_M8ERR | \ + FERR_NF_M7ERR | \ + FERR_NF_M6ERR | \ + FERR_NF_M5ERR | \ + FERR_NF_M4ERR) +#define FERR_NF_CORRECTABLE (FERR_NF_M20ERR | \ + FERR_NF_M19ERR | \ + FERR_NF_M18ERR | \ + FERR_NF_M17ERR) +#define FERR_NF_DIMM_SPARE (FERR_NF_M27ERR | \ + FERR_NF_M28ERR) +#define FERR_NF_THERMAL (FERR_NF_M26ERR | \ + FERR_NF_M25ERR | \ + FERR_NF_M24ERR | \ + FERR_NF_M23ERR) +#define FERR_NF_SPD_PROTOCOL (FERR_NF_M22ERR) +#define FERR_NF_NORTH_CRC (FERR_NF_M21ERR) +#define FERR_NF_NON_RETRY (FERR_NF_M13ERR | \ + FERR_NF_M14ERR | \ + FERR_NF_M15ERR) + +#define NERR_NF_FBD 0xA4 +#define FERR_NF_MASK (FERR_NF_UNCORRECTABLE | \ + FERR_NF_CORRECTABLE | \ + FERR_NF_DIMM_SPARE | \ + FERR_NF_THERMAL | \ + FERR_NF_SPD_PROTOCOL | \ + FERR_NF_NORTH_CRC | \ + FERR_NF_NON_RETRY) + +#define 
EMASK_FBD 0xA8 +#define EMASK_FBD_M28ERR 0x08000000 +#define EMASK_FBD_M27ERR 0x04000000 +#define EMASK_FBD_M26ERR 0x02000000 +#define EMASK_FBD_M25ERR 0x01000000 +#define EMASK_FBD_M24ERR 0x00800000 +#define EMASK_FBD_M23ERR 0x00400000 +#define EMASK_FBD_M22ERR 0x00200000 +#define EMASK_FBD_M21ERR 0x00100000 +#define EMASK_FBD_M20ERR 0x00080000 +#define EMASK_FBD_M19ERR 0x00040000 +#define EMASK_FBD_M18ERR 0x00020000 +#define EMASK_FBD_M17ERR 0x00010000 + +#define EMASK_FBD_M15ERR 0x00004000 +#define EMASK_FBD_M14ERR 0x00002000 +#define EMASK_FBD_M13ERR 0x00001000 +#define EMASK_FBD_M12ERR 0x00000800 +#define EMASK_FBD_M11ERR 0x00000400 +#define EMASK_FBD_M10ERR 0x00000200 +#define EMASK_FBD_M9ERR 0x00000100 +#define EMASK_FBD_M8ERR 0x00000080 +#define EMASK_FBD_M7ERR 0x00000040 +#define EMASK_FBD_M6ERR 0x00000020 +#define EMASK_FBD_M5ERR 0x00000010 +#define EMASK_FBD_M4ERR 0x00000008 +#define EMASK_FBD_M3ERR 0x00000004 +#define EMASK_FBD_M2ERR 0x00000002 +#define EMASK_FBD_M1ERR 0x00000001 + +#define ENABLE_EMASK_FBD_FATAL_ERRORS (EMASK_FBD_M1ERR | \ + EMASK_FBD_M2ERR | \ + EMASK_FBD_M3ERR) + +#define ENABLE_EMASK_FBD_UNCORRECTABLE (EMASK_FBD_M4ERR | \ + EMASK_FBD_M5ERR | \ + EMASK_FBD_M6ERR | \ + EMASK_FBD_M7ERR | \ + EMASK_FBD_M8ERR | \ + EMASK_FBD_M9ERR | \ + EMASK_FBD_M10ERR | \ + EMASK_FBD_M11ERR | \ + EMASK_FBD_M12ERR) +#define ENABLE_EMASK_FBD_CORRECTABLE (EMASK_FBD_M17ERR | \ + EMASK_FBD_M18ERR | \ + EMASK_FBD_M19ERR | \ + EMASK_FBD_M20ERR) +#define ENABLE_EMASK_FBD_DIMM_SPARE (EMASK_FBD_M27ERR | \ + EMASK_FBD_M28ERR) +#define ENABLE_EMASK_FBD_THERMALS (EMASK_FBD_M26ERR | \ + EMASK_FBD_M25ERR | \ + EMASK_FBD_M24ERR | \ + EMASK_FBD_M23ERR) +#define ENABLE_EMASK_FBD_SPD_PROTOCOL (EMASK_FBD_M22ERR) +#define ENABLE_EMASK_FBD_NORTH_CRC (EMASK_FBD_M21ERR) +#define ENABLE_EMASK_FBD_NON_RETRY (EMASK_FBD_M15ERR | \ + EMASK_FBD_M14ERR | \ + EMASK_FBD_M13ERR) + +#define ENABLE_EMASK_ALL (ENABLE_EMASK_FBD_NON_RETRY | \ + ENABLE_EMASK_FBD_NORTH_CRC | \ + 
ENABLE_EMASK_FBD_SPD_PROTOCOL | \ + ENABLE_EMASK_FBD_THERMALS | \ + ENABLE_EMASK_FBD_DIMM_SPARE | \ + ENABLE_EMASK_FBD_FATAL_ERRORS | \ + ENABLE_EMASK_FBD_CORRECTABLE | \ + ENABLE_EMASK_FBD_UNCORRECTABLE) + +#define ERR0_FBD 0xAC +#define ERR1_FBD 0xB0 +#define ERR2_FBD 0xB4 +#define MCERR_FBD 0xB8 +#define NRECMEMA 0xBE +#define NREC_BANK(x) (((x)>>12) & 0x7) +#define NREC_RDWR(x) (((x)>>11) & 1) +#define NREC_RANK(x) (((x)>>8) & 0x7) +#define NRECMEMB 0xC0 +#define NREC_CAS(x) (((x)>>16) & 0xFFFFFF) +#define NREC_RAS(x) ((x) & 0x7FFF) +#define NRECFGLOG 0xC4 +#define NREEECFBDA 0xC8 +#define NREEECFBDB 0xCC +#define NREEECFBDC 0xD0 +#define NREEECFBDD 0xD4 +#define NREEECFBDE 0xD8 +#define REDMEMA 0xDC +#define RECMEMA 0xE2 +#define REC_BANK(x) (((x)>>12) & 0x7) +#define REC_RDWR(x) (((x)>>11) & 1) +#define REC_RANK(x) (((x)>>8) & 0x7) +#define RECMEMB 0xE4 +#define REC_CAS(x) (((x)>>16) & 0xFFFFFF) +#define REC_RAS(x) ((x) & 0x7FFF) +#define RECFGLOG 0xE8 +#define RECFBDA 0xEC +#define RECFBDB 0xF0 +#define RECFBDC 0xF4 +#define RECFBDD 0xF8 +#define RECFBDE 0xFC + +/* OFFSETS for Function 2 */ + +/* + * Device 21, + * Function 0: Memory Map Branch 0 + * + * Device 22, + * Function 0: Memory Map Branch 1 + */ +#define PCI_DEVICE_ID_I5000_BRANCH_0 0x25F5 +#define PCI_DEVICE_ID_I5000_BRANCH_1 0x25F6 + +#define AMB_PRESENT_0 0x64 +#define AMB_PRESENT_1 0x66 +#define MTR0 0x80 +#define MTR1 0x84 +#define MTR2 0x88 +#define MTR3 0x8C + +#define NUM_MTRS 4 +#define CHANNELS_PER_BRANCH (2) + +/* Defines to extract the vaious fields from the + * MTRx - Memory Technology Registers + */ +#define MTR_DIMMS_PRESENT(mtr) ((mtr) & (0x1 << 8)) +#define MTR_DRAM_WIDTH(mtr) ((((mtr) >> 6) & 0x1) ? 8 : 4) +#define MTR_DRAM_BANKS(mtr) ((((mtr) >> 5) & 0x1) ? 8 : 4) +#define MTR_DRAM_BANKS_ADDR_BITS(mtr) ((MTR_DRAM_BANKS(mtr) == 8) ? 3 : 2) +#define MTR_DIMM_RANK(mtr) (((mtr) >> 4) & 0x1) +#define MTR_DIMM_RANK_ADDR_BITS(mtr) (MTR_DIMM_RANK(mtr) ? 
2 : 1)
+#define MTR_DIMM_ROWS(mtr)		(((mtr) >> 2) & 0x3)
+#define MTR_DIMM_ROWS_ADDR_BITS(mtr)	(MTR_DIMM_ROWS(mtr) + 13)
+#define MTR_DIMM_COLS(mtr)		((mtr) & 0x3)
+#define MTR_DIMM_COLS_ADDR_BITS(mtr)	(MTR_DIMM_COLS(mtr) + 10)
+
+#ifdef CONFIG_EDAC_DEBUG
+/* debug strings, one entry per 2-bit row / column encoding */
+static char *numrow_toString[] = {
+	"8,192 - 13 rows",
+	"16,384 - 14 rows",
+	"32,768 - 15 rows",
+	"reserved"
+};
+
+static char *numcol_toString[] = {
+	"1,024 - 10 columns",
+	"2,048 - 11 columns",
+	"4,096 - 12 columns",
+	"reserved"
+};
+#endif
+
+/* Enumeration of supported devices */
+enum i5000_chips {
+	I5000P = 0,
+	I5000V = 1,		/* future */
+	I5000X = 2		/* future */
+};
+
+/* Device name and register DID (Device ID) */
+struct i5000_dev_info {
+	const char *ctl_name;	/* name for this device */
+	u16 fsb_mapping_errors;	/* DID for the branchmap,control */
+};
+
+/* Table of devices attributes supported by this driver */
+static const struct i5000_dev_info i5000_devs[] = {
+	[I5000P] = {
+		.ctl_name = "I5000",
+		.fsb_mapping_errors = PCI_DEVICE_ID_INTEL_I5000_DEV16,
+	},
+};
+
+struct i5000_dimm_info {
+	int megabytes;		/* size, 0 means not present  */
+	int dual_rank;
+};
+
+#define	MAX_CHANNELS	6	/* max possible channels */
+#define MAX_CSROWS	(8*2)	/* max possible csrows per channel */
+
+/* driver private data structure */
+struct i5000_pvt {
+	struct pci_dev *system_address;	/* 16.0 */
+	struct pci_dev *branchmap_werrors;	/* 16.1 */
+	struct pci_dev *fsb_error_regs;	/* 16.2 */
+	struct pci_dev *branch_0;	/* 21.0 */
+	struct pci_dev *branch_1;	/* 22.0 */
+
+	u16 tolm;		/* top of low memory */
+	u64 ambase;		/* AMB BAR */
+
+	u16 mir0, mir1, mir2;
+
+	u16 b0_mtr[NUM_MTRS];	/* Memory Technology Reg */
+	u16 b0_ambpresent0;	/* Branch 0, Channel 0 */
+	u16 b0_ambpresent1;	/* Branch 0, Channel 1 */
+
+	u16 b1_mtr[NUM_MTRS];	/* Memory Technology Reg */
+	u16 b1_ambpresent0;	/* Branch 1, Channel 8 */
+	u16 b1_ambpresent1;	/* Branch 1, Channel 1 */
+
+	/* DIMM information matrix, allocating architecture maximums */
+	struct i5000_dimm_info dimm_info[MAX_CSROWS][MAX_CHANNELS];
+
+	/* Actual values for this controller */
+	int maxch;		/* Max channels */
+	int maxdimmperch;	/* Max DIMMs per channel */
+};
+
+/* I5000 MCH error information retrieved from Hardware */
+struct i5000_error_info {
+
+	/* These registers are always read from the MC */
+	u32 ferr_fat_fbd;	/* First Errors Fatal */
+	u32 nerr_fat_fbd;	/* Next Errors Fatal */
+	u32 ferr_nf_fbd;	/* First Errors Non-Fatal */
+	u32 nerr_nf_fbd;	/* Next Errors Non-Fatal */
+
+	/* These registers are input ONLY if there was a Recoverable  Error */
+	u32 redmemb;		/* Recoverable Mem Data Error log B */
+	u16 recmema;		/* Recoverable Mem Error log A */
+	u32 recmemb;		/* Recoverable Mem Error log B */
+
+	/* These registers are input ONLY if there was a
+	 * Non-Recoverable Error */
+	u16 nrecmema;		/* Non-Recoverable Mem log A */
+	u32 nrecmemb;		/* Non-Recoverable Mem log B; 32 bits wide
+				 * because NREC_CAS() takes bits 16 and up */
+
+};
+
+static struct edac_pci_ctl_info *i5000_pci;
+
+/*
+ *	i5000_get_error_info	Retrieve the hardware error information from
+ *				the hardware and cache it in the 'info'
+ *				structure
+ *
+ *	Every field of 'info' is written: the fatal and the non-fatal
+ *	group are each either filled from the registers (and the first-error
+ *	register written back to clear it) or zeroed.
+ */
+static void i5000_get_error_info(struct mem_ctl_info *mci,
+				 struct i5000_error_info *info)
+{
+	struct i5000_pvt *pvt;
+	u32 value;
+
+	pvt = mci->pvt_info;
+
+	/* read in the 1st FATAL error register */
+	pci_read_config_dword(pvt->branchmap_werrors, FERR_FAT_FBD, &value);
+
+	/* Mask only the bits that the doc says are valid
+	 */
+	value &= (FERR_FAT_FBDCHAN | FERR_FAT_MASK);
+
+	/* If there is an error, then read in the */
+	/* NEXT FATAL error register and the Memory Error Log Register A */
+	if (value & FERR_FAT_MASK) {
+		info->ferr_fat_fbd = value;
+
+		/* harvest the various error data we need */
+		pci_read_config_dword(pvt->branchmap_werrors,
+				NERR_FAT_FBD, &info->nerr_fat_fbd);
+		pci_read_config_word(pvt->branchmap_werrors,
+				NRECMEMA, &info->nrecmema);
+		/* log B is read as a dword, like RECMEMB below */
+		pci_read_config_dword(pvt->branchmap_werrors,
+				NRECMEMB, &info->nrecmemb);
+
+		/* Clear the error bits, by writing them back */
+		pci_write_config_dword(pvt->branchmap_werrors,
+				FERR_FAT_FBD, value);
+	} else {
+		info->ferr_fat_fbd = 0;
+		info->nerr_fat_fbd = 0;
+		info->nrecmema = 0;
+		info->nrecmemb = 0;
+	}
+
+	/* read in the 1st NON-FATAL error register */
+	pci_read_config_dword(pvt->branchmap_werrors, FERR_NF_FBD, &value);
+
+	/* If there is an error, then read in the NEXT NON-FATAL error
+	 * register as well */
+	if (value & FERR_NF_MASK) {
+		info->ferr_nf_fbd = value;
+
+		/* harvest the various error data we need */
+		pci_read_config_dword(pvt->branchmap_werrors,
+				NERR_NF_FBD, &info->nerr_nf_fbd);
+		pci_read_config_word(pvt->branchmap_werrors,
+				RECMEMA, &info->recmema);
+		pci_read_config_dword(pvt->branchmap_werrors,
+				RECMEMB, &info->recmemb);
+		pci_read_config_dword(pvt->branchmap_werrors,
+				REDMEMB, &info->redmemb);
+
+		/* Clear the error bits, by writing them back */
+		pci_write_config_dword(pvt->branchmap_werrors,
+				FERR_NF_FBD, value);
+	} else {
+		info->ferr_nf_fbd = 0;
+		info->nerr_nf_fbd = 0;
+		info->recmema = 0;
+		info->recmemb = 0;
+		info->redmemb = 0;
+	}
+}
+
+/*
+ * i5000_process_fatal_error_info(struct mem_ctl_info *mci,
+ * 					struct i5000_error_info *info,
+ * 					int handle_errors);
+ *
+ *	handle the Intel FATAL errors, if any
+ */
+static void i5000_process_fatal_error_info(struct mem_ctl_info *mci,
+					struct i5000_error_info *info,
+					int handle_errors)
+{
+	char msg[EDAC_MC_LABEL_LEN + 1 + 90];
+	u32 allErrors;
+	int branch;
+	int channel;
+	int bank;
+	int rank;
+	int rdwr;
+	int ras, cas;
+
+	/* mask off the Error bits that are possible */
+	allErrors = (info->ferr_fat_fbd & FERR_FAT_MASK);
+	if (!allErrors)
+		return;		/* if no error, return now */
+
+	/* ONLY ONE of the possible error bits will be set, as per the docs */
+	i5000_mc_printk(mci, KERN_ERR,
+			"FATAL ERRORS Found!!!
1st FATAL Err Reg= 0x%x\n", + allErrors); + + branch = EXTRACT_FBDCHAN_INDX(info->ferr_fat_fbd); + channel = branch; + + /* Use the NON-Recoverable macros to extract data */ + bank = NREC_BANK(info->nrecmema); + rank = NREC_RANK(info->nrecmema); + rdwr = NREC_RDWR(info->nrecmema); + ras = NREC_RAS(info->nrecmemb); + cas = NREC_CAS(info->nrecmemb); + + debugf0("\t\tCSROW= %d Channels= %d,%d (Branch= %d " + "DRAM Bank= %d rdwr= %s ras= %d cas= %d)\n", + rank, channel, channel + 1, branch >> 1, bank, + rdwr ? "Write" : "Read", ras, cas); + + /* Only 1 bit will be on */ + if (allErrors & FERR_FAT_M1ERR) { + i5000_mc_printk(mci, KERN_ERR, + "Alert on non-redundant retry or fast " + "reset timeout\n"); + + } else if (allErrors & FERR_FAT_M2ERR) { + i5000_mc_printk(mci, KERN_ERR, + "Northbound CRC error on non-redundant " + "retry\n"); + + } else if (allErrors & FERR_FAT_M3ERR) { + i5000_mc_printk(mci, KERN_ERR, + ">Tmid Thermal event with intelligent " + "throttling disabled\n"); + } + + /* Form out message */ + snprintf(msg, sizeof(msg), + "(Branch=%d DRAM-Bank=%d RDWR=%s RAS=%d CAS=%d " + "FATAL Err=0x%x)", + branch >> 1, bank, rdwr ? 
"Write" : "Read", ras, cas, + allErrors); + + /* Call the helper to output message */ + edac_mc_handle_fbd_ue(mci, rank, channel, channel + 1, msg); +} + +/* + * i5000_process_fatal_error_info(struct mem_ctl_info *mci, + * struct i5000_error_info *info, + * int handle_errors); + * + * handle the Intel NON-FATAL errors, if any + */ +static void i5000_process_nonfatal_error_info(struct mem_ctl_info *mci, + struct i5000_error_info *info, + int handle_errors) +{ + char msg[EDAC_MC_LABEL_LEN + 1 + 90]; + u32 allErrors; + u32 ue_errors; + u32 ce_errors; + u32 misc_errors; + int branch; + int channel; + int bank; + int rank; + int rdwr; + int ras, cas; + + /* mask off the Error bits that are possible */ + allErrors = (info->ferr_nf_fbd & FERR_NF_MASK); + if (!allErrors) + return; /* if no error, return now */ + + /* ONLY ONE of the possible error bits will be set, as per the docs */ + i5000_mc_printk(mci, KERN_WARNING, + "NON-FATAL ERRORS Found!!! 1st NON-FATAL Err " + "Reg= 0x%x\n", allErrors); + + ue_errors = allErrors & FERR_NF_UNCORRECTABLE; + if (ue_errors) { + debugf0("\tUncorrected bits= 0x%x\n", ue_errors); + + branch = EXTRACT_FBDCHAN_INDX(info->ferr_nf_fbd); + channel = branch; + bank = NREC_BANK(info->nrecmema); + rank = NREC_RANK(info->nrecmema); + rdwr = NREC_RDWR(info->nrecmema); + ras = NREC_RAS(info->nrecmemb); + cas = NREC_CAS(info->nrecmemb); + + debugf0 + ("\t\tCSROW= %d Channels= %d,%d (Branch= %d " + "DRAM Bank= %d rdwr= %s ras= %d cas= %d)\n", + rank, channel, channel + 1, branch >> 1, bank, + rdwr ? "Write" : "Read", ras, cas); + + /* Form out message */ + snprintf(msg, sizeof(msg), + "(Branch=%d DRAM-Bank=%d RDWR=%s RAS=%d " + "CAS=%d, UE Err=0x%x)", + branch >> 1, bank, rdwr ? 
"Write" : "Read", ras, cas, + ue_errors); + + /* Call the helper to output message */ + edac_mc_handle_fbd_ue(mci, rank, channel, channel + 1, msg); + } + + /* Check correctable errors */ + ce_errors = allErrors & FERR_NF_CORRECTABLE; + if (ce_errors) { + debugf0("\tCorrected bits= 0x%x\n", ce_errors); + + branch = EXTRACT_FBDCHAN_INDX(info->ferr_nf_fbd); + + channel = 0; + if (REC_ECC_LOCATOR_ODD(info->redmemb)) + channel = 1; + + /* Convert channel to be based from zero, instead of + * from branch base of 0 */ + channel += branch; + + bank = REC_BANK(info->recmema); + rank = REC_RANK(info->recmema); + rdwr = REC_RDWR(info->recmema); + ras = REC_RAS(info->recmemb); + cas = REC_CAS(info->recmemb); + + debugf0("\t\tCSROW= %d Channel= %d (Branch %d " + "DRAM Bank= %d rdwr= %s ras= %d cas= %d)\n", + rank, channel, branch >> 1, bank, + rdwr ? "Write" : "Read", ras, cas); + + /* Form out message */ + snprintf(msg, sizeof(msg), + "(Branch=%d DRAM-Bank=%d RDWR=%s RAS=%d " + "CAS=%d, CE Err=0x%x)", branch >> 1, bank, + rdwr ? 
"Write" : "Read", ras, cas, ce_errors); + + /* Call the helper to output message */ + edac_mc_handle_fbd_ce(mci, rank, channel, msg); + } + + /* See if any of the thermal errors have fired */ + misc_errors = allErrors & FERR_NF_THERMAL; + if (misc_errors) { + i5000_printk(KERN_WARNING, "\tTHERMAL Error, bits= 0x%x\n", + misc_errors); + } + + /* See if any of the thermal errors have fired */ + misc_errors = allErrors & FERR_NF_NON_RETRY; + if (misc_errors) { + i5000_printk(KERN_WARNING, "\tNON-Retry Errors, bits= 0x%x\n", + misc_errors); + } + + /* See if any of the thermal errors have fired */ + misc_errors = allErrors & FERR_NF_NORTH_CRC; + if (misc_errors) { + i5000_printk(KERN_WARNING, + "\tNORTHBOUND CRC Error, bits= 0x%x\n", + misc_errors); + } + + /* See if any of the thermal errors have fired */ + misc_errors = allErrors & FERR_NF_SPD_PROTOCOL; + if (misc_errors) { + i5000_printk(KERN_WARNING, + "\tSPD Protocol Error, bits= 0x%x\n", + misc_errors); + } + + /* See if any of the thermal errors have fired */ + misc_errors = allErrors & FERR_NF_DIMM_SPARE; + if (misc_errors) { + i5000_printk(KERN_WARNING, "\tDIMM-Spare Error, bits= 0x%x\n", + misc_errors); + } +} + +/* + * i5000_process_error_info Process the error info that is + * in the 'info' structure, previously retrieved from hardware + */ +static void i5000_process_error_info(struct mem_ctl_info *mci, + struct i5000_error_info *info, + int handle_errors) +{ + /* First handle any fatal errors that occurred */ + i5000_process_fatal_error_info(mci, info, handle_errors); + + /* now handle any non-fatal errors that occurred */ + i5000_process_nonfatal_error_info(mci, info, handle_errors); +} + +/* + * i5000_clear_error Retrieve any error from the hardware + * but do NOT process that error. + * Used for 'clearing' out of previous errors + * Called by the Core module. 
+ */ +static void i5000_clear_error(struct mem_ctl_info *mci) +{ + struct i5000_error_info info; + + i5000_get_error_info(mci, &info); +} + +/* + * i5000_check_error Retrieve and process errors reported by the + * hardware. Called by the Core module. + */ +static void i5000_check_error(struct mem_ctl_info *mci) +{ + struct i5000_error_info info; + debugf4("MC%d: " __FILE__ ": %s()\n", mci->mc_idx, __func__); + i5000_get_error_info(mci, &info); + i5000_process_error_info(mci, &info, 1); +} + +/* + * i5000_get_devices Find and perform 'get' operation on the MCH's + * device/functions we want to reference for this driver + * + * Need to 'get' device 16 func 1 and func 2 + */ +static int i5000_get_devices(struct mem_ctl_info *mci, int dev_idx) +{ + //const struct i5000_dev_info *i5000_dev = &i5000_devs[dev_idx]; + struct i5000_pvt *pvt; + struct pci_dev *pdev; + + pvt = mci->pvt_info; + + /* Attempt to 'get' the MCH register we want */ + pdev = NULL; + while (1) { + pdev = pci_get_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_I5000_DEV16, pdev); + + /* End of list, leave */ + if (pdev == NULL) { + i5000_printk(KERN_ERR, + "'system address,Process Bus' " + "device not found:" + "vendor 0x%x device 0x%x FUNC 1 " + "(broken BIOS?)\n", + PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_I5000_DEV16); + + return 1; + } + + /* Scan for device 16 func 1 */ + if (PCI_FUNC(pdev->devfn) == 1) + break; + } + + pvt->branchmap_werrors = pdev; + + /* Attempt to 'get' the MCH register we want */ + pdev = NULL; + while (1) { + pdev = pci_get_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_I5000_DEV16, pdev); + + if (pdev == NULL) { + i5000_printk(KERN_ERR, + "MC: 'branchmap,control,errors' " + "device not found:" + "vendor 0x%x device 0x%x Func 2 " + "(broken BIOS?)\n", + PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_I5000_DEV16); + + pci_dev_put(pvt->branchmap_werrors); + return 1; + } + + /* Scan for device 16 func 1 */ + if (PCI_FUNC(pdev->devfn) == 2) + break; + } + + 
pvt->fsb_error_regs = pdev; + + debugf1("System Address, processor bus- PCI Bus ID: %s %x:%x\n", + pci_name(pvt->system_address), + pvt->system_address->vendor, pvt->system_address->device); + debugf1("Branchmap, control and errors - PCI Bus ID: %s %x:%x\n", + pci_name(pvt->branchmap_werrors), + pvt->branchmap_werrors->vendor, pvt->branchmap_werrors->device); + debugf1("FSB Error Regs - PCI Bus ID: %s %x:%x\n", + pci_name(pvt->fsb_error_regs), + pvt->fsb_error_regs->vendor, pvt->fsb_error_regs->device); + + pdev = NULL; + pdev = pci_get_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_I5000_BRANCH_0, pdev); + + if (pdev == NULL) { + i5000_printk(KERN_ERR, + "MC: 'BRANCH 0' device not found:" + "vendor 0x%x device 0x%x Func 0 (broken BIOS?)\n", + PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_I5000_BRANCH_0); + + pci_dev_put(pvt->branchmap_werrors); + pci_dev_put(pvt->fsb_error_regs); + return 1; + } + + pvt->branch_0 = pdev; + + /* If this device claims to have more than 2 channels then + * fetch Branch 1's information + */ + if (pvt->maxch >= CHANNELS_PER_BRANCH) { + pdev = NULL; + pdev = pci_get_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_I5000_BRANCH_1, pdev); + + if (pdev == NULL) { + i5000_printk(KERN_ERR, + "MC: 'BRANCH 1' device not found:" + "vendor 0x%x device 0x%x Func 0 " + "(broken BIOS?)\n", + PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_I5000_BRANCH_1); + + pci_dev_put(pvt->branchmap_werrors); + pci_dev_put(pvt->fsb_error_regs); + pci_dev_put(pvt->branch_0); + return 1; + } + + pvt->branch_1 = pdev; + } + + return 0; +} + +/* + * i5000_put_devices 'put' all the devices that we have + * reserved via 'get' + */ +static void i5000_put_devices(struct mem_ctl_info *mci) +{ + struct i5000_pvt *pvt; + + pvt = mci->pvt_info; + + pci_dev_put(pvt->branchmap_werrors); /* FUNC 1 */ + pci_dev_put(pvt->fsb_error_regs); /* FUNC 2 */ + pci_dev_put(pvt->branch_0); /* DEV 21 */ + + /* Only if more than 2 channels do we release the second branch */ + if (pvt->maxch >= CHANNELS_PER_BRANCH) + 
pci_dev_put(pvt->branch_1); /* DEV 22 */ +} + +/* + * determine_amb_resent + * + * the information is contained in NUM_MTRS different registers + * determineing which of the NUM_MTRS requires knowing + * which channel is in question + * + * 2 branches, each with 2 channels + * b0_ambpresent0 for channel '0' + * b0_ambpresent1 for channel '1' + * b1_ambpresent0 for channel '2' + * b1_ambpresent1 for channel '3' + */ +static int determine_amb_present_reg(struct i5000_pvt *pvt, int channel) +{ + int amb_present; + + if (channel < CHANNELS_PER_BRANCH) { + if (channel & 0x1) + amb_present = pvt->b0_ambpresent1; + else + amb_present = pvt->b0_ambpresent0; + } else { + if (channel & 0x1) + amb_present = pvt->b1_ambpresent1; + else + amb_present = pvt->b1_ambpresent0; + } + + return amb_present; +} + +/* + * determine_mtr(pvt, csrow, channel) + * + * return the proper MTR register as determine by the csrow and channel desired + */ +static int determine_mtr(struct i5000_pvt *pvt, int csrow, int channel) +{ + int mtr; + + if (channel < CHANNELS_PER_BRANCH) + mtr = pvt->b0_mtr[csrow >> 1]; + else + mtr = pvt->b1_mtr[csrow >> 1]; + + return mtr; +} + +/* + */ +static void decode_mtr(int slot_row, u16 mtr) +{ + int ans; + + ans = MTR_DIMMS_PRESENT(mtr); + + debugf2("\tMTR%d=0x%x: DIMMs are %s\n", slot_row, mtr, + ans ? "Present" : "NOT Present"); + if (!ans) + return; + + debugf2("\t\tWIDTH: x%d\n", MTR_DRAM_WIDTH(mtr)); + debugf2("\t\tNUMBANK: %d bank(s)\n", MTR_DRAM_BANKS(mtr)); + debugf2("\t\tNUMRANK: %s\n", MTR_DIMM_RANK(mtr) ? 
"double" : "single"); + debugf2("\t\tNUMROW: %s\n", numrow_toString[MTR_DIMM_ROWS(mtr)]); + debugf2("\t\tNUMCOL: %s\n", numcol_toString[MTR_DIMM_COLS(mtr)]); +} + +static void handle_channel(struct i5000_pvt *pvt, int csrow, int channel, + struct i5000_dimm_info *dinfo) +{ + int mtr; + int amb_present_reg; + int addrBits; + + mtr = determine_mtr(pvt, csrow, channel); + if (MTR_DIMMS_PRESENT(mtr)) { + amb_present_reg = determine_amb_present_reg(pvt, channel); + + /* Determine if there is a DIMM present in this DIMM slot */ + if (amb_present_reg & (1 << (csrow >> 1))) { + dinfo->dual_rank = MTR_DIMM_RANK(mtr); + + if (!((dinfo->dual_rank == 0) && + ((csrow & 0x1) == 0x1))) { + /* Start with the number of bits for a Bank + * on the DRAM */ + addrBits = MTR_DRAM_BANKS_ADDR_BITS(mtr); + /* Add thenumber of ROW bits */ + addrBits += MTR_DIMM_ROWS_ADDR_BITS(mtr); + /* add the number of COLUMN bits */ + addrBits += MTR_DIMM_COLS_ADDR_BITS(mtr); + + addrBits += 6; /* add 64 bits per DIMM */ + addrBits -= 20; /* divide by 2^^20 */ + addrBits -= 3; /* 8 bits per bytes */ + + dinfo->megabytes = 1 << addrBits; + } + } + } +} + +/* + * calculate_dimm_size + * + * also will output a DIMM matrix map, if debug is enabled, for viewing + * how the DIMMs are populated + */ +static void calculate_dimm_size(struct i5000_pvt *pvt) +{ + struct i5000_dimm_info *dinfo; + int csrow, max_csrows; + char *p, *mem_buffer; + int space, n; + int channel; + + /* ================= Generate some debug output ================= */ + space = PAGE_SIZE; + mem_buffer = p = kmalloc(space, GFP_KERNEL); + if (p == NULL) { + i5000_printk(KERN_ERR, "MC: %s:%s() kmalloc() failed\n", + __FILE__, __func__); + return; + } + + n = snprintf(p, space, "\n"); + p += n; + space -= n; + + /* Scan all the actual CSROWS (which is # of DIMMS * 2) + * and calculate the information for each DIMM + * Start with the highest csrow first, to display it first + * and work toward the 0th csrow + */ + max_csrows = pvt->maxdimmperch 
* 2; + for (csrow = max_csrows - 1; csrow >= 0; csrow--) { + + /* on an odd csrow, first output a 'boundary' marker, + * then reset the message buffer */ + if (csrow & 0x1) { + n = snprintf(p, space, "---------------------------" + "--------------------------------"); + p += n; + space -= n; + debugf2("%s\n", mem_buffer); + p = mem_buffer; + space = PAGE_SIZE; + } + n = snprintf(p, space, "csrow %2d ", csrow); + p += n; + space -= n; + + for (channel = 0; channel < pvt->maxch; channel++) { + dinfo = &pvt->dimm_info[csrow][channel]; + handle_channel(pvt, csrow, channel, dinfo); + n = snprintf(p, space, "%4d MB | ", dinfo->megabytes); + p += n; + space -= n; + } + n = snprintf(p, space, "\n"); + p += n; + space -= n; + } + + /* Output the last bottom 'boundary' marker */ + n = snprintf(p, space, "---------------------------" + "--------------------------------\n"); + p += n; + space -= n; + + /* now output the 'channel' labels */ + n = snprintf(p, space, " "); + p += n; + space -= n; + for (channel = 0; channel < pvt->maxch; channel++) { + n = snprintf(p, space, "channel %d | ", channel); + p += n; + space -= n; + } + n = snprintf(p, space, "\n"); + p += n; + space -= n; + + /* output the last message and free buffer */ + debugf2("%s\n", mem_buffer); + kfree(mem_buffer); +} + +/* + * i5000_get_mc_regs read in the necessary registers and + * cache locally + * + * Fills in the private data members + */ +static void i5000_get_mc_regs(struct mem_ctl_info *mci) +{ + struct i5000_pvt *pvt; + u32 actual_tolm; + u16 limit; + int slot_row; + int maxch; + int maxdimmperch; + int way0, way1; + + pvt = mci->pvt_info; + + pci_read_config_dword(pvt->system_address, AMBASE, + (u32 *) & pvt->ambase); + pci_read_config_dword(pvt->system_address, AMBASE + sizeof(u32), + ((u32 *) & pvt->ambase) + sizeof(u32)); + + maxdimmperch = pvt->maxdimmperch; + maxch = pvt->maxch; + + debugf2("AMBASE= 0x%lx MAXCH= %d MAX-DIMM-Per-CH= %d\n", + (long unsigned int)pvt->ambase, pvt->maxch, 
pvt->maxdimmperch); + + /* Get the Branch Map regs */ + pci_read_config_word(pvt->branchmap_werrors, TOLM, &pvt->tolm); + pvt->tolm >>= 12; + debugf2("\nTOLM (number of 256M regions) =%u (0x%x)\n", pvt->tolm, + pvt->tolm); + + actual_tolm = pvt->tolm << 28; + debugf2("Actual TOLM byte addr=%u (0x%x)\n", actual_tolm, actual_tolm); + + pci_read_config_word(pvt->branchmap_werrors, MIR0, &pvt->mir0); + pci_read_config_word(pvt->branchmap_werrors, MIR1, &pvt->mir1); + pci_read_config_word(pvt->branchmap_werrors, MIR2, &pvt->mir2); + + /* Get the MIR[0-2] regs */ + limit = (pvt->mir0 >> 4) & 0x0FFF; + way0 = pvt->mir0 & 0x1; + way1 = pvt->mir0 & 0x2; + debugf2("MIR0: limit= 0x%x WAY1= %u WAY0= %x\n", limit, way1, way0); + limit = (pvt->mir1 >> 4) & 0x0FFF; + way0 = pvt->mir1 & 0x1; + way1 = pvt->mir1 & 0x2; + debugf2("MIR1: limit= 0x%x WAY1= %u WAY0= %x\n", limit, way1, way0); + limit = (pvt->mir2 >> 4) & 0x0FFF; + way0 = pvt->mir2 & 0x1; + way1 = pvt->mir2 & 0x2; + debugf2("MIR2: limit= 0x%x WAY1= %u WAY0= %x\n", limit, way1, way0); + + /* Get the MTR[0-3] regs */ + for (slot_row = 0; slot_row < NUM_MTRS; slot_row++) { + int where = MTR0 + (slot_row * sizeof(u32)); + + pci_read_config_word(pvt->branch_0, where, + &pvt->b0_mtr[slot_row]); + + debugf2("MTR%d where=0x%x B0 value=0x%x\n", slot_row, where, + pvt->b0_mtr[slot_row]); + + if (pvt->maxch >= CHANNELS_PER_BRANCH) { + pci_read_config_word(pvt->branch_1, where, + &pvt->b1_mtr[slot_row]); + debugf2("MTR%d where=0x%x B1 value=0x%x\n", slot_row, + where, pvt->b0_mtr[slot_row]); + } else { + pvt->b1_mtr[slot_row] = 0; + } + } + + /* Read and dump branch 0's MTRs */ + debugf2("\nMemory Technology Registers:\n"); + debugf2(" Branch 0:\n"); + for (slot_row = 0; slot_row < NUM_MTRS; slot_row++) { + decode_mtr(slot_row, pvt->b0_mtr[slot_row]); + } + pci_read_config_word(pvt->branch_0, AMB_PRESENT_0, + &pvt->b0_ambpresent0); + debugf2("\t\tAMB-Branch 0-present0 0x%x:\n", pvt->b0_ambpresent0); + 
pci_read_config_word(pvt->branch_0, AMB_PRESENT_1, + &pvt->b0_ambpresent1); + debugf2("\t\tAMB-Branch 0-present1 0x%x:\n", pvt->b0_ambpresent1); + + /* Only if we have 2 branchs (4 channels) */ + if (pvt->maxch < CHANNELS_PER_BRANCH) { + pvt->b1_ambpresent0 = 0; + pvt->b1_ambpresent1 = 0; + } else { + /* Read and dump branch 1's MTRs */ + debugf2(" Branch 1:\n"); + for (slot_row = 0; slot_row < NUM_MTRS; slot_row++) { + decode_mtr(slot_row, pvt->b1_mtr[slot_row]); + } + pci_read_config_word(pvt->branch_1, AMB_PRESENT_0, + &pvt->b1_ambpresent0); + debugf2("\t\tAMB-Branch 1-present0 0x%x:\n", + pvt->b1_ambpresent0); + pci_read_config_word(pvt->branch_1, AMB_PRESENT_1, + &pvt->b1_ambpresent1); + debugf2("\t\tAMB-Branch 1-present1 0x%x:\n", + pvt->b1_ambpresent1); + } + + /* Go and determine the size of each DIMM and place in an + * orderly matrix */ + calculate_dimm_size(pvt); +} + +/* + * i5000_init_csrows Initialize the 'csrows' table within + * the mci control structure with the + * addressing of memory. 
+ * + * return: + * 0 success + * 1 no actual memory found on this MC + */ +static int i5000_init_csrows(struct mem_ctl_info *mci) +{ + struct i5000_pvt *pvt; + struct csrow_info *p_csrow; + int empty, channel_count; + int max_csrows; + int mtr; + int csrow_megs; + int channel; + int csrow; + + pvt = mci->pvt_info; + + channel_count = pvt->maxch; + max_csrows = pvt->maxdimmperch * 2; + + empty = 1; /* Assume NO memory */ + + for (csrow = 0; csrow < max_csrows; csrow++) { + p_csrow = &mci->csrows[csrow]; + + p_csrow->csrow_idx = csrow; + + /* use branch 0 for the basis */ + mtr = pvt->b0_mtr[csrow >> 1]; + + /* if no DIMMS on this row, continue */ + if (!MTR_DIMMS_PRESENT(mtr)) + continue; + + /* FAKE OUT VALUES, FIXME */ + p_csrow->first_page = 0 + csrow * 20; + p_csrow->last_page = 9 + csrow * 20; + p_csrow->page_mask = 0xFFF; + + p_csrow->grain = 8; + + csrow_megs = 0; + for (channel = 0; channel < pvt->maxch; channel++) { + csrow_megs += pvt->dimm_info[csrow][channel].megabytes; + } + + p_csrow->nr_pages = csrow_megs << 8; + + /* Assume DDR2 for now */ + p_csrow->mtype = MEM_FB_DDR2; + + /* ask what device type on this row */ + if (MTR_DRAM_WIDTH(mtr)) + p_csrow->dtype = DEV_X8; + else + p_csrow->dtype = DEV_X4; + + p_csrow->edac_mode = EDAC_S8ECD8ED; + + empty = 0; + } + + return empty; +} + +/* + * i5000_enable_error_reporting + * Turn on the memory reporting features of the hardware + */ +static void i5000_enable_error_reporting(struct mem_ctl_info *mci) +{ + struct i5000_pvt *pvt; + u32 fbd_error_mask; + + pvt = mci->pvt_info; + + /* Read the FBD Error Mask Register */ + pci_read_config_dword(pvt->branchmap_werrors, EMASK_FBD, + &fbd_error_mask); + + /* Enable with a '0' */ + fbd_error_mask &= ~(ENABLE_EMASK_ALL); + + pci_write_config_dword(pvt->branchmap_werrors, EMASK_FBD, + fbd_error_mask); +} + +/* + * i5000_get_dimm_and_channel_counts(pdev, &num_csrows, &num_channels) + * + * ask the device how many channels are present and how many CSROWS + * as well + 
*/ +static void i5000_get_dimm_and_channel_counts(struct pci_dev *pdev, + int *num_dimms_per_channel, + int *num_channels) +{ + u8 value; + + /* Need to retrieve just how many channels and dimms per channel are + * supported on this memory controller + */ + pci_read_config_byte(pdev, MAXDIMMPERCH, &value); + *num_dimms_per_channel = (int)value *2; + + pci_read_config_byte(pdev, MAXCH, &value); + *num_channels = (int)value; +} + +/* + * i5000_probe1 Probe for ONE instance of device to see if it is + * present. + * return: + * 0 for FOUND a device + * < 0 for error code + */ +static int i5000_probe1(struct pci_dev *pdev, int dev_idx) +{ + struct mem_ctl_info *mci; + struct i5000_pvt *pvt; + int num_channels; + int num_dimms_per_channel; + int num_csrows; + + debugf0("MC: " __FILE__ ": %s(), pdev bus %u dev=0x%x fn=0x%x\n", + __func__, + pdev->bus->number, + PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); + + /* We only are looking for func 0 of the set */ + if (PCI_FUNC(pdev->devfn) != 0) + return -ENODEV; + + /* make sure error reporting method is sane */ + switch (edac_op_state) { + case EDAC_OPSTATE_POLL: + case EDAC_OPSTATE_NMI: + break; + default: + edac_op_state = EDAC_OPSTATE_POLL; + break; + } + + /* Ask the devices for the number of CSROWS and CHANNELS so + * that we can calculate the memory resources, etc + * + * The Chipset will report what it can handle which will be greater + * or equal to what the motherboard manufacturer will implement. + * + * As we don't have a motherboard identification routine to determine + * actual number of slots/dimms per channel, we thus utilize the + * resource as specified by the chipset. Thus, we might have + * have more DIMMs per channel than actually on the mobo, but this + * allows the driver to support upto the chipset max, without + * some fancy mobo determination. 
+ */ + i5000_get_dimm_and_channel_counts(pdev, &num_dimms_per_channel, + &num_channels); + num_csrows = num_dimms_per_channel * 2; + + debugf0("MC: %s(): Number of - Channels= %d DIMMS= %d CSROWS= %d\n", + __func__, num_channels, num_dimms_per_channel, num_csrows); + + /* allocate a new MC control structure */ + mci = edac_mc_alloc(sizeof(*pvt), num_csrows, num_channels, 0); + + if (mci == NULL) + return -ENOMEM; + + debugf0("MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci); + + mci->dev = &pdev->dev; /* record ptr to the generic device */ + + pvt = mci->pvt_info; + pvt->system_address = pdev; /* Record this device in our private */ + pvt->maxch = num_channels; + pvt->maxdimmperch = num_dimms_per_channel; + + /* 'get' the pci devices we want to reserve for our use */ + if (i5000_get_devices(mci, dev_idx)) + goto fail0; + + /* Time to get serious */ + i5000_get_mc_regs(mci); /* retrieve the hardware registers */ + + mci->mc_idx = 0; + mci->mtype_cap = MEM_FLAG_FB_DDR2; + mci->edac_ctl_cap = EDAC_FLAG_NONE; + mci->edac_cap = EDAC_FLAG_NONE; + mci->mod_name = "i5000_edac.c"; + mci->mod_ver = I5000_REVISION; + mci->ctl_name = i5000_devs[dev_idx].ctl_name; + mci->dev_name = pci_name(pdev); + mci->ctl_page_to_phys = NULL; + + /* Set the function pointer to an actual operation function */ + mci->edac_check = i5000_check_error; + + /* initialize the MC control structure 'csrows' table + * with the mapping and control information */ + if (i5000_init_csrows(mci)) { + debugf0("MC: Setting mci->edac_cap to EDAC_FLAG_NONE\n" + " because i5000_init_csrows() returned nonzero " + "value\n"); + mci->edac_cap = EDAC_FLAG_NONE; /* no csrows found */ + } else { + debugf1("MC: Enable error reporting now\n"); + i5000_enable_error_reporting(mci); + } + + /* add this new MC control structure to EDAC's list of MCs */ + if (edac_mc_add_mc(mci)) { + debugf0("MC: " __FILE__ + ": %s(): failed edac_mc_add_mc()\n", __func__); + /* FIXME: perhaps some code should go here that disables error + 
* reporting if we just enabled it + */ + goto fail1; + } + + i5000_clear_error(mci); + + /* allocating generic PCI control info */ + i5000_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR); + if (!i5000_pci) { + printk(KERN_WARNING + "%s(): Unable to create PCI control\n", + __func__); + printk(KERN_WARNING + "%s(): PCI error report via EDAC not setup\n", + __func__); + } + + return 0; + + /* Error exit unwinding stack */ +fail1: + + i5000_put_devices(mci); + +fail0: + edac_mc_free(mci); + return -ENODEV; +} + +/* + * i5000_init_one constructor for one instance of device + * + * returns: + * negative on error + * count (>= 0) + */ +static int __devinit i5000_init_one(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + int rc; + + debugf0("MC: " __FILE__ ": %s()\n", __func__); + + /* wake up device */ + rc = pci_enable_device(pdev); + if (rc == -EIO) + return rc; + + /* now probe and enable the device */ + return i5000_probe1(pdev, id->driver_data); +} + +/* + * i5000_remove_one destructor for one instance of device + * + */ +static void __devexit i5000_remove_one(struct pci_dev *pdev) +{ + struct mem_ctl_info *mci; + + debugf0(__FILE__ ": %s()\n", __func__); + + if (i5000_pci) + edac_pci_release_generic_ctl(i5000_pci); + + if ((mci = edac_mc_del_mc(&pdev->dev)) == NULL) + return; + + /* retrieve references to resources, and free those resources */ + i5000_put_devices(mci); + + edac_mc_free(mci); +} + +/* + * pci_device_id table for which devices we are looking for + * + * The "E500P" device is the first device supported. + */ +static const struct pci_device_id i5000_pci_tbl[] __devinitdata = { + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_I5000_DEV16), + .driver_data = I5000P}, + + {0,} /* 0 terminated list. 
*/ +}; + +MODULE_DEVICE_TABLE(pci, i5000_pci_tbl); + +/* + * i5000_driver pci_driver structure for this module + * + */ +static struct pci_driver i5000_driver = { + .name = __stringify(KBUILD_BASENAME), + .probe = i5000_init_one, + .remove = __devexit_p(i5000_remove_one), + .id_table = i5000_pci_tbl, +}; + +/* + * i5000_init Module entry function + * Try to initialize this module for its devices + */ +static int __init i5000_init(void) +{ + int pci_rc; + + debugf2("MC: " __FILE__ ": %s()\n", __func__); + + pci_rc = pci_register_driver(&i5000_driver); + + return (pci_rc < 0) ? pci_rc : 0; +} + +/* + * i5000_exit() Module exit function + * Unregister the driver + */ +static void __exit i5000_exit(void) +{ + debugf2("MC: " __FILE__ ": %s()\n", __func__); + pci_unregister_driver(&i5000_driver); +} + +module_init(i5000_init); +module_exit(i5000_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR + ("Linux Networx (http://lnxi.com) Doug Thompson <norsk5@xmission.com>"); +MODULE_DESCRIPTION("MC Driver for Intel I5000 memory controllers - " + I5000_REVISION); +module_param(edac_op_state, int, 0444); +MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); diff --git a/drivers/edac/i82443bxgx_edac.c b/drivers/edac/i82443bxgx_edac.c new file mode 100644 index 00000000000..83bfe37c4bb --- /dev/null +++ b/drivers/edac/i82443bxgx_edac.c @@ -0,0 +1,402 @@ +/* + * Intel 82443BX/GX (440BX/GX chipset) Memory Controller EDAC kernel + * module (C) 2006 Tim Small + * + * This file may be distributed under the terms of the GNU General + * Public License. + * + * Written by Tim Small <tim@buttersideup.com>, based on work by Linux + * Networx, Thayne Harbaugh, Dan Hollis <goemon at anime dot net> and + * others. + * + * 440GX fix by Jason Uhlenkott <juhlenko@akamai.com>. + * + * Written with reference to 82443BX Host Bridge Datasheet: + * http://www.intel.com/design/chipsets/440/documentation.htm + * references to this document given in []. 
+ * + * This module doesn't support the 440LX, but it may be possible to + * make it do so (the 440LX's register definitions are different, but + * not completely so - I haven't studied them in enough detail to know + * how easy this would be). + */ + +#include <linux/module.h> +#include <linux/init.h> + +#include <linux/pci.h> +#include <linux/pci_ids.h> + +#include <linux/slab.h> + +#include "edac_core.h" + +#define I82443_REVISION "0.1" + +#define EDAC_MOD_STR "i82443bxgx_edac" + +/* The 82443BX supports SDRAM, or EDO (EDO for mobile only), "Memory + * Size: 8 MB to 512 MB (1GB with Registered DIMMs) with eight memory + * rows" "The 82443BX supports multiple-bit error detection and + * single-bit error correction when ECC mode is enabled and + * single/multi-bit error detection when correction is disabled. + * During writes to the DRAM, the 82443BX generates ECC for the data + * on a QWord basis. Partial QWord writes require a read-modify-write + * cycle when ECC is enabled." +*/ + +/* "Additionally, the 82443BX ensures that the data is corrected in + * main memory so that accumulation of errors is prevented. Another + * error within the same QWord would result in a double-bit error + * which is unrecoverable. This is known as hardware scrubbing since + * it requires no software intervention to correct the data in memory." 
+ */ + +/* [Also see page 100 (section 4.3), "DRAM Interface"] + * [Also see page 112 (section 4.6.1.4), ECC] + */ + +#define I82443BXGX_NR_CSROWS 8 +#define I82443BXGX_NR_CHANS 1 +#define I82443BXGX_NR_DIMMS 4 + +/* 82443 PCI Device 0 */ +#define I82443BXGX_NBXCFG 0x50 /* 32bit register starting at this PCI + * config space offset */ +#define I82443BXGX_NBXCFG_OFFSET_NON_ECCROW 24 /* Array of bits, zero if + * row is non-ECC */ +#define I82443BXGX_NBXCFG_OFFSET_DRAM_FREQ 12 /* 2 bits,00=100MHz,10=66 MHz */ + +#define I82443BXGX_NBXCFG_OFFSET_DRAM_INTEGRITY 7 /* 2 bits: */ +#define I82443BXGX_NBXCFG_INTEGRITY_NONE 0x0 /* 00 = Non-ECC */ +#define I82443BXGX_NBXCFG_INTEGRITY_EC 0x1 /* 01 = EC (only) */ +#define I82443BXGX_NBXCFG_INTEGRITY_ECC 0x2 /* 10 = ECC */ +#define I82443BXGX_NBXCFG_INTEGRITY_SCRUB 0x3 /* 11 = ECC + HW Scrub */ + +#define I82443BXGX_NBXCFG_OFFSET_ECC_DIAG_ENABLE 6 + +/* 82443 PCI Device 0 */ +#define I82443BXGX_EAP 0x80 /* 32bit register starting at this PCI + * config space offset, Error Address + * Pointer Register */ +#define I82443BXGX_EAP_OFFSET_EAP 12 /* High 20 bits of error address */ +#define I82443BXGX_EAP_OFFSET_MBE BIT(1) /* Err at EAP was multi-bit (W1TC) */ +#define I82443BXGX_EAP_OFFSET_SBE BIT(0) /* Err at EAP was single-bit (W1TC) */ + +#define I82443BXGX_ERRCMD 0x90 /* 8bit register starting at this PCI + * config space offset. */ +#define I82443BXGX_ERRCMD_OFFSET_SERR_ON_MBE BIT(1) /* 1 = enable */ +#define I82443BXGX_ERRCMD_OFFSET_SERR_ON_SBE BIT(0) /* 1 = enable */ + +#define I82443BXGX_ERRSTS 0x91 /* 16bit register starting at this PCI + * config space offset. 
*/ +#define I82443BXGX_ERRSTS_OFFSET_MBFRE 5 /* 3 bits - first err row multibit */ +#define I82443BXGX_ERRSTS_OFFSET_MEF BIT(4) /* 1 = MBE occurred */ +#define I82443BXGX_ERRSTS_OFFSET_SBFRE 1 /* 3 bits - first err row singlebit */ +#define I82443BXGX_ERRSTS_OFFSET_SEF BIT(0) /* 1 = SBE occurred */ + +#define I82443BXGX_DRAMC 0x57 /* 8bit register starting at this PCI + * config space offset. */ +#define I82443BXGX_DRAMC_OFFSET_DT 3 /* 2 bits, DRAM Type */ +#define I82443BXGX_DRAMC_DRAM_IS_EDO 0 /* 00 = EDO */ +#define I82443BXGX_DRAMC_DRAM_IS_SDRAM 1 /* 01 = SDRAM */ +#define I82443BXGX_DRAMC_DRAM_IS_RSDRAM 2 /* 10 = Registered SDRAM */ + +#define I82443BXGX_DRB 0x60 /* 8x 8bit registers starting at this PCI + * config space offset. */ + +/* FIXME - don't poll when ECC disabled? */ + +struct i82443bxgx_edacmc_error_info { + u32 eap; +}; + +static struct edac_pci_ctl_info *i82443bxgx_pci; + +static void i82443bxgx_edacmc_get_error_info(struct mem_ctl_info *mci, + struct i82443bxgx_edacmc_error_info + *info) +{ + struct pci_dev *pdev; + pdev = to_pci_dev(mci->dev); + pci_read_config_dword(pdev, I82443BXGX_EAP, &info->eap); + if (info->eap & I82443BXGX_EAP_OFFSET_SBE) + /* Clear error to allow next error to be reported [p.61] */ + pci_write_bits32(pdev, I82443BXGX_EAP, + I82443BXGX_EAP_OFFSET_SBE, + I82443BXGX_EAP_OFFSET_SBE); + + if (info->eap & I82443BXGX_EAP_OFFSET_MBE) + /* Clear error to allow next error to be reported [p.61] */ + pci_write_bits32(pdev, I82443BXGX_EAP, + I82443BXGX_EAP_OFFSET_MBE, + I82443BXGX_EAP_OFFSET_MBE); +} + +static int i82443bxgx_edacmc_process_error_info(struct mem_ctl_info *mci, + struct + i82443bxgx_edacmc_error_info + *info, int handle_errors) +{ + int error_found = 0; + u32 eapaddr, page, pageoffset; + + /* bits 30:12 hold the 4kb block in which the error occurred + * [p.61] */ + eapaddr = (info->eap & 0xfffff000); + page = eapaddr >> PAGE_SHIFT; + pageoffset = eapaddr - (page << PAGE_SHIFT); + + if (info->eap & 
I82443BXGX_EAP_OFFSET_SBE) { + error_found = 1; + if (handle_errors) + edac_mc_handle_ce(mci, page, pageoffset, + /* 440BX/GX don't make syndrome information + * available */ + 0, edac_mc_find_csrow_by_page(mci, page), 0, + mci->ctl_name); + } + + if (info->eap & I82443BXGX_EAP_OFFSET_MBE) { + error_found = 1; + if (handle_errors) + edac_mc_handle_ue(mci, page, pageoffset, + edac_mc_find_csrow_by_page(mci, page), + mci->ctl_name); + } + + return error_found; +} + +static void i82443bxgx_edacmc_check(struct mem_ctl_info *mci) +{ + struct i82443bxgx_edacmc_error_info info; + + debugf1("MC%d: " __FILE__ ": %s()\n", mci->mc_idx, __func__); + i82443bxgx_edacmc_get_error_info(mci, &info); + i82443bxgx_edacmc_process_error_info(mci, &info, 1); +} + +static void i82443bxgx_init_csrows(struct mem_ctl_info *mci, + struct pci_dev *pdev, + enum edac_type edac_mode, + enum mem_type mtype) +{ + struct csrow_info *csrow; + int index; + u8 drbar, dramc; + u32 row_base, row_high_limit, row_high_limit_last; + + pci_read_config_byte(pdev, I82443BXGX_DRAMC, &dramc); + row_high_limit_last = 0; + for (index = 0; index < mci->nr_csrows; index++) { + csrow = &mci->csrows[index]; + pci_read_config_byte(pdev, I82443BXGX_DRB + index, &drbar); + debugf1("MC%d: " __FILE__ ": %s() Row=%d DRB = %#0x\n", + mci->mc_idx, __func__, index, drbar); + row_high_limit = ((u32) drbar << 23); + /* find the DRAM Chip Select Base address and mask */ + debugf1("MC%d: " __FILE__ ": %s() Row=%d, " + "Boundry Address=%#0x, Last = %#0x \n", + mci->mc_idx, __func__, index, row_high_limit, + row_high_limit_last); + + /* 440GX goes to 2GB, represented with a DRB of 0. 
*/ + if (row_high_limit_last && !row_high_limit) + row_high_limit = 1UL << 31; + + /* This row is empty [p.49] */ + if (row_high_limit == row_high_limit_last) + continue; + row_base = row_high_limit_last; + csrow->first_page = row_base >> PAGE_SHIFT; + csrow->last_page = (row_high_limit >> PAGE_SHIFT) - 1; + csrow->nr_pages = csrow->last_page - csrow->first_page + 1; + /* EAP reports in 4kilobyte granularity [61] */ + csrow->grain = 1 << 12; + csrow->mtype = mtype; + /* I don't think 440BX can tell you device type? FIXME? */ + csrow->dtype = DEV_UNKNOWN; + /* Mode is global to all rows on 440BX */ + csrow->edac_mode = edac_mode; + row_high_limit_last = row_high_limit; + } +} + +static int i82443bxgx_edacmc_probe1(struct pci_dev *pdev, int dev_idx) +{ + struct mem_ctl_info *mci; + u8 dramc; + u32 nbxcfg, ecc_mode; + enum mem_type mtype; + enum edac_type edac_mode; + + debugf0("MC: " __FILE__ ": %s()\n", __func__); + + /* Something is really hosed if PCI config space reads from + * the MC aren't working. 
+ */ + if (pci_read_config_dword(pdev, I82443BXGX_NBXCFG, &nbxcfg)) + return -EIO; + + mci = edac_mc_alloc(0, I82443BXGX_NR_CSROWS, I82443BXGX_NR_CHANS, 0); + + if (mci == NULL) + return -ENOMEM; + + debugf0("MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci); + mci->dev = &pdev->dev; + mci->mtype_cap = MEM_FLAG_EDO | MEM_FLAG_SDR | MEM_FLAG_RDR; + mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_EC | EDAC_FLAG_SECDED; + pci_read_config_byte(pdev, I82443BXGX_DRAMC, &dramc); + switch ((dramc >> I82443BXGX_DRAMC_OFFSET_DT) & (BIT(0) | BIT(1))) { + case I82443BXGX_DRAMC_DRAM_IS_EDO: + mtype = MEM_EDO; + break; + case I82443BXGX_DRAMC_DRAM_IS_SDRAM: + mtype = MEM_SDR; + break; + case I82443BXGX_DRAMC_DRAM_IS_RSDRAM: + mtype = MEM_RDR; + break; + default: + debugf0("Unknown/reserved DRAM type value " + "in DRAMC register!\n"); + mtype = -MEM_UNKNOWN; + } + + if ((mtype == MEM_SDR) || (mtype == MEM_RDR)) + mci->edac_cap = mci->edac_ctl_cap; + else + mci->edac_cap = EDAC_FLAG_NONE; + + mci->scrub_cap = SCRUB_FLAG_HW_SRC; + pci_read_config_dword(pdev, I82443BXGX_NBXCFG, &nbxcfg); + ecc_mode = ((nbxcfg >> I82443BXGX_NBXCFG_OFFSET_DRAM_INTEGRITY) & + (BIT(0) | BIT(1))); + + mci->scrub_mode = (ecc_mode == I82443BXGX_NBXCFG_INTEGRITY_SCRUB) + ? SCRUB_HW_SRC : SCRUB_NONE; + + switch (ecc_mode) { + case I82443BXGX_NBXCFG_INTEGRITY_NONE: + edac_mode = EDAC_NONE; + break; + case I82443BXGX_NBXCFG_INTEGRITY_EC: + edac_mode = EDAC_EC; + break; + case I82443BXGX_NBXCFG_INTEGRITY_ECC: + case I82443BXGX_NBXCFG_INTEGRITY_SCRUB: + edac_mode = EDAC_SECDED; + break; + default: + debugf0("%s(): Unknown/reserved ECC state " + "in NBXCFG register!\n", __func__); + edac_mode = EDAC_UNKNOWN; + break; + } + + i82443bxgx_init_csrows(mci, pdev, edac_mode, mtype); + + /* Many BIOSes don't clear error flags on boot, so do this + * here, or we get "phantom" errors occuring at module-load + * time. 
*/ + pci_write_bits32(pdev, I82443BXGX_EAP, + (I82443BXGX_EAP_OFFSET_SBE | + I82443BXGX_EAP_OFFSET_MBE), + (I82443BXGX_EAP_OFFSET_SBE | + I82443BXGX_EAP_OFFSET_MBE)); + + mci->mod_name = EDAC_MOD_STR; + mci->mod_ver = I82443_REVISION; + mci->ctl_name = "I82443BXGX"; + mci->dev_name = pci_name(pdev); + mci->edac_check = i82443bxgx_edacmc_check; + mci->ctl_page_to_phys = NULL; + + if (edac_mc_add_mc(mci)) { + debugf3("%s(): failed edac_mc_add_mc()\n", __func__); + goto fail; + } + + /* allocating generic PCI control info */ + i82443bxgx_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR); + if (!i82443bxgx_pci) { + printk(KERN_WARNING + "%s(): Unable to create PCI control\n", + __func__); + printk(KERN_WARNING + "%s(): PCI error report via EDAC not setup\n", + __func__); + } + + debugf3("MC: " __FILE__ ": %s(): success\n", __func__); + return 0; + +fail: + edac_mc_free(mci); + return -ENODEV; +} + +EXPORT_SYMBOL_GPL(i82443bxgx_edacmc_probe1); + +/* returns count (>= 0), or negative on error */ +static int __devinit i82443bxgx_edacmc_init_one(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + debugf0("MC: " __FILE__ ": %s()\n", __func__); + + /* don't need to call pci_device_enable() */ + return i82443bxgx_edacmc_probe1(pdev, ent->driver_data); +} + +static void __devexit i82443bxgx_edacmc_remove_one(struct pci_dev *pdev) +{ + struct mem_ctl_info *mci; + + debugf0(__FILE__ ": %s()\n", __func__); + + if (i82443bxgx_pci) + edac_pci_release_generic_ctl(i82443bxgx_pci); + + if ((mci = edac_mc_del_mc(&pdev->dev)) == NULL) + return; + + edac_mc_free(mci); +} + +EXPORT_SYMBOL_GPL(i82443bxgx_edacmc_remove_one); + +static const struct pci_device_id i82443bxgx_pci_tbl[] __devinitdata = { + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443BX_0)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443BX_2)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443GX_0)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443GX_2)}, + 
{0,} /* 0 terminated list. */ +}; + +MODULE_DEVICE_TABLE(pci, i82443bxgx_pci_tbl); + +static struct pci_driver i82443bxgx_edacmc_driver = { + .name = EDAC_MOD_STR, + .probe = i82443bxgx_edacmc_init_one, + .remove = __devexit_p(i82443bxgx_edacmc_remove_one), + .id_table = i82443bxgx_pci_tbl, +}; + +static int __init i82443bxgx_edacmc_init(void) +{ + return pci_register_driver(&i82443bxgx_edacmc_driver); +} + +static void __exit i82443bxgx_edacmc_exit(void) +{ + pci_unregister_driver(&i82443bxgx_edacmc_driver); +} + +module_init(i82443bxgx_edacmc_init); +module_exit(i82443bxgx_edacmc_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Tim Small <tim@buttersideup.com> - WPAD"); +MODULE_DESCRIPTION("EDAC MC support for Intel 82443BX/GX memory controllers"); diff --git a/drivers/edac/i82860_edac.c b/drivers/edac/i82860_edac.c index e4bb298e613..f5ecd2c4d81 100644 --- a/drivers/edac/i82860_edac.c +++ b/drivers/edac/i82860_edac.c @@ -14,9 +14,9 @@ #include <linux/pci.h> #include <linux/pci_ids.h> #include <linux/slab.h> -#include "edac_mc.h" +#include "edac_core.h" -#define I82860_REVISION " Ver: 2.0.1 " __DATE__ +#define I82860_REVISION " Ver: 2.0.2 " __DATE__ #define EDAC_MOD_STR "i82860_edac" #define i82860_printk(level, fmt, arg...) 
\ @@ -54,16 +54,16 @@ struct i82860_error_info { static const struct i82860_dev_info i82860_devs[] = { [I82860] = { - .ctl_name = "i82860" - }, + .ctl_name = "i82860"}, }; -static struct pci_dev *mci_pdev = NULL; /* init dev: in case that AGP code +static struct pci_dev *mci_pdev; /* init dev: in case that AGP code * has already registered driver */ +static struct edac_pci_ctl_info *i82860_pci; static void i82860_get_error_info(struct mem_ctl_info *mci, - struct i82860_error_info *info) + struct i82860_error_info *info) { struct pci_dev *pdev; @@ -91,13 +91,13 @@ static void i82860_get_error_info(struct mem_ctl_info *mci, if ((info->errsts ^ info->errsts2) & 0x0003) { pci_read_config_dword(pdev, I82860_EAP, &info->eap); - pci_read_config_word(pdev, I82860_DERRCTL_STS, - &info->derrsyn); + pci_read_config_word(pdev, I82860_DERRCTL_STS, &info->derrsyn); } } static int i82860_process_error_info(struct mem_ctl_info *mci, - struct i82860_error_info *info, int handle_errors) + struct i82860_error_info *info, + int handle_errors) { int row; @@ -136,7 +136,7 @@ static void i82860_check(struct mem_ctl_info *mci) static void i82860_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev) { unsigned long last_cumul_size; - u16 mchcfg_ddim; /* DRAM Data Integrity Mode 0=none, 2=edac */ + u16 mchcfg_ddim; /* DRAM Data Integrity Mode 0=none, 2=edac */ u16 value; u32 cumul_size; struct csrow_info *csrow; @@ -155,7 +155,7 @@ static void i82860_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev) csrow = &mci->csrows[index]; pci_read_config_word(pdev, I82860_GBA + index * 2, &value); cumul_size = (value & I82860_GBA_MASK) << - (I82860_GBA_SHIFT - PAGE_SHIFT); + (I82860_GBA_SHIFT - PAGE_SHIFT); debugf3("%s(): (%d) cumul_size 0x%x\n", __func__, index, cumul_size); @@ -186,7 +186,7 @@ static int i82860_probe1(struct pci_dev *pdev, int dev_idx) the channel and the GRA registers map to physical devices so we are going to make 1 channel for group. 
*/ - mci = edac_mc_alloc(0, 16, 1); + mci = edac_mc_alloc(0, 16, 1, 0); if (!mci) return -ENOMEM; @@ -200,19 +200,31 @@ static int i82860_probe1(struct pci_dev *pdev, int dev_idx) mci->mod_name = EDAC_MOD_STR; mci->mod_ver = I82860_REVISION; mci->ctl_name = i82860_devs[dev_idx].ctl_name; + mci->dev_name = pci_name(pdev); mci->edac_check = i82860_check; mci->ctl_page_to_phys = NULL; i82860_init_csrows(mci, pdev); - i82860_get_error_info(mci, &discard); /* clear counters */ + i82860_get_error_info(mci, &discard); /* clear counters */ /* Here we assume that we will never see multiple instances of this * type of memory controller. The ID is therefore hardcoded to 0. */ - if (edac_mc_add_mc(mci,0)) { + if (edac_mc_add_mc(mci)) { debugf3("%s(): failed edac_mc_add_mc()\n", __func__); goto fail; } + /* allocating generic PCI control info */ + i82860_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR); + if (!i82860_pci) { + printk(KERN_WARNING + "%s(): Unable to create PCI control\n", + __func__); + printk(KERN_WARNING + "%s(): PCI error report via EDAC not setup\n", + __func__); + } + /* get this far and it's successful */ debugf3("%s(): success\n", __func__); @@ -225,7 +237,7 @@ fail: /* returns count (>= 0), or negative on error */ static int __devinit i82860_init_one(struct pci_dev *pdev, - const struct pci_device_id *ent) + const struct pci_device_id *ent) { int rc; @@ -249,6 +261,9 @@ static void __devexit i82860_remove_one(struct pci_dev *pdev) debugf0("%s()\n", __func__); + if (i82860_pci) + edac_pci_release_generic_ctl(i82860_pci); + if ((mci = edac_mc_del_mc(&pdev->dev)) == NULL) return; @@ -257,12 +272,11 @@ static void __devexit i82860_remove_one(struct pci_dev *pdev) static const struct pci_device_id i82860_pci_tbl[] __devinitdata = { { - PCI_VEND_DEV(INTEL, 82860_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, - I82860 - }, + PCI_VEND_DEV(INTEL, 82860_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, + I82860}, { - 0, - } /* 0 terminated list. */ + 0, + } /* 0 terminated list. 
*/ }; MODULE_DEVICE_TABLE(pci, i82860_pci_tbl); @@ -329,5 +343,5 @@ module_exit(i82860_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com) " - "Ben Woodard <woodard@redhat.com>"); + "Ben Woodard <woodard@redhat.com>"); MODULE_DESCRIPTION("ECC support for Intel 82860 memory hub controllers"); diff --git a/drivers/edac/i82875p_edac.c b/drivers/edac/i82875p_edac.c index 2800b3e614a..031abadc439 100644 --- a/drivers/edac/i82875p_edac.c +++ b/drivers/edac/i82875p_edac.c @@ -18,9 +18,9 @@ #include <linux/pci.h> #include <linux/pci_ids.h> #include <linux/slab.h> -#include "edac_mc.h" +#include "edac_core.h" -#define I82875P_REVISION " Ver: 2.0.1 " __DATE__ +#define I82875P_REVISION " Ver: 2.0.2 " __DATE__ #define EDAC_MOD_STR "i82875p_edac" #define i82875p_printk(level, fmt, arg...) \ @@ -174,18 +174,19 @@ struct i82875p_error_info { static const struct i82875p_dev_info i82875p_devs[] = { [I82875P] = { - .ctl_name = "i82875p" - }, + .ctl_name = "i82875p"}, }; -static struct pci_dev *mci_pdev = NULL; /* init dev: in case that AGP code has +static struct pci_dev *mci_pdev; /* init dev: in case that AGP code has * already registered driver */ static int i82875p_registered = 1; +static struct edac_pci_ctl_info *i82875p_pci; + static void i82875p_get_error_info(struct mem_ctl_info *mci, - struct i82875p_error_info *info) + struct i82875p_error_info *info) { struct pci_dev *pdev; @@ -197,38 +198,39 @@ static void i82875p_get_error_info(struct mem_ctl_info *mci, * overwritten by UE. 
*/ pci_read_config_word(pdev, I82875P_ERRSTS, &info->errsts); + + if (!(info->errsts & 0x0081)) + return; + pci_read_config_dword(pdev, I82875P_EAP, &info->eap); pci_read_config_byte(pdev, I82875P_DES, &info->des); pci_read_config_byte(pdev, I82875P_DERRSYN, &info->derrsyn); pci_read_config_word(pdev, I82875P_ERRSTS, &info->errsts2); - pci_write_bits16(pdev, I82875P_ERRSTS, 0x0081, 0x0081); - /* * If the error is the same then we can for both reads then * the first set of reads is valid. If there is a change then * there is a CE no info and the second set of reads is valid * and should be UE info. */ - if (!(info->errsts2 & 0x0081)) - return; - if ((info->errsts ^ info->errsts2) & 0x0081) { pci_read_config_dword(pdev, I82875P_EAP, &info->eap); pci_read_config_byte(pdev, I82875P_DES, &info->des); - pci_read_config_byte(pdev, I82875P_DERRSYN, - &info->derrsyn); + pci_read_config_byte(pdev, I82875P_DERRSYN, &info->derrsyn); } + + pci_write_bits16(pdev, I82875P_ERRSTS, 0x0081, 0x0081); } static int i82875p_process_error_info(struct mem_ctl_info *mci, - struct i82875p_error_info *info, int handle_errors) + struct i82875p_error_info *info, + int handle_errors) { int row, multi_chan; multi_chan = mci->csrows[0].nr_channels - 1; - if (!(info->errsts2 & 0x0081)) + if (!(info->errsts & 0x0081)) return 0; if (!handle_errors) @@ -263,10 +265,12 @@ static void i82875p_check(struct mem_ctl_info *mci) /* Return 0 on success or 1 on failure. 
*/ static int i82875p_setup_overfl_dev(struct pci_dev *pdev, - struct pci_dev **ovrfl_pdev, void __iomem **ovrfl_window) + struct pci_dev **ovrfl_pdev, + void __iomem **ovrfl_window) { struct pci_dev *dev; void __iomem *window; + int err; *ovrfl_pdev = NULL; *ovrfl_window = NULL; @@ -284,14 +288,19 @@ static int i82875p_setup_overfl_dev(struct pci_dev *pdev, if (dev == NULL) return 1; - pci_bus_add_device(dev); + err = pci_bus_add_device(dev); + if (err) { + i82875p_printk(KERN_ERR, + "%s(): pci_bus_add_device() Failed\n", + __func__); + } } *ovrfl_pdev = dev; if (pci_enable_device(dev)) { i82875p_printk(KERN_ERR, "%s(): Failed to enable overflow " - "device\n", __func__); + "device\n", __func__); return 1; } @@ -307,7 +316,7 @@ static int i82875p_setup_overfl_dev(struct pci_dev *pdev, if (window == NULL) { i82875p_printk(KERN_ERR, "%s(): Failed to ioremap bar6\n", - __func__); + __func__); goto fail1; } @@ -325,21 +334,20 @@ fail0: return 1; } - /* Return 1 if dual channel mode is active. Else return 0. 
*/ static inline int dual_channel_active(u32 drc) { return (drc >> 21) & 0x1; } - static void i82875p_init_csrows(struct mem_ctl_info *mci, - struct pci_dev *pdev, void __iomem *ovrfl_window, u32 drc) + struct pci_dev *pdev, + void __iomem * ovrfl_window, u32 drc) { struct csrow_info *csrow; unsigned long last_cumul_size; u8 value; - u32 drc_ddim; /* DRAM Data Integrity Mode 0=none,2=edac */ + u32 drc_ddim; /* DRAM Data Integrity Mode 0=none,2=edac */ u32 cumul_size; int index; @@ -392,7 +400,7 @@ static int i82875p_probe1(struct pci_dev *pdev, int dev_idx) drc = readl(ovrfl_window + I82875P_DRC); nr_chans = dual_channel_active(drc) + 1; mci = edac_mc_alloc(sizeof(*pvt), I82875P_NR_CSROWS(nr_chans), - nr_chans); + nr_chans, 0); if (!mci) { rc = -ENOMEM; @@ -407,23 +415,35 @@ static int i82875p_probe1(struct pci_dev *pdev, int dev_idx) mci->mod_name = EDAC_MOD_STR; mci->mod_ver = I82875P_REVISION; mci->ctl_name = i82875p_devs[dev_idx].ctl_name; + mci->dev_name = pci_name(pdev); mci->edac_check = i82875p_check; mci->ctl_page_to_phys = NULL; debugf3("%s(): init pvt\n", __func__); - pvt = (struct i82875p_pvt *) mci->pvt_info; + pvt = (struct i82875p_pvt *)mci->pvt_info; pvt->ovrfl_pdev = ovrfl_pdev; pvt->ovrfl_window = ovrfl_window; i82875p_init_csrows(mci, pdev, ovrfl_window, drc); - i82875p_get_error_info(mci, &discard); /* clear counters */ + i82875p_get_error_info(mci, &discard); /* clear counters */ /* Here we assume that we will never see multiple instances of this * type of memory controller. The ID is therefore hardcoded to 0. 
*/ - if (edac_mc_add_mc(mci,0)) { + if (edac_mc_add_mc(mci)) { debugf3("%s(): failed edac_mc_add_mc()\n", __func__); goto fail1; } + /* allocating generic PCI control info */ + i82875p_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR); + if (!i82875p_pci) { + printk(KERN_WARNING + "%s(): Unable to create PCI control\n", + __func__); + printk(KERN_WARNING + "%s(): PCI error report via EDAC not setup\n", + __func__); + } + /* get this far and it's successful */ debugf3("%s(): success\n", __func__); return 0; @@ -442,7 +462,7 @@ fail0: /* returns count (>= 0), or negative on error */ static int __devinit i82875p_init_one(struct pci_dev *pdev, - const struct pci_device_id *ent) + const struct pci_device_id *ent) { int rc; @@ -467,10 +487,13 @@ static void __devexit i82875p_remove_one(struct pci_dev *pdev) debugf0("%s()\n", __func__); + if (i82875p_pci) + edac_pci_release_generic_ctl(i82875p_pci); + if ((mci = edac_mc_del_mc(&pdev->dev)) == NULL) return; - pvt = (struct i82875p_pvt *) mci->pvt_info; + pvt = (struct i82875p_pvt *)mci->pvt_info; if (pvt->ovrfl_window) iounmap(pvt->ovrfl_window); @@ -488,12 +511,11 @@ static void __devexit i82875p_remove_one(struct pci_dev *pdev) static const struct pci_device_id i82875p_pci_tbl[] __devinitdata = { { - PCI_VEND_DEV(INTEL, 82875_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, - I82875P - }, + PCI_VEND_DEV(INTEL, 82875_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, + I82875P}, { - 0, - } /* 0 terminated list. */ + 0, + } /* 0 terminated list. 
*/ }; MODULE_DEVICE_TABLE(pci, i82875p_pci_tbl); @@ -517,7 +539,7 @@ static int __init i82875p_init(void) if (mci_pdev == NULL) { mci_pdev = pci_get_device(PCI_VENDOR_ID_INTEL, - PCI_DEVICE_ID_INTEL_82875_0, NULL); + PCI_DEVICE_ID_INTEL_82875_0, NULL); if (!mci_pdev) { debugf0("875p pci_get_device fail\n"); diff --git a/drivers/edac/i82975x_edac.c b/drivers/edac/i82975x_edac.c new file mode 100644 index 00000000000..0ee88845693 --- /dev/null +++ b/drivers/edac/i82975x_edac.c @@ -0,0 +1,666 @@ +/* + * Intel 82975X Memory Controller kernel module + * (C) 2007 aCarLab (India) Pvt. Ltd. (http://acarlab.com) + * (C) 2007 jetzbroadband (http://jetzbroadband.com) + * This file may be distributed under the terms of the + * GNU General Public License. + * + * Written by Arvind R. + * Copied from i82875p_edac.c source: + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/pci.h> +#include <linux/pci_ids.h> +#include <linux/slab.h> + +#include "edac_core.h" + +#define I82975X_REVISION " Ver: 1.0.0 " __DATE__ +#define EDAC_MOD_STR "i82975x_edac" + +#define i82975x_printk(level, fmt, arg...) \ + edac_printk(level, "i82975x", fmt, ##arg) + +#define i82975x_mc_printk(mci, level, fmt, arg...) \ + edac_mc_chipset_printk(mci, level, "i82975x", fmt, ##arg) + +#ifndef PCI_DEVICE_ID_INTEL_82975_0 +#define PCI_DEVICE_ID_INTEL_82975_0 0x277c +#endif /* PCI_DEVICE_ID_INTEL_82975_0 */ + +#define I82975X_NR_CSROWS(nr_chans) (8/(nr_chans)) + +/* Intel 82975X register addresses - device 0 function 0 - DRAM Controller */ +#define I82975X_EAP 0x58 /* Dram Error Address Pointer (32b) + * + * 31:7 128 byte cache-line address + * 6:1 reserved + * 0 0: CH0; 1: CH1 + */ + +#define I82975X_DERRSYN 0x5c /* Dram Error SYNdrome (8b) + * + * 7:0 DRAM ECC Syndrome + */ + +#define I82975X_DES 0x5d /* Dram ERRor DeSTination (8b) + * 0h: Processor Memory Reads + * 1h:7h reserved + * More - See Page 65 of Intel DocSheet. 
+ */ + +#define I82975X_ERRSTS 0xc8 /* Error Status Register (16b) + * + * 15:12 reserved + * 11 Thermal Sensor Event + * 10 reserved + * 9 non-DRAM lock error (ndlock) + * 8 Refresh Timeout + * 7:2 reserved + * 1 ECC UE (multibit DRAM error) + * 0 ECC CE (singlebit DRAM error) + */ + +/* Error Reporting is supported by 3 mechanisms: + 1. DMI SERR generation ( ERRCMD ) + 2. SMI DMI generation ( SMICMD ) + 3. SCI DMI generation ( SCICMD ) +NOTE: Only ONE of the three must be enabled +*/ +#define I82975X_ERRCMD 0xca /* Error Command (16b) + * + * 15:12 reserved + * 11 Thermal Sensor Event + * 10 reserved + * 9 non-DRAM lock error (ndlock) + * 8 Refresh Timeout + * 7:2 reserved + * 1 ECC UE (multibit DRAM error) + * 0 ECC CE (singlebit DRAM error) + */ + +#define I82975X_SMICMD 0xcc /* Error Command (16b) + * + * 15:2 reserved + * 1 ECC UE (multibit DRAM error) + * 0 ECC CE (singlebit DRAM error) + */ + +#define I82975X_SCICMD 0xce /* Error Command (16b) + * + * 15:2 reserved + * 1 ECC UE (multibit DRAM error) + * 0 ECC CE (singlebit DRAM error) + */ + +#define I82975X_XEAP 0xfc /* Extended Dram Error Address Pointer (8b) + * + * 7:1 reserved + * 0 Bit32 of the Dram Error Address + */ + +#define I82975X_MCHBAR 0x44 /* + * + * 31:14 Base Addr of 16K memory-mapped + * configuration space + * 13:1 reserverd + * 0 mem-mapped config space enable + */ + +/* NOTE: Following addresses have to indexed using MCHBAR offset (44h, 32b) */ +/* Intel 82975x memory mapped register space */ + +#define I82975X_DRB_SHIFT 25 /* fixed 32MiB grain */ + +#define I82975X_DRB 0x100 /* DRAM Row Boundary (8b x 8) + * + * 7 set to 1 in highest DRB of + * channel if 4GB in ch. 
+ * 6:2 upper boundary of rank in + * 32MB grains + * 1:0 set to 0 + */ +#define I82975X_DRB_CH0R0 0x100 +#define I82975X_DRB_CH0R1 0x101 +#define I82975X_DRB_CH0R2 0x102 +#define I82975X_DRB_CH0R3 0x103 +#define I82975X_DRB_CH1R0 0x180 +#define I82975X_DRB_CH1R1 0x181 +#define I82975X_DRB_CH1R2 0x182 +#define I82975X_DRB_CH1R3 0x183 + + +#define I82975X_DRA 0x108 /* DRAM Row Attribute (4b x 8) + * defines the PAGE SIZE to be used + * for the rank + * 7 reserved + * 6:4 row attr of odd rank, i.e. 1 + * 3 reserved + * 2:0 row attr of even rank, i.e. 0 + * + * 000 = unpopulated + * 001 = reserved + * 010 = 4KiB + * 011 = 8KiB + * 100 = 16KiB + * others = reserved + */ +#define I82975X_DRA_CH0R01 0x108 +#define I82975X_DRA_CH0R23 0x109 +#define I82975X_DRA_CH1R01 0x188 +#define I82975X_DRA_CH1R23 0x189 + + +#define I82975X_BNKARC 0x10e /* Type of device in each rank - Bank Arch (16b) + * + * 15:8 reserved + * 7:6 Rank 3 architecture + * 5:4 Rank 2 architecture + * 3:2 Rank 1 architecture + * 1:0 Rank 0 architecture + * + * 00 => x16 devices; i.e 4 banks + * 01 => x8 devices; i.e 8 banks + */ +#define I82975X_C0BNKARC 0x10e +#define I82975X_C1BNKARC 0x18e + + + +#define I82975X_DRC 0x120 /* DRAM Controller Mode0 (32b) + * + * 31:30 reserved + * 29 init complete + * 28:11 reserved, according to Intel + * 22:21 number of channels + * 00=1 01=2 in 82875 + * seems to be ECC mode + * bits in 82975 in Asus + * P5W + * 19:18 Data Integ Mode + * 00=none 01=ECC in 82875 + * 10:8 refresh mode + * 7 reserved + * 6:4 mode select + * 3:2 reserved + * 1:0 DRAM type 10=Second Revision + * DDR2 SDRAM + * 00, 01, 11 reserved + */ +#define I82975X_DRC_CH0M0 0x120 +#define I82975X_DRC_CH1M0 0x1A0 + + +#define I82975X_DRC_M1 0x124 /* DRAM Controller Mode1 (32b) + * 31 0=Standard Address Map + * 1=Enhanced Address Map + * 30:0 reserved + */ + +#define I82975X_DRC_CH0M1 0x124 +#define I82975X_DRC_CH1M1 0x1A4 + +enum i82975x_chips { + I82975X = 0, +}; + +struct i82975x_pvt { + void __iomem 
*mch_window; +}; + +struct i82975x_dev_info { + const char *ctl_name; +}; + +struct i82975x_error_info { + u16 errsts; + u32 eap; + u8 des; + u8 derrsyn; + u16 errsts2; + u8 chan; /* the channel is bit 0 of EAP */ + u8 xeap; /* extended eap bit */ +}; + +static const struct i82975x_dev_info i82975x_devs[] = { + [I82975X] = { + .ctl_name = "i82975x" + }, +}; + +static struct pci_dev *mci_pdev; /* init dev: in case that AGP code has + * already registered driver + */ + +static int i82975x_registered = 1; + +static void i82975x_get_error_info(struct mem_ctl_info *mci, + struct i82975x_error_info *info) +{ + struct pci_dev *pdev; + + pdev = to_pci_dev(mci->dev); + + /* + * This is a mess because there is no atomic way to read all the + * registers at once and the registers can transition from CE being + * overwritten by UE. + */ + pci_read_config_word(pdev, I82975X_ERRSTS, &info->errsts); + pci_read_config_dword(pdev, I82975X_EAP, &info->eap); + pci_read_config_byte(pdev, I82975X_XEAP, &info->xeap); + pci_read_config_byte(pdev, I82975X_DES, &info->des); + pci_read_config_byte(pdev, I82975X_DERRSYN, &info->derrsyn); + pci_read_config_word(pdev, I82975X_ERRSTS, &info->errsts2); + + pci_write_bits16(pdev, I82975X_ERRSTS, 0x0003, 0x0003); + + /* + * If the error is the same then we can for both reads then + * the first set of reads is valid. If there is a change then + * there is a CE no info and the second set of reads is valid + * and should be UE info. 
+ */ + if (!(info->errsts2 & 0x0003)) + return; + + if ((info->errsts ^ info->errsts2) & 0x0003) { + pci_read_config_dword(pdev, I82975X_EAP, &info->eap); + pci_read_config_byte(pdev, I82975X_XEAP, &info->xeap); + pci_read_config_byte(pdev, I82975X_DES, &info->des); + pci_read_config_byte(pdev, I82975X_DERRSYN, + &info->derrsyn); + } +} + +static int i82975x_process_error_info(struct mem_ctl_info *mci, + struct i82975x_error_info *info, int handle_errors) +{ + int row, multi_chan, chan; + + multi_chan = mci->csrows[0].nr_channels - 1; + + if (!(info->errsts2 & 0x0003)) + return 0; + + if (!handle_errors) + return 1; + + if ((info->errsts ^ info->errsts2) & 0x0003) { + edac_mc_handle_ce_no_info(mci, "UE overwrote CE"); + info->errsts = info->errsts2; + } + + chan = info->eap & 1; + info->eap >>= 1; + if (info->xeap ) + info->eap |= 0x80000000; + info->eap >>= PAGE_SHIFT; + row = edac_mc_find_csrow_by_page(mci, info->eap); + + if (info->errsts & 0x0002) + edac_mc_handle_ue(mci, info->eap, 0, row, "i82975x UE"); + else + edac_mc_handle_ce(mci, info->eap, 0, info->derrsyn, row, + multi_chan ? chan : 0, + "i82975x CE"); + + return 1; +} + +static void i82975x_check(struct mem_ctl_info *mci) +{ + struct i82975x_error_info info; + + debugf1("MC%d: %s()\n", mci->mc_idx, __func__); + i82975x_get_error_info(mci, &info); + i82975x_process_error_info(mci, &info, 1); +} + +/* Return 1 if dual channel mode is active. Else return 0. */ +static int dual_channel_active(void __iomem *mch_window) +{ + /* + * We treat interleaved-symmetric configuration as dual-channel - EAP's + * bit-0 giving the channel of the error location. 
+ * + * All other configurations are treated as single channel - the EAP's + * bit-0 will resolve ok in symmetric area of mixed + * (symmetric/asymmetric) configurations + */ + u8 drb[4][2]; + int row; + int dualch; + + for (dualch = 1, row = 0; dualch && (row < 4); row++) { + drb[row][0] = readb(mch_window + I82975X_DRB + row); + drb[row][1] = readb(mch_window + I82975X_DRB + row + 0x80); + dualch = dualch && (drb[row][0] == drb[row][1]); + } + return dualch; +} + +static enum dev_type i82975x_dram_type(void __iomem *mch_window, int rank) +{ + /* + * ASUS P5W DH either does not program this register or programs + * it wrong! + * ECC is possible on i92975x ONLY with DEV_X8 which should mean 'val' + * for each rank should be 01b - the LSB of the word should be 0x55; + * but it reads 0! + */ + return DEV_X8; +} + +static void i82975x_init_csrows(struct mem_ctl_info *mci, + struct pci_dev *pdev, void __iomem *mch_window) +{ + struct csrow_info *csrow; + unsigned long last_cumul_size; + u8 value; + u32 cumul_size; + int index; + + last_cumul_size = 0; + + /* + * 82875 comment: + * The dram row boundary (DRB) reg values are boundary address + * for each DRAM row with a granularity of 32 or 64MB (single/dual + * channel operation). DRB regs are cumulative; therefore DRB7 will + * contain the total memory contained in all eight rows. + * + * FIXME: + * EDAC currently works for Dual-channel Interleaved configuration. + * Other configurations, which the chip supports, need fixing/testing. + * + */ + + for (index = 0; index < mci->nr_csrows; index++) { + csrow = &mci->csrows[index]; + + value = readb(mch_window + I82975X_DRB + index + + ((index >= 4) ? 
0x80 : 0)); + cumul_size = value; + cumul_size <<= (I82975X_DRB_SHIFT - PAGE_SHIFT); + debugf3("%s(): (%d) cumul_size 0x%x\n", __func__, index, + cumul_size); + if (cumul_size == last_cumul_size) + continue; /* not populated */ + + csrow->first_page = last_cumul_size; + csrow->last_page = cumul_size - 1; + csrow->nr_pages = cumul_size - last_cumul_size; + last_cumul_size = cumul_size; + csrow->grain = 1 << 7; /* I82975X_EAP has 128B resolution */ + csrow->mtype = MEM_DDR; /* i82975x supports only DDR2 */ + csrow->dtype = i82975x_dram_type(mch_window, index); + csrow->edac_mode = EDAC_SECDED; /* only supported */ + } +} + +/* #define i82975x_DEBUG_IOMEM */ + +#ifdef i82975x_DEBUG_IOMEM +static void i82975x_print_dram_timings(void __iomem *mch_window) +{ + /* + * The register meanings are from Intel specs; + * (shows 13-5-5-5 for 800-DDR2) + * Asus P5W Bios reports 15-5-4-4 + * What's your religion? + */ + static const int caslats[4] = { 5, 4, 3, 6 }; + u32 dtreg[2]; + + dtreg[0] = readl(mch_window + 0x114); + dtreg[1] = readl(mch_window + 0x194); + i82975x_printk(KERN_INFO, "DRAM Timings : Ch0 Ch1\n" + " RAS Active Min = %d %d\n" + " CAS latency = %d %d\n" + " RAS to CAS = %d %d\n" + " RAS precharge = %d %d\n", + (dtreg[0] >> 19 ) & 0x0f, + (dtreg[1] >> 19) & 0x0f, + caslats[(dtreg[0] >> 8) & 0x03], + caslats[(dtreg[1] >> 8) & 0x03], + ((dtreg[0] >> 4) & 0x07) + 2, + ((dtreg[1] >> 4) & 0x07) + 2, + (dtreg[0] & 0x07) + 2, + (dtreg[1] & 0x07) + 2 + ); + +} +#endif + +static int i82975x_probe1(struct pci_dev *pdev, int dev_idx) +{ + int rc = -ENODEV; + struct mem_ctl_info *mci; + struct i82975x_pvt *pvt; + void __iomem *mch_window; + u32 mchbar; + u32 drc[2]; + struct i82975x_error_info discard; + int chans; +#ifdef i82975x_DEBUG_IOMEM + u8 c0drb[4]; + u8 c1drb[4]; +#endif + + debugf0("%s()\n", __func__); + + pci_read_config_dword(pdev, I82975X_MCHBAR, &mchbar); + if (!(mchbar & 1)) { + debugf3("%s(): failed, MCHBAR disabled!\n", __func__); + goto fail0; + } + mchbar 
&= 0xffffc000; /* bits 31:14 used for 16K window */ + mch_window = ioremap_nocache(mchbar, 0x1000); + +#ifdef i82975x_DEBUG_IOMEM + i82975x_printk(KERN_INFO, "MCHBAR real = %0x, remapped = %p\n", + mchbar, mch_window); + + c0drb[0] = readb(mch_window + I82975X_DRB_CH0R0); + c0drb[1] = readb(mch_window + I82975X_DRB_CH0R1); + c0drb[2] = readb(mch_window + I82975X_DRB_CH0R2); + c0drb[3] = readb(mch_window + I82975X_DRB_CH0R3); + c1drb[0] = readb(mch_window + I82975X_DRB_CH1R0); + c1drb[1] = readb(mch_window + I82975X_DRB_CH1R1); + c1drb[2] = readb(mch_window + I82975X_DRB_CH1R2); + c1drb[3] = readb(mch_window + I82975X_DRB_CH1R3); + i82975x_printk(KERN_INFO, "DRBCH0R0 = 0x%02x\n", c0drb[0]); + i82975x_printk(KERN_INFO, "DRBCH0R1 = 0x%02x\n", c0drb[1]); + i82975x_printk(KERN_INFO, "DRBCH0R2 = 0x%02x\n", c0drb[2]); + i82975x_printk(KERN_INFO, "DRBCH0R3 = 0x%02x\n", c0drb[3]); + i82975x_printk(KERN_INFO, "DRBCH1R0 = 0x%02x\n", c1drb[0]); + i82975x_printk(KERN_INFO, "DRBCH1R1 = 0x%02x\n", c1drb[1]); + i82975x_printk(KERN_INFO, "DRBCH1R2 = 0x%02x\n", c1drb[2]); + i82975x_printk(KERN_INFO, "DRBCH1R3 = 0x%02x\n", c1drb[3]); +#endif + + drc[0] = readl(mch_window + I82975X_DRC_CH0M0); + drc[1] = readl(mch_window + I82975X_DRC_CH1M0); +#ifdef i82975x_DEBUG_IOMEM + i82975x_printk(KERN_INFO, "DRC_CH0 = %0x, %s\n", drc[0], + ((drc[0] >> 21) & 3) == 1 ? + "ECC enabled" : "ECC disabled"); + i82975x_printk(KERN_INFO, "DRC_CH1 = %0x, %s\n", drc[1], + ((drc[1] >> 21) & 3) == 1 ? 
+ "ECC enabled" : "ECC disabled"); + + i82975x_printk(KERN_INFO, "C0 BNKARC = %0x\n", + readw(mch_window + I82975X_C0BNKARC)); + i82975x_printk(KERN_INFO, "C1 BNKARC = %0x\n", + readw(mch_window + I82975X_C1BNKARC)); + i82975x_print_dram_timings(mch_window); + goto fail1; +#endif + if (!(((drc[0] >> 21) & 3) == 1 || ((drc[1] >> 21) & 3) == 1)) { + i82975x_printk(KERN_INFO, "ECC disabled on both channels.\n"); + goto fail1; + } + + chans = dual_channel_active(mch_window) + 1; + + /* assuming only one controller, index thus is 0 */ + mci = edac_mc_alloc(sizeof(*pvt), I82975X_NR_CSROWS(chans), + chans, 0); + if (!mci) { + rc = -ENOMEM; + goto fail1; + } + + debugf3("%s(): init mci\n", __func__); + mci->dev = &pdev->dev; + mci->mtype_cap = MEM_FLAG_DDR; + mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED; + mci->edac_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED; + mci->mod_name = EDAC_MOD_STR; + mci->mod_ver = I82975X_REVISION; + mci->ctl_name = i82975x_devs[dev_idx].ctl_name; + mci->edac_check = i82975x_check; + mci->ctl_page_to_phys = NULL; + debugf3("%s(): init pvt\n", __func__); + pvt = (struct i82975x_pvt *) mci->pvt_info; + pvt->mch_window = mch_window; + i82975x_init_csrows(mci, pdev, mch_window); + i82975x_get_error_info(mci, &discard); /* clear counters */ + + /* finalize this instance of memory controller with edac core */ + if (edac_mc_add_mc(mci)) { + debugf3("%s(): failed edac_mc_add_mc()\n", __func__); + goto fail2; + } + + /* get this far and it's successful */ + debugf3("%s(): success\n", __func__); + return 0; + +fail2: + edac_mc_free(mci); + +fail1: + iounmap(mch_window); +fail0: + return rc; +} + +/* returns count (>= 0), or negative on error */ +static int __devinit i82975x_init_one(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + int rc; + + debugf0("%s()\n", __func__); + + if (pci_enable_device(pdev) < 0) + return -EIO; + + rc = i82975x_probe1(pdev, ent->driver_data); + + if (mci_pdev == NULL) + mci_pdev = pci_dev_get(pdev); + + return 
rc; +} + +static void __devexit i82975x_remove_one(struct pci_dev *pdev) +{ + struct mem_ctl_info *mci; + struct i82975x_pvt *pvt; + + debugf0("%s()\n", __func__); + + mci = edac_mc_del_mc(&pdev->dev); + if (mci == NULL) + return; + + pvt = mci->pvt_info; + if (pvt->mch_window) + iounmap( pvt->mch_window ); + + edac_mc_free(mci); +} + +static const struct pci_device_id i82975x_pci_tbl[] __devinitdata = { + { + PCI_VEND_DEV(INTEL, 82975_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, + I82975X + }, + { + 0, + } /* 0 terminated list. */ +}; + +MODULE_DEVICE_TABLE(pci, i82975x_pci_tbl); + +static struct pci_driver i82975x_driver = { + .name = EDAC_MOD_STR, + .probe = i82975x_init_one, + .remove = __devexit_p(i82975x_remove_one), + .id_table = i82975x_pci_tbl, +}; + +static int __init i82975x_init(void) +{ + int pci_rc; + + debugf3("%s()\n", __func__); + + pci_rc = pci_register_driver(&i82975x_driver); + if (pci_rc < 0) + goto fail0; + + if (mci_pdev == NULL) { + mci_pdev = pci_get_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_82975_0, NULL); + + if (!mci_pdev) { + debugf0("i82975x pci_get_device fail\n"); + pci_rc = -ENODEV; + goto fail1; + } + + pci_rc = i82975x_init_one(mci_pdev, i82975x_pci_tbl); + + if (pci_rc < 0) { + debugf0("i82975x init fail\n"); + pci_rc = -ENODEV; + goto fail1; + } + } + + return 0; + +fail1: + pci_unregister_driver(&i82975x_driver); + +fail0: + if (mci_pdev != NULL) + pci_dev_put(mci_pdev); + + return pci_rc; +} + +static void __exit i82975x_exit(void) +{ + debugf3("%s()\n", __func__); + + pci_unregister_driver(&i82975x_driver); + + if (!i82975x_registered) { + i82975x_remove_one(mci_pdev); + pci_dev_put(mci_pdev); + } +} + +module_init(i82975x_init); +module_exit(i82975x_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Arvind R. 
<arvind@acarlab.com>"); +MODULE_DESCRIPTION("MC support for Intel 82975 memory hub controllers"); diff --git a/drivers/edac/pasemi_edac.c b/drivers/edac/pasemi_edac.c new file mode 100644 index 00000000000..e66cdd42a39 --- /dev/null +++ b/drivers/edac/pasemi_edac.c @@ -0,0 +1,299 @@ +/* + * Copyright (C) 2006-2007 PA Semi, Inc + * + * Author: Egor Martovetsky <egor@pasemi.com> + * Maintained by: Olof Johansson <olof@lixom.net> + * + * Driver for the PWRficient onchip memory controllers + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/pci.h> +#include <linux/pci_ids.h> +#include <linux/slab.h> +#include "edac_core.h" + +#define MODULE_NAME "pasemi_edac" + +#define MCCFG_MCEN 0x300 +#define MCCFG_MCEN_MMC_EN 0x00000001 +#define MCCFG_ERRCOR 0x388 +#define MCCFG_ERRCOR_RNK_FAIL_DET_EN 0x00000100 +#define MCCFG_ERRCOR_ECC_GEN_EN 0x00000010 +#define MCCFG_ERRCOR_ECC_CRR_EN 0x00000001 +#define MCCFG_SCRUB 0x384 +#define MCCFG_SCRUB_RGLR_SCRB_EN 0x00000001 +#define MCDEBUG_ERRCTL1 0x728 +#define MCDEBUG_ERRCTL1_RFL_LOG_EN 0x00080000 +#define MCDEBUG_ERRCTL1_MBE_LOG_EN 0x00040000 +#define MCDEBUG_ERRCTL1_SBE_LOG_EN 0x00020000 +#define MCDEBUG_ERRSTA 0x730 +#define MCDEBUG_ERRSTA_RFL_STATUS 0x00000004 +#define MCDEBUG_ERRSTA_MBE_STATUS 0x00000002 +#define MCDEBUG_ERRSTA_SBE_STATUS 0x00000001 
+#define MCDEBUG_ERRCNT1 0x734 +#define MCDEBUG_ERRCNT1_SBE_CNT_OVRFLO 0x00000080 +#define MCDEBUG_ERRLOG1A 0x738 +#define MCDEBUG_ERRLOG1A_MERR_TYPE_M 0x30000000 +#define MCDEBUG_ERRLOG1A_MERR_TYPE_NONE 0x00000000 +#define MCDEBUG_ERRLOG1A_MERR_TYPE_SBE 0x10000000 +#define MCDEBUG_ERRLOG1A_MERR_TYPE_MBE 0x20000000 +#define MCDEBUG_ERRLOG1A_MERR_TYPE_RFL 0x30000000 +#define MCDEBUG_ERRLOG1A_MERR_BA_M 0x00700000 +#define MCDEBUG_ERRLOG1A_MERR_BA_S 20 +#define MCDEBUG_ERRLOG1A_MERR_CS_M 0x00070000 +#define MCDEBUG_ERRLOG1A_MERR_CS_S 16 +#define MCDEBUG_ERRLOG1A_SYNDROME_M 0x0000ffff +#define MCDRAM_RANKCFG 0x114 +#define MCDRAM_RANKCFG_EN 0x00000001 +#define MCDRAM_RANKCFG_TYPE_SIZE_M 0x000001c0 +#define MCDRAM_RANKCFG_TYPE_SIZE_S 6 + +#define PASEMI_EDAC_NR_CSROWS 8 +#define PASEMI_EDAC_NR_CHANS 1 +#define PASEMI_EDAC_ERROR_GRAIN 64 + +static int last_page_in_mmc; +static int system_mmc_id; + + +static u32 pasemi_edac_get_error_info(struct mem_ctl_info *mci) +{ + struct pci_dev *pdev = to_pci_dev(mci->dev); + u32 tmp; + + pci_read_config_dword(pdev, MCDEBUG_ERRSTA, + &tmp); + + tmp &= (MCDEBUG_ERRSTA_RFL_STATUS | MCDEBUG_ERRSTA_MBE_STATUS + | MCDEBUG_ERRSTA_SBE_STATUS); + + if (tmp) { + if (tmp & MCDEBUG_ERRSTA_SBE_STATUS) + pci_write_config_dword(pdev, MCDEBUG_ERRCNT1, + MCDEBUG_ERRCNT1_SBE_CNT_OVRFLO); + pci_write_config_dword(pdev, MCDEBUG_ERRSTA, tmp); + } + + return tmp; +} + +static void pasemi_edac_process_error_info(struct mem_ctl_info *mci, u32 errsta) +{ + struct pci_dev *pdev = to_pci_dev(mci->dev); + u32 errlog1a; + u32 cs; + + if (!errsta) + return; + + pci_read_config_dword(pdev, MCDEBUG_ERRLOG1A, &errlog1a); + + cs = (errlog1a & MCDEBUG_ERRLOG1A_MERR_CS_M) >> + MCDEBUG_ERRLOG1A_MERR_CS_S; + + /* uncorrectable/multi-bit errors */ + if (errsta & (MCDEBUG_ERRSTA_MBE_STATUS | + MCDEBUG_ERRSTA_RFL_STATUS)) { + edac_mc_handle_ue(mci, mci->csrows[cs].first_page, 0, + cs, mci->ctl_name); + } + + /* correctable/single-bit errors */ + if (errsta & 
MCDEBUG_ERRSTA_SBE_STATUS) { + edac_mc_handle_ce(mci, mci->csrows[cs].first_page, 0, + 0, cs, 0, mci->ctl_name); + } +} + +static void pasemi_edac_check(struct mem_ctl_info *mci) +{ + u32 errsta; + + errsta = pasemi_edac_get_error_info(mci); + if (errsta) + pasemi_edac_process_error_info(mci, errsta); +} + +static int pasemi_edac_init_csrows(struct mem_ctl_info *mci, + struct pci_dev *pdev, + enum edac_type edac_mode) +{ + struct csrow_info *csrow; + u32 rankcfg; + int index; + + for (index = 0; index < mci->nr_csrows; index++) { + csrow = &mci->csrows[index]; + + pci_read_config_dword(pdev, + MCDRAM_RANKCFG + (index * 12), + &rankcfg); + + if (!(rankcfg & MCDRAM_RANKCFG_EN)) + continue; + + switch ((rankcfg & MCDRAM_RANKCFG_TYPE_SIZE_M) >> + MCDRAM_RANKCFG_TYPE_SIZE_S) { + case 0: + csrow->nr_pages = 128 << (20 - PAGE_SHIFT); + break; + case 1: + csrow->nr_pages = 256 << (20 - PAGE_SHIFT); + break; + case 2: + case 3: + csrow->nr_pages = 512 << (20 - PAGE_SHIFT); + break; + case 4: + csrow->nr_pages = 1024 << (20 - PAGE_SHIFT); + break; + case 5: + csrow->nr_pages = 2048 << (20 - PAGE_SHIFT); + break; + default: + edac_mc_printk(mci, KERN_ERR, + "Unrecognized Rank Config. 
rankcfg=%u\n", + rankcfg); + return -EINVAL; + } + + csrow->first_page = last_page_in_mmc; + csrow->last_page = csrow->first_page + csrow->nr_pages - 1; + last_page_in_mmc += csrow->nr_pages; + csrow->page_mask = 0; + csrow->grain = PASEMI_EDAC_ERROR_GRAIN; + csrow->mtype = MEM_DDR; + csrow->dtype = DEV_UNKNOWN; + csrow->edac_mode = edac_mode; + } + return 0; +} + +static int __devinit pasemi_edac_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct mem_ctl_info *mci = NULL; + u32 errctl1, errcor, scrub, mcen; + + pci_read_config_dword(pdev, MCCFG_MCEN, &mcen); + if (!(mcen & MCCFG_MCEN_MMC_EN)) + return -ENODEV; + + /* + * We should think about enabling other error detection later on + */ + + pci_read_config_dword(pdev, MCDEBUG_ERRCTL1, &errctl1); + errctl1 |= MCDEBUG_ERRCTL1_SBE_LOG_EN | + MCDEBUG_ERRCTL1_MBE_LOG_EN | + MCDEBUG_ERRCTL1_RFL_LOG_EN; + pci_write_config_dword(pdev, MCDEBUG_ERRCTL1, errctl1); + + mci = edac_mc_alloc(0, PASEMI_EDAC_NR_CSROWS, PASEMI_EDAC_NR_CHANS, + system_mmc_id++); + + if (mci == NULL) + return -ENOMEM; + + pci_read_config_dword(pdev, MCCFG_ERRCOR, &errcor); + errcor |= MCCFG_ERRCOR_RNK_FAIL_DET_EN | + MCCFG_ERRCOR_ECC_GEN_EN | + MCCFG_ERRCOR_ECC_CRR_EN; + + mci->dev = &pdev->dev; + mci->mtype_cap = MEM_FLAG_DDR | MEM_FLAG_RDDR; + mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_EC | EDAC_FLAG_SECDED; + mci->edac_cap = (errcor & MCCFG_ERRCOR_ECC_GEN_EN) ? + ((errcor & MCCFG_ERRCOR_ECC_CRR_EN) ? + (EDAC_FLAG_EC | EDAC_FLAG_SECDED) : EDAC_FLAG_EC) : + EDAC_FLAG_NONE; + mci->mod_name = MODULE_NAME; + mci->dev_name = pci_name(pdev); + mci->ctl_name = "pasemi,1682m-mc"; + mci->edac_check = pasemi_edac_check; + mci->ctl_page_to_phys = NULL; + pci_read_config_dword(pdev, MCCFG_SCRUB, &scrub); + mci->scrub_cap = SCRUB_FLAG_HW_PROG | SCRUB_FLAG_HW_SRC; + mci->scrub_mode = + ((errcor & MCCFG_ERRCOR_ECC_CRR_EN) ? SCRUB_FLAG_HW_SRC : 0) | + ((scrub & MCCFG_SCRUB_RGLR_SCRB_EN) ? 
SCRUB_FLAG_HW_PROG : 0); + + if (pasemi_edac_init_csrows(mci, pdev, + (mci->edac_cap & EDAC_FLAG_SECDED) ? + EDAC_SECDED : + ((mci->edac_cap & EDAC_FLAG_EC) ? + EDAC_EC : EDAC_NONE))) + goto fail; + + /* + * Clear status + */ + pasemi_edac_get_error_info(mci); + + if (edac_mc_add_mc(mci)) + goto fail; + + /* get this far and it's successful */ + return 0; + +fail: + edac_mc_free(mci); + return -ENODEV; +} + +static void __devexit pasemi_edac_remove(struct pci_dev *pdev) +{ + struct mem_ctl_info *mci = edac_mc_del_mc(&pdev->dev); + + if (!mci) + return; + + edac_mc_free(mci); +} + + +static const struct pci_device_id pasemi_edac_pci_tbl[] = { + { PCI_DEVICE(PCI_VENDOR_ID_PASEMI, 0xa00a) }, +}; + +MODULE_DEVICE_TABLE(pci, pasemi_edac_pci_tbl); + +static struct pci_driver pasemi_edac_driver = { + .name = MODULE_NAME, + .probe = pasemi_edac_probe, + .remove = __devexit_p(pasemi_edac_remove), + .id_table = pasemi_edac_pci_tbl, +}; + +static int __init pasemi_edac_init(void) +{ + return pci_register_driver(&pasemi_edac_driver); +} + +static void __exit pasemi_edac_exit(void) +{ + pci_unregister_driver(&pasemi_edac_driver); +} + +module_init(pasemi_edac_init); +module_exit(pasemi_edac_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Egor Martovetsky <egor@pasemi.com>"); +MODULE_DESCRIPTION("MC support for PA Semi PA6T-1682M memory controller"); diff --git a/drivers/edac/r82600_edac.c b/drivers/edac/r82600_edac.c index a49cf0a3939..e25f712f2dc 100644 --- a/drivers/edac/r82600_edac.c +++ b/drivers/edac/r82600_edac.c @@ -11,7 +11,7 @@ * * Written with reference to 82600 High Integration Dual PCI System * Controller Data Book: - * http://www.radisys.com/files/support_downloads/007-01277-0002.82600DataBook.pdf + * www.radisys.com/files/support_downloads/007-01277-0002.82600DataBook.pdf * references to this document given in [] */ @@ -20,9 +20,9 @@ #include <linux/pci.h> #include <linux/pci_ids.h> #include <linux/slab.h> -#include "edac_mc.h" +#include "edac_core.h" -#define 
R82600_REVISION " Ver: 2.0.1 " __DATE__ +#define R82600_REVISION " Ver: 2.0.2 " __DATE__ #define EDAC_MOD_STR "r82600_edac" #define r82600_printk(level, fmt, arg...) \ @@ -131,10 +131,12 @@ struct r82600_error_info { u32 eapr; }; -static unsigned int disable_hardware_scrub = 0; +static unsigned int disable_hardware_scrub; -static void r82600_get_error_info (struct mem_ctl_info *mci, - struct r82600_error_info *info) +static struct edac_pci_ctl_info *r82600_pci; + +static void r82600_get_error_info(struct mem_ctl_info *mci, + struct r82600_error_info *info) { struct pci_dev *pdev; @@ -144,18 +146,19 @@ static void r82600_get_error_info (struct mem_ctl_info *mci, if (info->eapr & BIT(0)) /* Clear error to allow next error to be reported [p.62] */ pci_write_bits32(pdev, R82600_EAP, - ((u32) BIT(0) & (u32) BIT(1)), - ((u32) BIT(0) & (u32) BIT(1))); + ((u32) BIT(0) & (u32) BIT(1)), + ((u32) BIT(0) & (u32) BIT(1))); if (info->eapr & BIT(1)) /* Clear error to allow next error to be reported [p.62] */ pci_write_bits32(pdev, R82600_EAP, - ((u32) BIT(0) & (u32) BIT(1)), - ((u32) BIT(0) & (u32) BIT(1))); + ((u32) BIT(0) & (u32) BIT(1)), + ((u32) BIT(0) & (u32) BIT(1))); } -static int r82600_process_error_info (struct mem_ctl_info *mci, - struct r82600_error_info *info, int handle_errors) +static int r82600_process_error_info(struct mem_ctl_info *mci, + struct r82600_error_info *info, + int handle_errors) { int error_found; u32 eapaddr, page; @@ -172,25 +175,24 @@ static int r82600_process_error_info (struct mem_ctl_info *mci, * granularity (upper 19 bits only) */ page = eapaddr >> PAGE_SHIFT; - if (info->eapr & BIT(0)) { /* CE? */ + if (info->eapr & BIT(0)) { /* CE? */ error_found = 1; if (handle_errors) - edac_mc_handle_ce(mci, page, 0, /* not avail */ + edac_mc_handle_ce(mci, page, 0, /* not avail */ syndrome, edac_mc_find_csrow_by_page(mci, page), - 0, /* channel */ - mci->ctl_name); + 0, mci->ctl_name); } - if (info->eapr & BIT(1)) { /* UE? 
*/ + if (info->eapr & BIT(1)) { /* UE? */ error_found = 1; if (handle_errors) /* 82600 doesn't give enough info */ edac_mc_handle_ue(mci, page, 0, - edac_mc_find_csrow_by_page(mci, page), - mci->ctl_name); + edac_mc_find_csrow_by_page(mci, page), + mci->ctl_name); } return error_found; @@ -211,11 +213,11 @@ static inline int ecc_enabled(u8 dramcr) } static void r82600_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev, - u8 dramcr) + u8 dramcr) { struct csrow_info *csrow; int index; - u8 drbar; /* SDRAM Row Boundry Address Register */ + u8 drbar; /* SDRAM Row Boundry Address Register */ u32 row_high_limit, row_high_limit_last; u32 reg_sdram, ecc_on, row_base; @@ -276,7 +278,7 @@ static int r82600_probe1(struct pci_dev *pdev, int dev_idx) debugf2("%s(): sdram refresh rate = %#0x\n", __func__, sdram_refresh_rate); debugf2("%s(): DRAMC register = %#0x\n", __func__, dramcr); - mci = edac_mc_alloc(0, R82600_NR_CSROWS, R82600_NR_CHANS); + mci = edac_mc_alloc(0, R82600_NR_CSROWS, R82600_NR_CHANS, 0); if (mci == NULL) return -ENOMEM; @@ -305,15 +307,16 @@ static int r82600_probe1(struct pci_dev *pdev, int dev_idx) mci->mod_name = EDAC_MOD_STR; mci->mod_ver = R82600_REVISION; mci->ctl_name = "R82600"; + mci->dev_name = pci_name(pdev); mci->edac_check = r82600_check; mci->ctl_page_to_phys = NULL; r82600_init_csrows(mci, pdev, dramcr); - r82600_get_error_info(mci, &discard); /* clear counters */ + r82600_get_error_info(mci, &discard); /* clear counters */ /* Here we assume that we will never see multiple instances of this * type of memory controller. The ID is therefore hardcoded to 0. 
*/ - if (edac_mc_add_mc(mci,0)) { + if (edac_mc_add_mc(mci)) { debugf3("%s(): failed edac_mc_add_mc()\n", __func__); goto fail; } @@ -326,6 +329,17 @@ static int r82600_probe1(struct pci_dev *pdev, int dev_idx) pci_write_bits32(pdev, R82600_EAP, BIT(31), BIT(31)); } + /* allocating generic PCI control info */ + r82600_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR); + if (!r82600_pci) { + printk(KERN_WARNING + "%s(): Unable to create PCI control\n", + __func__); + printk(KERN_WARNING + "%s(): PCI error report via EDAC not setup\n", + __func__); + } + debugf3("%s(): success\n", __func__); return 0; @@ -336,7 +350,7 @@ fail: /* returns count (>= 0), or negative on error */ static int __devinit r82600_init_one(struct pci_dev *pdev, - const struct pci_device_id *ent) + const struct pci_device_id *ent) { debugf0("%s()\n", __func__); @@ -350,6 +364,9 @@ static void __devexit r82600_remove_one(struct pci_dev *pdev) debugf0("%s()\n", __func__); + if (r82600_pci) + edac_pci_release_generic_ctl(r82600_pci); + if ((mci = edac_mc_del_mc(&pdev->dev)) == NULL) return; @@ -358,11 +375,11 @@ static void __devexit r82600_remove_one(struct pci_dev *pdev) static const struct pci_device_id r82600_pci_tbl[] __devinitdata = { { - PCI_DEVICE(PCI_VENDOR_ID_RADISYS, R82600_BRIDGE_ID) - }, + PCI_DEVICE(PCI_VENDOR_ID_RADISYS, R82600_BRIDGE_ID) + }, { - 0, - } /* 0 terminated list. */ + 0, + } /* 0 terminated list. */ }; MODULE_DEVICE_TABLE(pci, r82600_pci_tbl); @@ -389,7 +406,7 @@ module_exit(r82600_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Tim Small <tim@buttersideup.com> - WPAD Ltd. 
" - "on behalf of EADS Astrium"); + "on behalf of EADS Astrium"); MODULE_DESCRIPTION("MC support for Radisys 82600 memory controllers"); module_param(disable_hardware_scrub, bool, 0644); diff --git a/drivers/firewire/fw-ohci.c b/drivers/firewire/fw-ohci.c index 41476abc069..db703758db9 100644 --- a/drivers/firewire/fw-ohci.c +++ b/drivers/firewire/fw-ohci.c @@ -224,6 +224,7 @@ ohci_update_phy_reg(struct fw_card *card, int addr, u32 val, old; reg_write(ohci, OHCI1394_PhyControl, OHCI1394_PhyControl_Read(addr)); + flush_writes(ohci); msleep(2); val = reg_read(ohci, OHCI1394_PhyControl); if ((val & OHCI1394_PhyControl_ReadDone) == 0) { @@ -586,7 +587,7 @@ static void context_stop(struct context *ctx) break; fw_notify("context_stop: still active (0x%08x)\n", reg); - msleep(1); + mdelay(1); } } diff --git a/drivers/firewire/fw-sbp2.c b/drivers/firewire/fw-sbp2.c index 7c53be0387f..fc984474162 100644 --- a/drivers/firewire/fw-sbp2.c +++ b/drivers/firewire/fw-sbp2.c @@ -840,7 +840,6 @@ complete_command_orb(struct sbp2_orb *base_orb, struct sbp2_status *status) container_of(base_orb, struct sbp2_command_orb, base); struct fw_unit *unit = orb->unit; struct fw_device *device = fw_device(unit->device.parent); - struct scatterlist *sg; int result; if (status != NULL) { @@ -876,11 +875,10 @@ complete_command_orb(struct sbp2_orb *base_orb, struct sbp2_status *status) dma_unmap_single(device->card->device, orb->base.request_bus, sizeof(orb->request), DMA_TO_DEVICE); - if (orb->cmd->use_sg > 0) { - sg = (struct scatterlist *)orb->cmd->request_buffer; - dma_unmap_sg(device->card->device, sg, orb->cmd->use_sg, + if (scsi_sg_count(orb->cmd) > 0) + dma_unmap_sg(device->card->device, scsi_sglist(orb->cmd), + scsi_sg_count(orb->cmd), orb->cmd->sc_data_direction); - } if (orb->page_table_bus != 0) dma_unmap_single(device->card->device, orb->page_table_bus, @@ -901,8 +899,8 @@ static int sbp2_command_orb_map_scatterlist(struct sbp2_command_orb *orb) int sg_len, l, i, j, count; dma_addr_t 
sg_addr; - sg = (struct scatterlist *)orb->cmd->request_buffer; - count = dma_map_sg(device->card->device, sg, orb->cmd->use_sg, + sg = scsi_sglist(orb->cmd); + count = dma_map_sg(device->card->device, sg, scsi_sg_count(orb->cmd), orb->cmd->sc_data_direction); if (count == 0) goto fail; @@ -971,7 +969,7 @@ static int sbp2_command_orb_map_scatterlist(struct sbp2_command_orb *orb) return 0; fail_page_table: - dma_unmap_sg(device->card->device, sg, orb->cmd->use_sg, + dma_unmap_sg(device->card->device, sg, scsi_sg_count(orb->cmd), orb->cmd->sc_data_direction); fail: return -ENOMEM; @@ -1031,7 +1029,7 @@ static int sbp2_scsi_queuecommand(struct scsi_cmnd *cmd, scsi_done_fn_t done) orb->request.misc |= COMMAND_ORB_DIRECTION(SBP2_DIRECTION_TO_MEDIA); - if (cmd->use_sg && sbp2_command_orb_map_scatterlist(orb) < 0) + if (scsi_sg_count(cmd) && sbp2_command_orb_map_scatterlist(orb) < 0) goto fail_mapping; fw_memcpy_to_be32(&orb->request, &orb->request, sizeof(orb->request)); diff --git a/drivers/firewire/fw-transaction.c b/drivers/firewire/fw-transaction.c index 80d0121463d..3ce8e2fbe15 100644 --- a/drivers/firewire/fw-transaction.c +++ b/drivers/firewire/fw-transaction.c @@ -605,8 +605,10 @@ fw_send_response(struct fw_card *card, struct fw_request *request, int rcode) * check is sufficient to ensure we don't send response to * broadcast packets or posted writes. */ - if (request->ack != ACK_PENDING) + if (request->ack != ACK_PENDING) { + kfree(request); return; + } if (rcode == RCODE_COMPLETE) fw_fill_response(&request->response, request->request_header, @@ -628,11 +630,6 @@ fw_core_handle_request(struct fw_card *card, struct fw_packet *p) unsigned long flags; int tcode, destination, source; - if (p->payload_length > 2048) { - /* FIXME: send error response. 
*/ - return; - } - if (p->ack != ACK_PENDING && p->ack != ACK_COMPLETE) return; diff --git a/drivers/firewire/fw-transaction.h b/drivers/firewire/fw-transaction.h index 5abed193f4a..5ceaccd1056 100644 --- a/drivers/firewire/fw-transaction.h +++ b/drivers/firewire/fw-transaction.h @@ -123,6 +123,10 @@ typedef void (*fw_transaction_callback_t)(struct fw_card *card, int rcode, size_t length, void *callback_data); +/* + * Important note: The callback must guarantee that either fw_send_response() + * or kfree() is called on the @request. + */ typedef void (*fw_address_callback_t)(struct fw_card *card, struct fw_request *request, int tcode, int destination, int source, diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index c5b5011da56..f9de7984441 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -55,7 +55,7 @@ #include <asm/bitops.h> static int __ide_end_request(ide_drive_t *drive, struct request *rq, - int uptodate, int nr_sectors) + int uptodate, unsigned int nr_bytes) { int ret = 1; @@ -64,7 +64,7 @@ static int __ide_end_request(ide_drive_t *drive, struct request *rq, * complete the whole request right now */ if (blk_noretry_request(rq) && end_io_error(uptodate)) - nr_sectors = rq->hard_nr_sectors; + nr_bytes = rq->hard_nr_sectors << 9; if (!blk_fs_request(rq) && end_io_error(uptodate) && !rq->errors) rq->errors = -EIO; @@ -78,7 +78,7 @@ static int __ide_end_request(ide_drive_t *drive, struct request *rq, HWGROUP(drive)->hwif->ide_dma_on(drive); } - if (!end_that_request_first(rq, uptodate, nr_sectors)) { + if (!end_that_request_chunk(rq, uptodate, nr_bytes)) { add_disk_randomness(rq->rq_disk); if (!list_empty(&rq->queuelist)) blkdev_dequeue_request(rq); @@ -103,6 +103,7 @@ static int __ide_end_request(ide_drive_t *drive, struct request *rq, int ide_end_request (ide_drive_t *drive, int uptodate, int nr_sectors) { + unsigned int nr_bytes = nr_sectors << 9; struct request *rq; unsigned long flags; int ret = 1; @@ -114,10 +115,14 @@ int 
ide_end_request (ide_drive_t *drive, int uptodate, int nr_sectors) spin_lock_irqsave(&ide_lock, flags); rq = HWGROUP(drive)->rq; - if (!nr_sectors) - nr_sectors = rq->hard_cur_sectors; + if (!nr_bytes) { + if (blk_pc_request(rq)) + nr_bytes = rq->data_len; + else + nr_bytes = rq->hard_cur_sectors << 9; + } - ret = __ide_end_request(drive, rq, uptodate, nr_sectors); + ret = __ide_end_request(drive, rq, uptodate, nr_bytes); spin_unlock_irqrestore(&ide_lock, flags); return ret; diff --git a/drivers/ide/mips/swarm.c b/drivers/ide/mips/swarm.c index 6e935d7c63f..c2e29571b00 100644 --- a/drivers/ide/mips/swarm.c +++ b/drivers/ide/mips/swarm.c @@ -165,12 +165,11 @@ static int __devinit swarm_ide_init_module(void) goto out; } - if (!(pldev = kmalloc(sizeof (*pldev), GFP_KERNEL))) { + if (!(pldev = kzalloc(sizeof (*pldev), GFP_KERNEL))) { err = -ENOMEM; goto out_unregister_driver; } - memset (pldev, 0, sizeof (*pldev)); pldev->name = swarm_ide_string; pldev->id = 0; pldev->dev.release = swarm_ide_platform_release; diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index a91001c59b6..c5c33d35f87 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -295,10 +295,9 @@ int rdma_resolve_ip(struct rdma_addr_client *client, struct addr_req *req; int ret = 0; - req = kmalloc(sizeof *req, GFP_KERNEL); + req = kzalloc(sizeof *req, GFP_KERNEL); if (!req) return -ENOMEM; - memset(req, 0, sizeof *req); if (src_addr) memcpy(&req->src_addr, src_addr, ip_addr_size(src_addr)); diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 9820c67ba47..4df269f5d9a 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -3374,7 +3374,7 @@ int ib_cm_init_qp_attr(struct ib_cm_id *cm_id, } EXPORT_SYMBOL(ib_cm_init_qp_attr); -void cm_get_ack_delay(struct cm_device *cm_dev) +static void cm_get_ack_delay(struct cm_device *cm_dev) { struct ib_device_attr attr; diff --git a/drivers/infiniband/core/cma.c 
b/drivers/infiniband/core/cma.c index 23af7a032a0..9ffb9987450 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -573,7 +573,7 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr, break; case RDMA_TRANSPORT_IWARP: if (!id_priv->cm_id.iw) { - qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE; + qp_attr->qp_access_flags = 0; *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS; } else ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr, diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index 3b41dc0c39d..9574088f0d4 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -229,9 +229,8 @@ static void *alloc_ep(int size, gfp_t gfp) { struct iwch_ep_common *epc; - epc = kmalloc(size, gfp); + epc = kzalloc(size, gfp); if (epc) { - memset(epc, 0, size); kref_init(&epc->kref); spin_lock_init(&epc->lock); init_waitqueue_head(&epc->waitq); @@ -1914,6 +1913,7 @@ int iwch_create_listen(struct iw_cm_id *cm_id, int backlog) fail3: cxgb3_free_stid(ep->com.tdev, ep->stid); fail2: + cm_id->rem_ref(cm_id); put_ep(&ep->com); fail1: out: diff --git a/drivers/infiniband/hw/ehca/ehca_av.c b/drivers/infiniband/hw/ehca/ehca_av.c index 3cd6bf3402d..e53a97af126 100644 --- a/drivers/infiniband/hw/ehca/ehca_av.c +++ b/drivers/infiniband/hw/ehca/ehca_av.c @@ -79,7 +79,7 @@ struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) av->av.ipd = (ah_mult > 0) ? 
((ehca_mult - 1) / ah_mult) : 0; } else - av->av.ipd = ehca_static_rate; + av->av.ipd = ehca_static_rate; av->av.lnh = ah_attr->ah_flags; av->av.grh.word_0 = EHCA_BMASK_SET(GRH_IPVERSION_MASK, 6); diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h index daf823ea1ac..043e4fb23fb 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -204,11 +204,11 @@ struct ehca_mr { spinlock_t mrlock; enum ehca_mr_flag flags; - u32 num_pages; /* number of MR pages */ - u32 num_4k; /* number of 4k "page" portions to form MR */ + u32 num_kpages; /* number of kernel pages */ + u32 num_hwpages; /* number of hw pages to form MR */ int acl; /* ACL (stored here for usage in reregister) */ u64 *start; /* virtual start address (stored here for */ - /* usage in reregister) */ + /* usage in reregister) */ u64 size; /* size (stored here for usage in reregister) */ u32 fmr_page_size; /* page size for FMR */ u32 fmr_max_pages; /* max pages for FMR */ @@ -217,9 +217,6 @@ struct ehca_mr { /* fw specific data */ struct ipz_mrmw_handle ipz_mr_handle; /* MR handle for h-calls */ struct h_galpas galpas; - /* data for userspace bridge */ - u32 nr_of_pages; - void *pagearray; }; struct ehca_mw { @@ -241,26 +238,29 @@ enum ehca_mr_pgi_type { struct ehca_mr_pginfo { enum ehca_mr_pgi_type type; - u64 num_pages; - u64 page_cnt; - u64 num_4k; /* number of 4k "page" portions */ - u64 page_4k_cnt; /* counter for 4k "page" portions */ - u64 next_4k; /* next 4k "page" portion in buffer/chunk/listelem */ - - /* type EHCA_MR_PGI_PHYS section */ - int num_phys_buf; - struct ib_phys_buf *phys_buf_array; - u64 next_buf; - - /* type EHCA_MR_PGI_USER section */ - struct ib_umem *region; - struct ib_umem_chunk *next_chunk; - u64 next_nmap; - - /* type EHCA_MR_PGI_FMR section */ - u64 *page_list; - u64 next_listelem; - /* next_4k also used within EHCA_MR_PGI_FMR */ + u64 num_kpages; + u64 kpage_cnt; + u64 num_hwpages; /* 
number of hw pages */ + u64 hwpage_cnt; /* counter for hw pages */ + u64 next_hwpage; /* next hw page in buffer/chunk/listelem */ + + union { + struct { /* type EHCA_MR_PGI_PHYS section */ + int num_phys_buf; + struct ib_phys_buf *phys_buf_array; + u64 next_buf; + } phy; + struct { /* type EHCA_MR_PGI_USER section */ + struct ib_umem *region; + struct ib_umem_chunk *next_chunk; + u64 next_nmap; + } usr; + struct { /* type EHCA_MR_PGI_FMR section */ + u64 fmr_pgsize; + u64 *page_list; + u64 next_listelem; + } fmr; + } u; }; /* output parameters for MR/FMR hipz calls */ @@ -391,6 +391,6 @@ struct ehca_alloc_qp_parms { int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp); int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int qp_num); -struct ehca_qp* ehca_cq_get_qp(struct ehca_cq *cq, int qp_num); +struct ehca_qp *ehca_cq_get_qp(struct ehca_cq *cq, int qp_num); #endif diff --git a/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h index fb3df5c271e..1798e6466bd 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h +++ b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h @@ -154,83 +154,83 @@ struct hcp_modify_qp_control_block { u32 reserved_70_127[58]; /* 70 */ }; -#define MQPCB_MASK_QKEY EHCA_BMASK_IBM(0,0) -#define MQPCB_MASK_SEND_PSN EHCA_BMASK_IBM(2,2) -#define MQPCB_MASK_RECEIVE_PSN EHCA_BMASK_IBM(3,3) -#define MQPCB_MASK_PRIM_PHYS_PORT EHCA_BMASK_IBM(4,4) -#define MQPCB_PRIM_PHYS_PORT EHCA_BMASK_IBM(24,31) -#define MQPCB_MASK_ALT_PHYS_PORT EHCA_BMASK_IBM(5,5) -#define MQPCB_MASK_PRIM_P_KEY_IDX EHCA_BMASK_IBM(6,6) -#define MQPCB_PRIM_P_KEY_IDX EHCA_BMASK_IBM(24,31) -#define MQPCB_MASK_ALT_P_KEY_IDX EHCA_BMASK_IBM(7,7) -#define MQPCB_MASK_RDMA_ATOMIC_CTRL EHCA_BMASK_IBM(8,8) -#define MQPCB_MASK_QP_STATE EHCA_BMASK_IBM(9,9) -#define MQPCB_QP_STATE EHCA_BMASK_IBM(24,31) -#define MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES EHCA_BMASK_IBM(11,11) -#define MQPCB_MASK_PATH_MIGRATION_STATE EHCA_BMASK_IBM(12,12) 
-#define MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP EHCA_BMASK_IBM(13,13) -#define MQPCB_MASK_DEST_QP_NR EHCA_BMASK_IBM(14,14) -#define MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD EHCA_BMASK_IBM(15,15) -#define MQPCB_MASK_SERVICE_LEVEL EHCA_BMASK_IBM(16,16) -#define MQPCB_MASK_SEND_GRH_FLAG EHCA_BMASK_IBM(17,17) -#define MQPCB_MASK_RETRY_COUNT EHCA_BMASK_IBM(18,18) -#define MQPCB_MASK_TIMEOUT EHCA_BMASK_IBM(19,19) -#define MQPCB_MASK_PATH_MTU EHCA_BMASK_IBM(20,20) -#define MQPCB_PATH_MTU EHCA_BMASK_IBM(24,31) -#define MQPCB_MASK_MAX_STATIC_RATE EHCA_BMASK_IBM(21,21) -#define MQPCB_MAX_STATIC_RATE EHCA_BMASK_IBM(24,31) -#define MQPCB_MASK_DLID EHCA_BMASK_IBM(22,22) -#define MQPCB_DLID EHCA_BMASK_IBM(16,31) -#define MQPCB_MASK_RNR_RETRY_COUNT EHCA_BMASK_IBM(23,23) -#define MQPCB_RNR_RETRY_COUNT EHCA_BMASK_IBM(29,31) -#define MQPCB_MASK_SOURCE_PATH_BITS EHCA_BMASK_IBM(24,24) -#define MQPCB_SOURCE_PATH_BITS EHCA_BMASK_IBM(25,31) -#define MQPCB_MASK_TRAFFIC_CLASS EHCA_BMASK_IBM(25,25) -#define MQPCB_TRAFFIC_CLASS EHCA_BMASK_IBM(24,31) -#define MQPCB_MASK_HOP_LIMIT EHCA_BMASK_IBM(26,26) -#define MQPCB_HOP_LIMIT EHCA_BMASK_IBM(24,31) -#define MQPCB_MASK_SOURCE_GID_IDX EHCA_BMASK_IBM(27,27) -#define MQPCB_SOURCE_GID_IDX EHCA_BMASK_IBM(24,31) -#define MQPCB_MASK_FLOW_LABEL EHCA_BMASK_IBM(28,28) -#define MQPCB_FLOW_LABEL EHCA_BMASK_IBM(12,31) -#define MQPCB_MASK_DEST_GID EHCA_BMASK_IBM(30,30) -#define MQPCB_MASK_SERVICE_LEVEL_AL EHCA_BMASK_IBM(31,31) -#define MQPCB_SERVICE_LEVEL_AL EHCA_BMASK_IBM(28,31) -#define MQPCB_MASK_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(32,32) -#define MQPCB_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(31,31) -#define MQPCB_MASK_RETRY_COUNT_AL EHCA_BMASK_IBM(33,33) -#define MQPCB_RETRY_COUNT_AL EHCA_BMASK_IBM(29,31) -#define MQPCB_MASK_TIMEOUT_AL EHCA_BMASK_IBM(34,34) -#define MQPCB_TIMEOUT_AL EHCA_BMASK_IBM(27,31) -#define MQPCB_MASK_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(35,35) -#define MQPCB_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(24,31) -#define MQPCB_MASK_DLID_AL EHCA_BMASK_IBM(36,36) 
-#define MQPCB_DLID_AL EHCA_BMASK_IBM(16,31) -#define MQPCB_MASK_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(37,37) -#define MQPCB_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(29,31) -#define MQPCB_MASK_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(38,38) -#define MQPCB_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(25,31) -#define MQPCB_MASK_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(39,39) -#define MQPCB_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(24,31) -#define MQPCB_MASK_HOP_LIMIT_AL EHCA_BMASK_IBM(40,40) -#define MQPCB_HOP_LIMIT_AL EHCA_BMASK_IBM(24,31) -#define MQPCB_MASK_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(41,41) -#define MQPCB_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(24,31) -#define MQPCB_MASK_FLOW_LABEL_AL EHCA_BMASK_IBM(42,42) -#define MQPCB_FLOW_LABEL_AL EHCA_BMASK_IBM(12,31) -#define MQPCB_MASK_DEST_GID_AL EHCA_BMASK_IBM(44,44) -#define MQPCB_MASK_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(45,45) -#define MQPCB_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(16,31) -#define MQPCB_MASK_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(46,46) -#define MQPCB_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(16,31) -#define MQPCB_MASK_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(47,47) -#define MQPCB_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(31,31) -#define MQPCB_QP_NUMBER EHCA_BMASK_IBM(8,31) -#define MQPCB_MASK_QP_ENABLE EHCA_BMASK_IBM(48,48) -#define MQPCB_QP_ENABLE EHCA_BMASK_IBM(31,31) -#define MQPCB_MASK_CURR_SRQ_LIMIT EHCA_BMASK_IBM(49,49) -#define MQPCB_CURR_SRQ_LIMIT EHCA_BMASK_IBM(16,31) -#define MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG EHCA_BMASK_IBM(50,50) -#define MQPCB_MASK_SHARED_RQ_HNDL EHCA_BMASK_IBM(51,51) +#define MQPCB_MASK_QKEY EHCA_BMASK_IBM( 0, 0) +#define MQPCB_MASK_SEND_PSN EHCA_BMASK_IBM( 2, 2) +#define MQPCB_MASK_RECEIVE_PSN EHCA_BMASK_IBM( 3, 3) +#define MQPCB_MASK_PRIM_PHYS_PORT EHCA_BMASK_IBM( 4, 4) +#define MQPCB_PRIM_PHYS_PORT EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_ALT_PHYS_PORT EHCA_BMASK_IBM( 5, 5) +#define MQPCB_MASK_PRIM_P_KEY_IDX EHCA_BMASK_IBM( 6, 6) +#define MQPCB_PRIM_P_KEY_IDX EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_ALT_P_KEY_IDX EHCA_BMASK_IBM( 
7, 7) +#define MQPCB_MASK_RDMA_ATOMIC_CTRL EHCA_BMASK_IBM( 8, 8) +#define MQPCB_MASK_QP_STATE EHCA_BMASK_IBM( 9, 9) +#define MQPCB_QP_STATE EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES EHCA_BMASK_IBM(11, 11) +#define MQPCB_MASK_PATH_MIGRATION_STATE EHCA_BMASK_IBM(12, 12) +#define MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP EHCA_BMASK_IBM(13, 13) +#define MQPCB_MASK_DEST_QP_NR EHCA_BMASK_IBM(14, 14) +#define MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD EHCA_BMASK_IBM(15, 15) +#define MQPCB_MASK_SERVICE_LEVEL EHCA_BMASK_IBM(16, 16) +#define MQPCB_MASK_SEND_GRH_FLAG EHCA_BMASK_IBM(17, 17) +#define MQPCB_MASK_RETRY_COUNT EHCA_BMASK_IBM(18, 18) +#define MQPCB_MASK_TIMEOUT EHCA_BMASK_IBM(19, 19) +#define MQPCB_MASK_PATH_MTU EHCA_BMASK_IBM(20, 20) +#define MQPCB_PATH_MTU EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_MAX_STATIC_RATE EHCA_BMASK_IBM(21, 21) +#define MQPCB_MAX_STATIC_RATE EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_DLID EHCA_BMASK_IBM(22, 22) +#define MQPCB_DLID EHCA_BMASK_IBM(16, 31) +#define MQPCB_MASK_RNR_RETRY_COUNT EHCA_BMASK_IBM(23, 23) +#define MQPCB_RNR_RETRY_COUNT EHCA_BMASK_IBM(29, 31) +#define MQPCB_MASK_SOURCE_PATH_BITS EHCA_BMASK_IBM(24, 24) +#define MQPCB_SOURCE_PATH_BITS EHCA_BMASK_IBM(25, 31) +#define MQPCB_MASK_TRAFFIC_CLASS EHCA_BMASK_IBM(25, 25) +#define MQPCB_TRAFFIC_CLASS EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_HOP_LIMIT EHCA_BMASK_IBM(26, 26) +#define MQPCB_HOP_LIMIT EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_SOURCE_GID_IDX EHCA_BMASK_IBM(27, 27) +#define MQPCB_SOURCE_GID_IDX EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_FLOW_LABEL EHCA_BMASK_IBM(28, 28) +#define MQPCB_FLOW_LABEL EHCA_BMASK_IBM(12, 31) +#define MQPCB_MASK_DEST_GID EHCA_BMASK_IBM(30, 30) +#define MQPCB_MASK_SERVICE_LEVEL_AL EHCA_BMASK_IBM(31, 31) +#define MQPCB_SERVICE_LEVEL_AL EHCA_BMASK_IBM(28, 31) +#define MQPCB_MASK_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(32, 32) +#define MQPCB_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(31, 31) +#define MQPCB_MASK_RETRY_COUNT_AL EHCA_BMASK_IBM(33, 33) 
+#define MQPCB_RETRY_COUNT_AL EHCA_BMASK_IBM(29, 31) +#define MQPCB_MASK_TIMEOUT_AL EHCA_BMASK_IBM(34, 34) +#define MQPCB_TIMEOUT_AL EHCA_BMASK_IBM(27, 31) +#define MQPCB_MASK_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(35, 35) +#define MQPCB_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_DLID_AL EHCA_BMASK_IBM(36, 36) +#define MQPCB_DLID_AL EHCA_BMASK_IBM(16, 31) +#define MQPCB_MASK_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(37, 37) +#define MQPCB_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(29, 31) +#define MQPCB_MASK_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(38, 38) +#define MQPCB_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(25, 31) +#define MQPCB_MASK_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(39, 39) +#define MQPCB_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_HOP_LIMIT_AL EHCA_BMASK_IBM(40, 40) +#define MQPCB_HOP_LIMIT_AL EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(41, 41) +#define MQPCB_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_FLOW_LABEL_AL EHCA_BMASK_IBM(42, 42) +#define MQPCB_FLOW_LABEL_AL EHCA_BMASK_IBM(12, 31) +#define MQPCB_MASK_DEST_GID_AL EHCA_BMASK_IBM(44, 44) +#define MQPCB_MASK_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(45, 45) +#define MQPCB_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(16, 31) +#define MQPCB_MASK_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(46, 46) +#define MQPCB_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(16, 31) +#define MQPCB_MASK_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(47, 47) +#define MQPCB_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(31, 31) +#define MQPCB_QP_NUMBER EHCA_BMASK_IBM( 8, 31) +#define MQPCB_MASK_QP_ENABLE EHCA_BMASK_IBM(48, 48) +#define MQPCB_QP_ENABLE EHCA_BMASK_IBM(31, 31) +#define MQPCB_MASK_CURR_SRQ_LIMIT EHCA_BMASK_IBM(49, 49) +#define MQPCB_CURR_SRQ_LIMIT EHCA_BMASK_IBM(16, 31) +#define MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG EHCA_BMASK_IBM(50, 50) +#define MQPCB_MASK_SHARED_RQ_HNDL EHCA_BMASK_IBM(51, 51) #endif /* __EHCA_CLASSES_PSERIES_H__ */ diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c 
b/drivers/infiniband/hw/ehca/ehca_cq.c index 01d4a148bd7..9e87883b561 100644 --- a/drivers/infiniband/hw/ehca/ehca_cq.c +++ b/drivers/infiniband/hw/ehca/ehca_cq.c @@ -97,7 +97,7 @@ int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int real_qp_num) return ret; } -struct ehca_qp* ehca_cq_get_qp(struct ehca_cq *cq, int real_qp_num) +struct ehca_qp *ehca_cq_get_qp(struct ehca_cq *cq, int real_qp_num) { struct ehca_qp *ret = NULL; unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1); diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/infiniband/hw/ehca/ehca_eq.c index 4961eb88827..4825975f88c 100644 --- a/drivers/infiniband/hw/ehca/ehca_eq.c +++ b/drivers/infiniband/hw/ehca/ehca_eq.c @@ -96,7 +96,8 @@ int ehca_create_eq(struct ehca_shca *shca, for (i = 0; i < nr_pages; i++) { u64 rpage; - if (!(vpage = ipz_qpageit_get_inc(&eq->ipz_queue))) { + vpage = ipz_qpageit_get_inc(&eq->ipz_queue); + if (!vpage) { ret = H_RESOURCE; goto create_eq_exit2; } diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c index bbd3c6a5822..fc19ef9fd96 100644 --- a/drivers/infiniband/hw/ehca/ehca_hca.c +++ b/drivers/infiniband/hw/ehca/ehca_hca.c @@ -127,6 +127,7 @@ int ehca_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) { int ret = 0; + u64 h_ret; struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, ib_device); struct hipz_query_port *rblock; @@ -137,7 +138,8 @@ int ehca_query_port(struct ib_device *ibdev, return -ENOMEM; } - if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) { + h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); + if (h_ret != H_SUCCESS) { ehca_err(&shca->ib_device, "Can't query port properties"); ret = -EINVAL; goto query_port1; @@ -197,6 +199,7 @@ int ehca_query_sma_attr(struct ehca_shca *shca, u8 port, struct ehca_sma_attr *attr) { int ret = 0; + u64 h_ret; struct hipz_query_port *rblock; rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC); @@ -205,7 +208,8 @@ int 
ehca_query_sma_attr(struct ehca_shca *shca, return -ENOMEM; } - if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) { + h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); + if (h_ret != H_SUCCESS) { ehca_err(&shca->ib_device, "Can't query port properties"); ret = -EINVAL; goto query_sma_attr1; @@ -230,9 +234,11 @@ query_sma_attr1: int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) { int ret = 0; - struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, ib_device); + u64 h_ret; + struct ehca_shca *shca; struct hipz_query_port *rblock; + shca = container_of(ibdev, struct ehca_shca, ib_device); if (index > 16) { ehca_err(&shca->ib_device, "Invalid index: %x.", index); return -EINVAL; @@ -244,7 +250,8 @@ int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) return -ENOMEM; } - if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) { + h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); + if (h_ret != H_SUCCESS) { ehca_err(&shca->ib_device, "Can't query port properties"); ret = -EINVAL; goto query_pkey1; @@ -262,6 +269,7 @@ int ehca_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *gid) { int ret = 0; + u64 h_ret; struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, ib_device); struct hipz_query_port *rblock; @@ -277,7 +285,8 @@ int ehca_query_gid(struct ib_device *ibdev, u8 port, return -ENOMEM; } - if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) { + h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); + if (h_ret != H_SUCCESS) { ehca_err(&shca->ib_device, "Can't query port properties"); ret = -EINVAL; goto query_gid1; @@ -302,11 +311,12 @@ int ehca_modify_port(struct ib_device *ibdev, struct ib_port_modify *props) { int ret = 0; - struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, ib_device); + struct ehca_shca *shca; struct hipz_query_port *rblock; u32 cap; u64 hret; + shca = 
container_of(ibdev, struct ehca_shca, ib_device); if ((props->set_port_cap_mask | props->clr_port_cap_mask) & ~allowed_port_caps) { ehca_err(&shca->ib_device, "Non-changeable bits set in masks " @@ -325,7 +335,8 @@ int ehca_modify_port(struct ib_device *ibdev, goto modify_port1; } - if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) { + hret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); + if (hret != H_SUCCESS) { ehca_err(&shca->ib_device, "Can't query port properties"); ret = -EINVAL; goto modify_port2; @@ -337,7 +348,8 @@ int ehca_modify_port(struct ib_device *ibdev, hret = hipz_h_modify_port(shca->ipz_hca_handle, port, cap, props->init_type, port_modify_mask); if (hret != H_SUCCESS) { - ehca_err(&shca->ib_device, "Modify port failed hret=%lx", hret); + ehca_err(&shca->ib_device, "Modify port failed hret=%lx", + hret); ret = -EINVAL; } diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c index 96eba383075..4fb01fcb63a 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.c +++ b/drivers/infiniband/hw/ehca/ehca_irq.c @@ -49,26 +49,26 @@ #include "hipz_fns.h" #include "ipz_pt_fn.h" -#define EQE_COMPLETION_EVENT EHCA_BMASK_IBM(1,1) -#define EQE_CQ_QP_NUMBER EHCA_BMASK_IBM(8,31) -#define EQE_EE_IDENTIFIER EHCA_BMASK_IBM(2,7) -#define EQE_CQ_NUMBER EHCA_BMASK_IBM(8,31) -#define EQE_QP_NUMBER EHCA_BMASK_IBM(8,31) -#define EQE_QP_TOKEN EHCA_BMASK_IBM(32,63) -#define EQE_CQ_TOKEN EHCA_BMASK_IBM(32,63) - -#define NEQE_COMPLETION_EVENT EHCA_BMASK_IBM(1,1) -#define NEQE_EVENT_CODE EHCA_BMASK_IBM(2,7) -#define NEQE_PORT_NUMBER EHCA_BMASK_IBM(8,15) -#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16,16) -#define NEQE_DISRUPTIVE EHCA_BMASK_IBM(16,16) - -#define ERROR_DATA_LENGTH EHCA_BMASK_IBM(52,63) -#define ERROR_DATA_TYPE EHCA_BMASK_IBM(0,7) +#define EQE_COMPLETION_EVENT EHCA_BMASK_IBM( 1, 1) +#define EQE_CQ_QP_NUMBER EHCA_BMASK_IBM( 8, 31) +#define EQE_EE_IDENTIFIER EHCA_BMASK_IBM( 2, 7) +#define EQE_CQ_NUMBER 
EHCA_BMASK_IBM( 8, 31) +#define EQE_QP_NUMBER EHCA_BMASK_IBM( 8, 31) +#define EQE_QP_TOKEN EHCA_BMASK_IBM(32, 63) +#define EQE_CQ_TOKEN EHCA_BMASK_IBM(32, 63) + +#define NEQE_COMPLETION_EVENT EHCA_BMASK_IBM( 1, 1) +#define NEQE_EVENT_CODE EHCA_BMASK_IBM( 2, 7) +#define NEQE_PORT_NUMBER EHCA_BMASK_IBM( 8, 15) +#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16, 16) +#define NEQE_DISRUPTIVE EHCA_BMASK_IBM(16, 16) + +#define ERROR_DATA_LENGTH EHCA_BMASK_IBM(52, 63) +#define ERROR_DATA_TYPE EHCA_BMASK_IBM( 0, 7) static void queue_comp_task(struct ehca_cq *__cq); -static struct ehca_comp_pool* pool; +static struct ehca_comp_pool *pool; #ifdef CONFIG_HOTPLUG_CPU static struct notifier_block comp_pool_callback_nb; #endif @@ -85,8 +85,8 @@ static inline void comp_event_callback(struct ehca_cq *cq) return; } -static void print_error_data(struct ehca_shca * shca, void* data, - u64* rblock, int length) +static void print_error_data(struct ehca_shca *shca, void *data, + u64 *rblock, int length) { u64 type = EHCA_BMASK_GET(ERROR_DATA_TYPE, rblock[2]); u64 resource = rblock[1]; @@ -94,7 +94,7 @@ static void print_error_data(struct ehca_shca * shca, void* data, switch (type) { case 0x1: /* Queue Pair */ { - struct ehca_qp *qp = (struct ehca_qp*)data; + struct ehca_qp *qp = (struct ehca_qp *)data; /* only print error data if AER is set */ if (rblock[6] == 0) @@ -107,7 +107,7 @@ static void print_error_data(struct ehca_shca * shca, void* data, } case 0x4: /* Completion Queue */ { - struct ehca_cq *cq = (struct ehca_cq*)data; + struct ehca_cq *cq = (struct ehca_cq *)data; ehca_err(&shca->ib_device, "CQ 0x%x (resource=%lx) has errors.", @@ -572,7 +572,7 @@ void ehca_tasklet_eq(unsigned long data) ehca_process_eq((struct ehca_shca*)data, 1); } -static inline int find_next_online_cpu(struct ehca_comp_pool* pool) +static inline int find_next_online_cpu(struct ehca_comp_pool *pool) { int cpu; unsigned long flags; @@ -636,7 +636,7 @@ static void queue_comp_task(struct ehca_cq *__cq) 
__queue_comp_task(__cq, cct); } -static void run_comp_task(struct ehca_cpu_comp_task* cct) +static void run_comp_task(struct ehca_cpu_comp_task *cct) { struct ehca_cq *cq; unsigned long flags; @@ -666,12 +666,12 @@ static void run_comp_task(struct ehca_cpu_comp_task* cct) static int comp_task(void *__cct) { - struct ehca_cpu_comp_task* cct = __cct; + struct ehca_cpu_comp_task *cct = __cct; int cql_empty; DECLARE_WAITQUEUE(wait, current); set_current_state(TASK_INTERRUPTIBLE); - while(!kthread_should_stop()) { + while (!kthread_should_stop()) { add_wait_queue(&cct->wait_queue, &wait); spin_lock_irq(&cct->task_lock); @@ -745,7 +745,7 @@ static void take_over_work(struct ehca_comp_pool *pool, list_splice_init(&cct->cq_list, &list); - while(!list_empty(&list)) { + while (!list_empty(&list)) { cq = list_entry(cct->cq_list.next, struct ehca_cq, entry); list_del(&cq->entry); @@ -768,7 +768,7 @@ static int comp_pool_callback(struct notifier_block *nfb, case CPU_UP_PREPARE: case CPU_UP_PREPARE_FROZEN: ehca_gen_dbg("CPU: %x (CPU_PREPARE)", cpu); - if(!create_comp_task(pool, cpu)) { + if (!create_comp_task(pool, cpu)) { ehca_gen_err("Can't create comp_task for cpu: %x", cpu); return NOTIFY_BAD; } @@ -838,7 +838,7 @@ int ehca_create_comp_pool(void) #ifdef CONFIG_HOTPLUG_CPU comp_pool_callback_nb.notifier_call = comp_pool_callback; - comp_pool_callback_nb.priority =0; + comp_pool_callback_nb.priority = 0; register_cpu_notifier(&comp_pool_callback_nb); #endif diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h index 77aeca6a2c2..dce503bb7d6 100644 --- a/drivers/infiniband/hw/ehca/ehca_iverbs.h +++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h @@ -81,8 +81,9 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd, int num_phys_buf, int mr_access_flags, u64 *iova_start); -struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt, - int mr_access_flags, struct ib_udata *udata); +struct ib_mr *ehca_reg_user_mr(struct ib_pd 
*pd, u64 start, u64 length, + u64 virt, int mr_access_flags, + struct ib_udata *udata); int ehca_rereg_phys_mr(struct ib_mr *mr, int mr_rereg_mask, @@ -192,7 +193,7 @@ void ehca_poll_eqs(unsigned long data); void *ehca_alloc_fw_ctrlblock(gfp_t flags); void ehca_free_fw_ctrlblock(void *ptr); #else -#define ehca_alloc_fw_ctrlblock(flags) ((void *) get_zeroed_page(flags)) +#define ehca_alloc_fw_ctrlblock(flags) ((void *)get_zeroed_page(flags)) #define ehca_free_fw_ctrlblock(ptr) free_page((unsigned long)(ptr)) #endif diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index 28ba2dd2421..36377c6db3d 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -107,7 +107,7 @@ static DEFINE_SPINLOCK(shca_list_lock); static struct timer_list poll_eqs_timer; #ifdef CONFIG_PPC_64K_PAGES -static struct kmem_cache *ctblk_cache = NULL; +static struct kmem_cache *ctblk_cache; void *ehca_alloc_fw_ctrlblock(gfp_t flags) { @@ -200,8 +200,8 @@ static void ehca_destroy_slab_caches(void) #endif } -#define EHCA_HCAAVER EHCA_BMASK_IBM(32,39) -#define EHCA_REVID EHCA_BMASK_IBM(40,63) +#define EHCA_HCAAVER EHCA_BMASK_IBM(32, 39) +#define EHCA_REVID EHCA_BMASK_IBM(40, 63) static struct cap_descr { u64 mask; @@ -263,22 +263,27 @@ int ehca_sense_attributes(struct ehca_shca *shca) ehca_gen_dbg(" ... 
hardware version=%x:%x", hcaaver, revid); - if ((hcaaver == 1) && (revid == 0)) - shca->hw_level = 0x11; - else if ((hcaaver == 1) && (revid == 1)) - shca->hw_level = 0x12; - else if ((hcaaver == 1) && (revid == 2)) - shca->hw_level = 0x13; - else if ((hcaaver == 2) && (revid == 0)) - shca->hw_level = 0x21; - else if ((hcaaver == 2) && (revid == 0x10)) - shca->hw_level = 0x22; - else { + if (hcaaver == 1) { + if (revid <= 3) + shca->hw_level = 0x10 | (revid + 1); + else + shca->hw_level = 0x14; + } else if (hcaaver == 2) { + if (revid == 0) + shca->hw_level = 0x21; + else if (revid == 0x10) + shca->hw_level = 0x22; + else if (revid == 0x20 || revid == 0x21) + shca->hw_level = 0x23; + } + + if (!shca->hw_level) { ehca_gen_warn("unknown hardware version" " - assuming default level"); shca->hw_level = 0x22; } - } + } else + shca->hw_level = ehca_hw_level; ehca_gen_dbg(" ... hardware level=%x", shca->hw_level); shca->sport[0].rate = IB_RATE_30_GBPS; @@ -290,7 +295,7 @@ int ehca_sense_attributes(struct ehca_shca *shca) if (EHCA_BMASK_GET(hca_cap_descr[i].mask, shca->hca_cap)) ehca_gen_dbg(" %s", hca_cap_descr[i].descr); - port = (struct hipz_query_port *) rblock; + port = (struct hipz_query_port *)rblock; h_ret = hipz_h_query_port(shca->ipz_hca_handle, 1, port); if (h_ret != H_SUCCESS) { ehca_gen_err("Cannot query port properties. 
h_ret=%lx", @@ -439,7 +444,7 @@ static int ehca_create_aqp1(struct ehca_shca *shca, u32 port) return -EPERM; } - ibcq = ib_create_cq(&shca->ib_device, NULL, NULL, (void*)(-1), 10, 0); + ibcq = ib_create_cq(&shca->ib_device, NULL, NULL, (void *)(-1), 10, 0); if (IS_ERR(ibcq)) { ehca_err(&shca->ib_device, "Cannot create AQP1 CQ."); return PTR_ERR(ibcq); @@ -666,7 +671,7 @@ static int __devinit ehca_probe(struct ibmebus_dev *dev, } /* create internal protection domain */ - ibpd = ehca_alloc_pd(&shca->ib_device, (void*)(-1), NULL); + ibpd = ehca_alloc_pd(&shca->ib_device, (void *)(-1), NULL); if (IS_ERR(ibpd)) { ehca_err(&shca->ib_device, "Cannot create internal PD."); ret = PTR_ERR(ibpd); @@ -863,18 +868,21 @@ int __init ehca_module_init(void) printk(KERN_INFO "eHCA Infiniband Device Driver " "(Rel.: SVNEHCA_0023)\n"); - if ((ret = ehca_create_comp_pool())) { + ret = ehca_create_comp_pool(); + if (ret) { ehca_gen_err("Cannot create comp pool."); return ret; } - if ((ret = ehca_create_slab_caches())) { + ret = ehca_create_slab_caches(); + if (ret) { ehca_gen_err("Cannot create SLAB caches"); ret = -ENOMEM; goto module_init1; } - if ((ret = ibmebus_register_driver(&ehca_driver))) { + ret = ibmebus_register_driver(&ehca_driver); + if (ret) { ehca_gen_err("Cannot register eHCA device driver"); ret = -EINVAL; goto module_init2; diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c index add79bd44e3..6262c5462d5 100644 --- a/drivers/infiniband/hw/ehca/ehca_mrmw.c +++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c @@ -48,6 +48,11 @@ #include "hcp_if.h" #include "hipz_hw.h" +#define NUM_CHUNKS(length, chunk_size) \ + (((length) + (chunk_size - 1)) / (chunk_size)) +/* max number of rpages (per hcall register_rpages) */ +#define MAX_RPAGES 512 + static struct kmem_cache *mr_cache; static struct kmem_cache *mw_cache; @@ -56,9 +61,9 @@ static struct ehca_mr *ehca_mr_new(void) struct ehca_mr *me; me = kmem_cache_zalloc(mr_cache, GFP_KERNEL); - if 
(me) { + if (me) spin_lock_init(&me->mrlock); - } else + else ehca_gen_err("alloc failed"); return me; @@ -74,9 +79,9 @@ static struct ehca_mw *ehca_mw_new(void) struct ehca_mw *me; me = kmem_cache_zalloc(mw_cache, GFP_KERNEL); - if (me) { + if (me) spin_lock_init(&me->mwlock); - } else + else ehca_gen_err("alloc failed"); return me; @@ -106,11 +111,12 @@ struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags) goto get_dma_mr_exit0; } - ret = ehca_reg_maxmr(shca, e_maxmr, (u64*)KERNELBASE, + ret = ehca_reg_maxmr(shca, e_maxmr, (u64 *)KERNELBASE, mr_access_flags, e_pd, &e_maxmr->ib.ib_mr.lkey, &e_maxmr->ib.ib_mr.rkey); if (ret) { + ehca_mr_delete(e_maxmr); ib_mr = ERR_PTR(ret); goto get_dma_mr_exit0; } @@ -144,9 +150,6 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd, struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); u64 size; - struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; - u32 num_pages_mr; - u32 num_pages_4k; /* 4k portion "pages" */ if ((num_phys_buf <= 0) || !phys_buf_array) { ehca_err(pd->device, "bad input values: num_phys_buf=%x " @@ -190,12 +193,6 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd, goto reg_phys_mr_exit0; } - /* determine number of MR pages */ - num_pages_mr = ((((u64)iova_start % PAGE_SIZE) + size + - PAGE_SIZE - 1) / PAGE_SIZE); - num_pages_4k = ((((u64)iova_start % EHCA_PAGESIZE) + size + - EHCA_PAGESIZE - 1) / EHCA_PAGESIZE); - /* register MR on HCA */ if (ehca_mr_is_maxmr(size, iova_start)) { e_mr->flags |= EHCA_MR_FLAG_MAXMR; @@ -207,13 +204,22 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd, goto reg_phys_mr_exit1; } } else { - pginfo.type = EHCA_MR_PGI_PHYS; - pginfo.num_pages = num_pages_mr; - pginfo.num_4k = num_pages_4k; - pginfo.num_phys_buf = num_phys_buf; - pginfo.phys_buf_array = phys_buf_array; - pginfo.next_4k = (((u64)iova_start & ~PAGE_MASK) / - EHCA_PAGESIZE); + struct ehca_mr_pginfo pginfo; + u32 num_kpages; + u32 num_hwpages; + + num_kpages = 
NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size, + PAGE_SIZE); + num_hwpages = NUM_CHUNKS(((u64)iova_start % EHCA_PAGESIZE) + + size, EHCA_PAGESIZE); + memset(&pginfo, 0, sizeof(pginfo)); + pginfo.type = EHCA_MR_PGI_PHYS; + pginfo.num_kpages = num_kpages; + pginfo.num_hwpages = num_hwpages; + pginfo.u.phy.num_phys_buf = num_phys_buf; + pginfo.u.phy.phys_buf_array = phys_buf_array; + pginfo.next_hwpage = (((u64)iova_start & ~PAGE_MASK) / + EHCA_PAGESIZE); ret = ehca_reg_mr(shca, e_mr, iova_start, size, mr_access_flags, e_pd, &pginfo, &e_mr->ib.ib_mr.lkey, @@ -240,18 +246,19 @@ reg_phys_mr_exit0: /*----------------------------------------------------------------------*/ -struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt, - int mr_access_flags, struct ib_udata *udata) +struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt, int mr_access_flags, + struct ib_udata *udata) { struct ib_mr *ib_mr; struct ehca_mr *e_mr; struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, ib_device); struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); - struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; + struct ehca_mr_pginfo pginfo; int ret; - u32 num_pages_mr; - u32 num_pages_4k; /* 4k portion "pages" */ + u32 num_kpages; + u32 num_hwpages; if (!pd) { ehca_gen_err("bad pd=%p", pd); @@ -289,7 +296,7 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt e_mr->umem = ib_umem_get(pd->uobject->context, start, length, mr_access_flags); if (IS_ERR(e_mr->umem)) { - ib_mr = (void *) e_mr->umem; + ib_mr = (void *)e_mr->umem; goto reg_user_mr_exit1; } @@ -301,23 +308,24 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt } /* determine number of MR pages */ - num_pages_mr = (((virt % PAGE_SIZE) + length + PAGE_SIZE - 1) / - PAGE_SIZE); - num_pages_4k = (((virt % EHCA_PAGESIZE) + length + EHCA_PAGESIZE - 1) / - EHCA_PAGESIZE); + 
num_kpages = NUM_CHUNKS((virt % PAGE_SIZE) + length, PAGE_SIZE); + num_hwpages = NUM_CHUNKS((virt % EHCA_PAGESIZE) + length, + EHCA_PAGESIZE); /* register MR on HCA */ - pginfo.type = EHCA_MR_PGI_USER; - pginfo.num_pages = num_pages_mr; - pginfo.num_4k = num_pages_4k; - pginfo.region = e_mr->umem; - pginfo.next_4k = e_mr->umem->offset / EHCA_PAGESIZE; - pginfo.next_chunk = list_prepare_entry(pginfo.next_chunk, - (&e_mr->umem->chunk_list), - list); - - ret = ehca_reg_mr(shca, e_mr, (u64*) virt, length, mr_access_flags, e_pd, - &pginfo, &e_mr->ib.ib_mr.lkey, &e_mr->ib.ib_mr.rkey); + memset(&pginfo, 0, sizeof(pginfo)); + pginfo.type = EHCA_MR_PGI_USER; + pginfo.num_kpages = num_kpages; + pginfo.num_hwpages = num_hwpages; + pginfo.u.usr.region = e_mr->umem; + pginfo.next_hwpage = e_mr->umem->offset / EHCA_PAGESIZE; + pginfo.u.usr.next_chunk = list_prepare_entry(pginfo.u.usr.next_chunk, + (&e_mr->umem->chunk_list), + list); + + ret = ehca_reg_mr(shca, e_mr, (u64 *)virt, length, mr_access_flags, + e_pd, &pginfo, &e_mr->ib.ib_mr.lkey, + &e_mr->ib.ib_mr.rkey); if (ret) { ib_mr = ERR_PTR(ret); goto reg_user_mr_exit2; @@ -360,9 +368,9 @@ int ehca_rereg_phys_mr(struct ib_mr *mr, struct ehca_pd *new_pd; u32 tmp_lkey, tmp_rkey; unsigned long sl_flags; - u32 num_pages_mr = 0; - u32 num_pages_4k = 0; /* 4k portion "pages" */ - struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; + u32 num_kpages = 0; + u32 num_hwpages = 0; + struct ehca_mr_pginfo pginfo; u32 cur_pid = current->tgid; if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && @@ -414,7 +422,7 @@ int ehca_rereg_phys_mr(struct ib_mr *mr, goto rereg_phys_mr_exit0; } if (!phys_buf_array || num_phys_buf <= 0) { - ehca_err(mr->device, "bad input values: mr_rereg_mask=%x" + ehca_err(mr->device, "bad input values mr_rereg_mask=%x" " phys_buf_array=%p num_phys_buf=%x", mr_rereg_mask, phys_buf_array, num_phys_buf); ret = -EINVAL; @@ -438,10 +446,10 @@ int ehca_rereg_phys_mr(struct ib_mr *mr, /* set 
requested values dependent on rereg request */ spin_lock_irqsave(&e_mr->mrlock, sl_flags); - new_start = e_mr->start; /* new == old address */ - new_size = e_mr->size; /* new == old length */ - new_acl = e_mr->acl; /* new == old access control */ - new_pd = container_of(mr->pd,struct ehca_pd,ib_pd); /*new == old PD*/ + new_start = e_mr->start; + new_size = e_mr->size; + new_acl = e_mr->acl; + new_pd = container_of(mr->pd, struct ehca_pd, ib_pd); if (mr_rereg_mask & IB_MR_REREG_TRANS) { new_start = iova_start; /* change address */ @@ -458,17 +466,18 @@ int ehca_rereg_phys_mr(struct ib_mr *mr, ret = -EINVAL; goto rereg_phys_mr_exit1; } - num_pages_mr = ((((u64)new_start % PAGE_SIZE) + new_size + - PAGE_SIZE - 1) / PAGE_SIZE); - num_pages_4k = ((((u64)new_start % EHCA_PAGESIZE) + new_size + - EHCA_PAGESIZE - 1) / EHCA_PAGESIZE); - pginfo.type = EHCA_MR_PGI_PHYS; - pginfo.num_pages = num_pages_mr; - pginfo.num_4k = num_pages_4k; - pginfo.num_phys_buf = num_phys_buf; - pginfo.phys_buf_array = phys_buf_array; - pginfo.next_4k = (((u64)iova_start & ~PAGE_MASK) / - EHCA_PAGESIZE); + num_kpages = NUM_CHUNKS(((u64)new_start % PAGE_SIZE) + + new_size, PAGE_SIZE); + num_hwpages = NUM_CHUNKS(((u64)new_start % EHCA_PAGESIZE) + + new_size, EHCA_PAGESIZE); + memset(&pginfo, 0, sizeof(pginfo)); + pginfo.type = EHCA_MR_PGI_PHYS; + pginfo.num_kpages = num_kpages; + pginfo.num_hwpages = num_hwpages; + pginfo.u.phy.num_phys_buf = num_phys_buf; + pginfo.u.phy.phys_buf_array = phys_buf_array; + pginfo.next_hwpage = (((u64)iova_start & ~PAGE_MASK) / + EHCA_PAGESIZE); } if (mr_rereg_mask & IB_MR_REREG_ACCESS) new_acl = mr_access_flags; @@ -510,7 +519,7 @@ int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr) struct ehca_pd *my_pd = container_of(mr->pd, struct ehca_pd, ib_pd); u32 cur_pid = current->tgid; unsigned long sl_flags; - struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0}; + struct ehca_mr_hipzout_parms hipzout; if (my_pd->ib_pd.uobject && 
my_pd->ib_pd.uobject->context && (my_pd->ownpid != cur_pid)) { @@ -536,14 +545,14 @@ int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr) "hca_hndl=%lx mr_hndl=%lx lkey=%x", h_ret, mr, shca->ipz_hca_handle.handle, e_mr->ipz_mr_handle.handle, mr->lkey); - ret = ehca_mrmw_map_hrc_query_mr(h_ret); + ret = ehca2ib_return_code(h_ret); goto query_mr_exit1; } - mr_attr->pd = mr->pd; + mr_attr->pd = mr->pd; mr_attr->device_virt_addr = hipzout.vaddr; - mr_attr->size = hipzout.len; - mr_attr->lkey = hipzout.lkey; - mr_attr->rkey = hipzout.rkey; + mr_attr->size = hipzout.len; + mr_attr->lkey = hipzout.lkey; + mr_attr->rkey = hipzout.rkey; ehca_mrmw_reverse_map_acl(&hipzout.acl, &mr_attr->mr_access_flags); query_mr_exit1: @@ -596,7 +605,7 @@ int ehca_dereg_mr(struct ib_mr *mr) "e_mr=%p hca_hndl=%lx mr_hndl=%lx mr->lkey=%x", h_ret, shca, e_mr, shca->ipz_hca_handle.handle, e_mr->ipz_mr_handle.handle, mr->lkey); - ret = ehca_mrmw_map_hrc_free_mr(h_ret); + ret = ehca2ib_return_code(h_ret); goto dereg_mr_exit0; } @@ -622,7 +631,7 @@ struct ib_mw *ehca_alloc_mw(struct ib_pd *pd) struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, ib_device); - struct ehca_mw_hipzout_parms hipzout = {{0},0}; + struct ehca_mw_hipzout_parms hipzout; e_mw = ehca_mw_new(); if (!e_mw) { @@ -636,7 +645,7 @@ struct ib_mw *ehca_alloc_mw(struct ib_pd *pd) ehca_err(pd->device, "hipz_mw_allocate failed, h_ret=%lx " "shca=%p hca_hndl=%lx mw=%p", h_ret, shca, shca->ipz_hca_handle.handle, e_mw); - ib_mw = ERR_PTR(ehca_mrmw_map_hrc_alloc(h_ret)); + ib_mw = ERR_PTR(ehca2ib_return_code(h_ret)); goto alloc_mw_exit1; } /* successful MW allocation */ @@ -679,7 +688,7 @@ int ehca_dealloc_mw(struct ib_mw *mw) "mw=%p rkey=%x hca_hndl=%lx mw_hndl=%lx", h_ret, shca, mw, mw->rkey, shca->ipz_hca_handle.handle, e_mw->ipz_mw_handle.handle); - return ehca_mrmw_map_hrc_free_mw(h_ret); + return ehca2ib_return_code(h_ret); } /* successful 
deallocation */ ehca_mw_delete(e_mw); @@ -699,7 +708,7 @@ struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd, struct ehca_mr *e_fmr; int ret; u32 tmp_lkey, tmp_rkey; - struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; + struct ehca_mr_pginfo pginfo; /* check other parameters */ if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && @@ -745,6 +754,7 @@ struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd, e_fmr->flags |= EHCA_MR_FLAG_FMR; /* register MR on HCA */ + memset(&pginfo, 0, sizeof(pginfo)); ret = ehca_reg_mr(shca, e_fmr, NULL, fmr_attr->max_pages * (1 << fmr_attr->page_shift), mr_access_flags, e_pd, &pginfo, @@ -783,7 +793,7 @@ int ehca_map_phys_fmr(struct ib_fmr *fmr, container_of(fmr->device, struct ehca_shca, ib_device); struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr); struct ehca_pd *e_pd = container_of(fmr->pd, struct ehca_pd, ib_pd); - struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; + struct ehca_mr_pginfo pginfo; u32 tmp_lkey, tmp_rkey; if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) { @@ -809,14 +819,16 @@ int ehca_map_phys_fmr(struct ib_fmr *fmr, fmr, e_fmr->fmr_map_cnt, e_fmr->fmr_max_maps); } - pginfo.type = EHCA_MR_PGI_FMR; - pginfo.num_pages = list_len; - pginfo.num_4k = list_len * (e_fmr->fmr_page_size / EHCA_PAGESIZE); - pginfo.page_list = page_list; - pginfo.next_4k = ((iova & (e_fmr->fmr_page_size-1)) / - EHCA_PAGESIZE); + memset(&pginfo, 0, sizeof(pginfo)); + pginfo.type = EHCA_MR_PGI_FMR; + pginfo.num_kpages = list_len; + pginfo.num_hwpages = list_len * (e_fmr->fmr_page_size / EHCA_PAGESIZE); + pginfo.u.fmr.page_list = page_list; + pginfo.next_hwpage = ((iova & (e_fmr->fmr_page_size-1)) / + EHCA_PAGESIZE); + pginfo.u.fmr.fmr_pgsize = e_fmr->fmr_page_size; - ret = ehca_rereg_mr(shca, e_fmr, (u64*)iova, + ret = ehca_rereg_mr(shca, e_fmr, (u64 *)iova, list_len * e_fmr->fmr_page_size, e_fmr->acl, e_pd, &pginfo, &tmp_lkey, &tmp_rkey); if (ret) @@ -831,8 +843,7 @@ int ehca_map_phys_fmr(struct ib_fmr 
*fmr, map_phys_fmr_exit0: if (ret) ehca_err(fmr->device, "ret=%x fmr=%p page_list=%p list_len=%x " - "iova=%lx", - ret, fmr, page_list, list_len, iova); + "iova=%lx", ret, fmr, page_list, list_len, iova); return ret; } /* end ehca_map_phys_fmr() */ @@ -922,7 +933,7 @@ int ehca_dealloc_fmr(struct ib_fmr *fmr) "hca_hndl=%lx fmr_hndl=%lx fmr->lkey=%x", h_ret, e_fmr, shca->ipz_hca_handle.handle, e_fmr->ipz_mr_handle.handle, fmr->lkey); - ret = ehca_mrmw_map_hrc_free_mr(h_ret); + ret = ehca2ib_return_code(h_ret); goto free_fmr_exit0; } /* successful deregistration */ @@ -950,12 +961,12 @@ int ehca_reg_mr(struct ehca_shca *shca, int ret; u64 h_ret; u32 hipz_acl; - struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0}; + struct ehca_mr_hipzout_parms hipzout; ehca_mrmw_map_acl(acl, &hipz_acl); ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl); if (ehca_use_hp_mr == 1) - hipz_acl |= 0x00000001; + hipz_acl |= 0x00000001; h_ret = hipz_h_alloc_resource_mr(shca->ipz_hca_handle, e_mr, (u64)iova_start, size, hipz_acl, @@ -963,7 +974,7 @@ int ehca_reg_mr(struct ehca_shca *shca, if (h_ret != H_SUCCESS) { ehca_err(&shca->ib_device, "hipz_alloc_mr failed, h_ret=%lx " "hca_hndl=%lx", h_ret, shca->ipz_hca_handle.handle); - ret = ehca_mrmw_map_hrc_alloc(h_ret); + ret = ehca2ib_return_code(h_ret); goto ehca_reg_mr_exit0; } @@ -974,11 +985,11 @@ int ehca_reg_mr(struct ehca_shca *shca, goto ehca_reg_mr_exit1; /* successful registration */ - e_mr->num_pages = pginfo->num_pages; - e_mr->num_4k = pginfo->num_4k; - e_mr->start = iova_start; - e_mr->size = size; - e_mr->acl = acl; + e_mr->num_kpages = pginfo->num_kpages; + e_mr->num_hwpages = pginfo->num_hwpages; + e_mr->start = iova_start; + e_mr->size = size; + e_mr->acl = acl; *lkey = hipzout.lkey; *rkey = hipzout.rkey; return 0; @@ -988,10 +999,10 @@ ehca_reg_mr_exit1: if (h_ret != H_SUCCESS) { ehca_err(&shca->ib_device, "h_ret=%lx shca=%p e_mr=%p " "iova_start=%p size=%lx acl=%x e_pd=%p lkey=%x " - "pginfo=%p num_pages=%lx num_4k=%lx ret=%x", + 
"pginfo=%p num_kpages=%lx num_hwpages=%lx ret=%x", h_ret, shca, e_mr, iova_start, size, acl, e_pd, - hipzout.lkey, pginfo, pginfo->num_pages, - pginfo->num_4k, ret); + hipzout.lkey, pginfo, pginfo->num_kpages, + pginfo->num_hwpages, ret); ehca_err(&shca->ib_device, "internal error in ehca_reg_mr, " "not recoverable"); } @@ -999,9 +1010,9 @@ ehca_reg_mr_exit0: if (ret) ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p " "iova_start=%p size=%lx acl=%x e_pd=%p pginfo=%p " - "num_pages=%lx num_4k=%lx", + "num_kpages=%lx num_hwpages=%lx", ret, shca, e_mr, iova_start, size, acl, e_pd, pginfo, - pginfo->num_pages, pginfo->num_4k); + pginfo->num_kpages, pginfo->num_hwpages); return ret; } /* end ehca_reg_mr() */ @@ -1026,24 +1037,24 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca, } /* max 512 pages per shot */ - for (i = 0; i < ((pginfo->num_4k + 512 - 1) / 512); i++) { + for (i = 0; i < NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES); i++) { - if (i == ((pginfo->num_4k + 512 - 1) / 512) - 1) { - rnum = pginfo->num_4k % 512; /* last shot */ + if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) { + rnum = pginfo->num_hwpages % MAX_RPAGES; /* last shot */ if (rnum == 0) - rnum = 512; /* last shot is full */ + rnum = MAX_RPAGES; /* last shot is full */ } else - rnum = 512; + rnum = MAX_RPAGES; - if (rnum > 1) { - ret = ehca_set_pagebuf(e_mr, pginfo, rnum, kpage); - if (ret) { - ehca_err(&shca->ib_device, "ehca_set_pagebuf " + ret = ehca_set_pagebuf(pginfo, rnum, kpage); + if (ret) { + ehca_err(&shca->ib_device, "ehca_set_pagebuf " "bad rc, ret=%x rnum=%x kpage=%p", ret, rnum, kpage); - ret = -EFAULT; - goto ehca_reg_mr_rpages_exit1; - } + goto ehca_reg_mr_rpages_exit1; + } + + if (rnum > 1) { rpage = virt_to_abs(kpage); if (!rpage) { ehca_err(&shca->ib_device, "kpage=%p i=%x", @@ -1051,21 +1062,14 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca, ret = -EFAULT; goto ehca_reg_mr_rpages_exit1; } - } else { /* rnum==1 */ - ret = ehca_set_pagebuf_1(e_mr, pginfo, &rpage); - 
if (ret) { - ehca_err(&shca->ib_device, "ehca_set_pagebuf_1 " - "bad rc, ret=%x i=%x", ret, i); - ret = -EFAULT; - goto ehca_reg_mr_rpages_exit1; - } - } + } else + rpage = *kpage; h_ret = hipz_h_register_rpage_mr(shca->ipz_hca_handle, e_mr, 0, /* pagesize 4k */ 0, rpage, rnum); - if (i == ((pginfo->num_4k + 512 - 1) / 512) - 1) { + if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) { /* * check for 'registration complete'==H_SUCCESS * and for 'page registered'==H_PAGE_REGISTERED @@ -1078,7 +1082,7 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca, shca->ipz_hca_handle.handle, e_mr->ipz_mr_handle.handle, e_mr->ib.ib_mr.lkey); - ret = ehca_mrmw_map_hrc_rrpg_last(h_ret); + ret = ehca2ib_return_code(h_ret); break; } else ret = 0; @@ -1089,7 +1093,7 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca, e_mr->ib.ib_mr.lkey, shca->ipz_hca_handle.handle, e_mr->ipz_mr_handle.handle); - ret = ehca_mrmw_map_hrc_rrpg_notlast(h_ret); + ret = ehca2ib_return_code(h_ret); break; } else ret = 0; @@ -1101,8 +1105,8 @@ ehca_reg_mr_rpages_exit1: ehca_reg_mr_rpages_exit0: if (ret) ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p pginfo=%p " - "num_pages=%lx num_4k=%lx", ret, shca, e_mr, pginfo, - pginfo->num_pages, pginfo->num_4k); + "num_kpages=%lx num_hwpages=%lx", ret, shca, e_mr, + pginfo, pginfo->num_kpages, pginfo->num_hwpages); return ret; } /* end ehca_reg_mr_rpages() */ @@ -1124,7 +1128,7 @@ inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca, u64 *kpage; u64 rpage; struct ehca_mr_pginfo pginfo_save; - struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0}; + struct ehca_mr_hipzout_parms hipzout; ehca_mrmw_map_acl(acl, &hipz_acl); ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl); @@ -1137,12 +1141,12 @@ inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca, } pginfo_save = *pginfo; - ret = ehca_set_pagebuf(e_mr, pginfo, pginfo->num_4k, kpage); + ret = ehca_set_pagebuf(pginfo, pginfo->num_hwpages, kpage); if (ret) { ehca_err(&shca->ib_device, "set pagebuf failed, e_mr=%p " - 
"pginfo=%p type=%x num_pages=%lx num_4k=%lx kpage=%p", - e_mr, pginfo, pginfo->type, pginfo->num_pages, - pginfo->num_4k,kpage); + "pginfo=%p type=%x num_kpages=%lx num_hwpages=%lx " + "kpage=%p", e_mr, pginfo, pginfo->type, + pginfo->num_kpages, pginfo->num_hwpages, kpage); goto ehca_rereg_mr_rereg1_exit1; } rpage = virt_to_abs(kpage); @@ -1164,7 +1168,7 @@ inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca, "(Rereg1), h_ret=%lx e_mr=%p", h_ret, e_mr); *pginfo = pginfo_save; ret = -EAGAIN; - } else if ((u64*)hipzout.vaddr != iova_start) { + } else if ((u64 *)hipzout.vaddr != iova_start) { ehca_err(&shca->ib_device, "PHYP changed iova_start in " "rereg_pmr, iova_start=%p iova_start_out=%lx e_mr=%p " "mr_handle=%lx lkey=%x lkey_out=%x", iova_start, @@ -1176,11 +1180,11 @@ inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca, * successful reregistration * note: start and start_out are identical for eServer HCAs */ - e_mr->num_pages = pginfo->num_pages; - e_mr->num_4k = pginfo->num_4k; - e_mr->start = iova_start; - e_mr->size = size; - e_mr->acl = acl; + e_mr->num_kpages = pginfo->num_kpages; + e_mr->num_hwpages = pginfo->num_hwpages; + e_mr->start = iova_start; + e_mr->size = size; + e_mr->acl = acl; *lkey = hipzout.lkey; *rkey = hipzout.rkey; } @@ -1190,9 +1194,9 @@ ehca_rereg_mr_rereg1_exit1: ehca_rereg_mr_rereg1_exit0: if ( ret && (ret != -EAGAIN) ) ehca_err(&shca->ib_device, "ret=%x lkey=%x rkey=%x " - "pginfo=%p num_pages=%lx num_4k=%lx", - ret, *lkey, *rkey, pginfo, pginfo->num_pages, - pginfo->num_4k); + "pginfo=%p num_kpages=%lx num_hwpages=%lx", + ret, *lkey, *rkey, pginfo, pginfo->num_kpages, + pginfo->num_hwpages); return ret; } /* end ehca_rereg_mr_rereg1() */ @@ -1214,10 +1218,12 @@ int ehca_rereg_mr(struct ehca_shca *shca, int rereg_3_hcall = 0; /* 1: use 3 hipz calls for reregistration */ /* first determine reregistration hCall(s) */ - if ((pginfo->num_4k > 512) || (e_mr->num_4k > 512) || - (pginfo->num_4k > e_mr->num_4k)) { - 
ehca_dbg(&shca->ib_device, "Rereg3 case, pginfo->num_4k=%lx " - "e_mr->num_4k=%x", pginfo->num_4k, e_mr->num_4k); + if ((pginfo->num_hwpages > MAX_RPAGES) || + (e_mr->num_hwpages > MAX_RPAGES) || + (pginfo->num_hwpages > e_mr->num_hwpages)) { + ehca_dbg(&shca->ib_device, "Rereg3 case, " + "pginfo->num_hwpages=%lx e_mr->num_hwpages=%x", + pginfo->num_hwpages, e_mr->num_hwpages); rereg_1_hcall = 0; rereg_3_hcall = 1; } @@ -1253,7 +1259,7 @@ int ehca_rereg_mr(struct ehca_shca *shca, h_ret, e_mr, shca->ipz_hca_handle.handle, e_mr->ipz_mr_handle.handle, e_mr->ib.ib_mr.lkey); - ret = ehca_mrmw_map_hrc_free_mr(h_ret); + ret = ehca2ib_return_code(h_ret); goto ehca_rereg_mr_exit0; } /* clean ehca_mr_t, without changing struct ib_mr and lock */ @@ -1281,9 +1287,9 @@ ehca_rereg_mr_exit0: if (ret) ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p " "iova_start=%p size=%lx acl=%x e_pd=%p pginfo=%p " - "num_pages=%lx lkey=%x rkey=%x rereg_1_hcall=%x " + "num_kpages=%lx lkey=%x rkey=%x rereg_1_hcall=%x " "rereg_3_hcall=%x", ret, shca, e_mr, iova_start, size, - acl, e_pd, pginfo, pginfo->num_pages, *lkey, *rkey, + acl, e_pd, pginfo, pginfo->num_kpages, *lkey, *rkey, rereg_1_hcall, rereg_3_hcall); return ret; } /* end ehca_rereg_mr() */ @@ -1295,97 +1301,86 @@ int ehca_unmap_one_fmr(struct ehca_shca *shca, { int ret = 0; u64 h_ret; - int rereg_1_hcall = 1; /* 1: use hipz_mr_reregister directly */ - int rereg_3_hcall = 0; /* 1: use 3 hipz calls for unmapping */ struct ehca_pd *e_pd = container_of(e_fmr->ib.ib_fmr.pd, struct ehca_pd, ib_pd); struct ehca_mr save_fmr; u32 tmp_lkey, tmp_rkey; - struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; - struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0}; + struct ehca_mr_pginfo pginfo; + struct ehca_mr_hipzout_parms hipzout; + struct ehca_mr save_mr; - /* first check if reregistration hCall can be used for unmap */ - if (e_fmr->fmr_max_pages > 512) { - rereg_1_hcall = 0; - rereg_3_hcall = 1; - } - - if (rereg_1_hcall) 
{ + if (e_fmr->fmr_max_pages <= MAX_RPAGES) { /* * note: after using rereg hcall with len=0, * rereg hcall must be used again for registering pages */ h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_fmr, 0, 0, 0, e_pd->fw_pd, 0, &hipzout); - if (h_ret != H_SUCCESS) { - /* - * should not happen, because length checked above, - * FMRs are not shared and no MW bound to FMRs - */ - ehca_err(&shca->ib_device, "hipz_reregister_pmr failed " - "(Rereg1), h_ret=%lx e_fmr=%p hca_hndl=%lx " - "mr_hndl=%lx lkey=%x lkey_out=%x", - h_ret, e_fmr, shca->ipz_hca_handle.handle, - e_fmr->ipz_mr_handle.handle, - e_fmr->ib.ib_fmr.lkey, hipzout.lkey); - rereg_3_hcall = 1; - } else { + if (h_ret == H_SUCCESS) { /* successful reregistration */ e_fmr->start = NULL; e_fmr->size = 0; tmp_lkey = hipzout.lkey; tmp_rkey = hipzout.rkey; + return 0; } + /* + * should not happen, because length checked above, + * FMRs are not shared and no MW bound to FMRs + */ + ehca_err(&shca->ib_device, "hipz_reregister_pmr failed " + "(Rereg1), h_ret=%lx e_fmr=%p hca_hndl=%lx " + "mr_hndl=%lx lkey=%x lkey_out=%x", + h_ret, e_fmr, shca->ipz_hca_handle.handle, + e_fmr->ipz_mr_handle.handle, + e_fmr->ib.ib_fmr.lkey, hipzout.lkey); + /* try free and rereg */ } - if (rereg_3_hcall) { - struct ehca_mr save_mr; - - /* first free old FMR */ - h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr); - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "hipz_free_mr failed, " - "h_ret=%lx e_fmr=%p hca_hndl=%lx mr_hndl=%lx " - "lkey=%x", - h_ret, e_fmr, shca->ipz_hca_handle.handle, - e_fmr->ipz_mr_handle.handle, - e_fmr->ib.ib_fmr.lkey); - ret = ehca_mrmw_map_hrc_free_mr(h_ret); - goto ehca_unmap_one_fmr_exit0; - } - /* clean ehca_mr_t, without changing lock */ - save_fmr = *e_fmr; - ehca_mr_deletenew(e_fmr); - - /* set some MR values */ - e_fmr->flags = save_fmr.flags; - e_fmr->fmr_page_size = save_fmr.fmr_page_size; - e_fmr->fmr_max_pages = save_fmr.fmr_max_pages; - e_fmr->fmr_max_maps = 
save_fmr.fmr_max_maps; - e_fmr->fmr_map_cnt = save_fmr.fmr_map_cnt; - e_fmr->acl = save_fmr.acl; - - pginfo.type = EHCA_MR_PGI_FMR; - pginfo.num_pages = 0; - pginfo.num_4k = 0; - ret = ehca_reg_mr(shca, e_fmr, NULL, - (e_fmr->fmr_max_pages * e_fmr->fmr_page_size), - e_fmr->acl, e_pd, &pginfo, &tmp_lkey, - &tmp_rkey); - if (ret) { - u32 offset = (u64)(&e_fmr->flags) - (u64)e_fmr; - memcpy(&e_fmr->flags, &(save_mr.flags), - sizeof(struct ehca_mr) - offset); - goto ehca_unmap_one_fmr_exit0; - } + /* first free old FMR */ + h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "hipz_free_mr failed, " + "h_ret=%lx e_fmr=%p hca_hndl=%lx mr_hndl=%lx " + "lkey=%x", + h_ret, e_fmr, shca->ipz_hca_handle.handle, + e_fmr->ipz_mr_handle.handle, + e_fmr->ib.ib_fmr.lkey); + ret = ehca2ib_return_code(h_ret); + goto ehca_unmap_one_fmr_exit0; + } + /* clean ehca_mr_t, without changing lock */ + save_fmr = *e_fmr; + ehca_mr_deletenew(e_fmr); + + /* set some MR values */ + e_fmr->flags = save_fmr.flags; + e_fmr->fmr_page_size = save_fmr.fmr_page_size; + e_fmr->fmr_max_pages = save_fmr.fmr_max_pages; + e_fmr->fmr_max_maps = save_fmr.fmr_max_maps; + e_fmr->fmr_map_cnt = save_fmr.fmr_map_cnt; + e_fmr->acl = save_fmr.acl; + + memset(&pginfo, 0, sizeof(pginfo)); + pginfo.type = EHCA_MR_PGI_FMR; + pginfo.num_kpages = 0; + pginfo.num_hwpages = 0; + ret = ehca_reg_mr(shca, e_fmr, NULL, + (e_fmr->fmr_max_pages * e_fmr->fmr_page_size), + e_fmr->acl, e_pd, &pginfo, &tmp_lkey, + &tmp_rkey); + if (ret) { + u32 offset = (u64)(&e_fmr->flags) - (u64)e_fmr; + memcpy(&e_fmr->flags, &(save_mr.flags), + sizeof(struct ehca_mr) - offset); + goto ehca_unmap_one_fmr_exit0; } ehca_unmap_one_fmr_exit0: if (ret) ehca_err(&shca->ib_device, "ret=%x tmp_lkey=%x tmp_rkey=%x " - "fmr_max_pages=%x rereg_1_hcall=%x rereg_3_hcall=%x", - ret, tmp_lkey, tmp_rkey, e_fmr->fmr_max_pages, - rereg_1_hcall, rereg_3_hcall); + "fmr_max_pages=%x", + ret, tmp_lkey, 
tmp_rkey, e_fmr->fmr_max_pages); return ret; } /* end ehca_unmap_one_fmr() */ @@ -1403,7 +1398,7 @@ int ehca_reg_smr(struct ehca_shca *shca, int ret = 0; u64 h_ret; u32 hipz_acl; - struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0}; + struct ehca_mr_hipzout_parms hipzout; ehca_mrmw_map_acl(acl, &hipz_acl); ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl); @@ -1419,15 +1414,15 @@ int ehca_reg_smr(struct ehca_shca *shca, shca->ipz_hca_handle.handle, e_origmr->ipz_mr_handle.handle, e_origmr->ib.ib_mr.lkey); - ret = ehca_mrmw_map_hrc_reg_smr(h_ret); + ret = ehca2ib_return_code(h_ret); goto ehca_reg_smr_exit0; } /* successful registration */ - e_newmr->num_pages = e_origmr->num_pages; - e_newmr->num_4k = e_origmr->num_4k; - e_newmr->start = iova_start; - e_newmr->size = e_origmr->size; - e_newmr->acl = acl; + e_newmr->num_kpages = e_origmr->num_kpages; + e_newmr->num_hwpages = e_origmr->num_hwpages; + e_newmr->start = iova_start; + e_newmr->size = e_origmr->size; + e_newmr->acl = acl; e_newmr->ipz_mr_handle = hipzout.handle; *lkey = hipzout.lkey; *rkey = hipzout.rkey; @@ -1453,10 +1448,10 @@ int ehca_reg_internal_maxmr( struct ehca_mr *e_mr; u64 *iova_start; u64 size_maxmr; - struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; + struct ehca_mr_pginfo pginfo; struct ib_phys_buf ib_pbuf; - u32 num_pages_mr; - u32 num_pages_4k; /* 4k portion "pages" */ + u32 num_kpages; + u32 num_hwpages; e_mr = ehca_mr_new(); if (!e_mr) { @@ -1468,28 +1463,29 @@ int ehca_reg_internal_maxmr( /* register internal max-MR on HCA */ size_maxmr = (u64)high_memory - PAGE_OFFSET; - iova_start = (u64*)KERNELBASE; + iova_start = (u64 *)KERNELBASE; ib_pbuf.addr = 0; ib_pbuf.size = size_maxmr; - num_pages_mr = ((((u64)iova_start % PAGE_SIZE) + size_maxmr + - PAGE_SIZE - 1) / PAGE_SIZE); - num_pages_4k = ((((u64)iova_start % EHCA_PAGESIZE) + size_maxmr + - EHCA_PAGESIZE - 1) / EHCA_PAGESIZE); - - pginfo.type = EHCA_MR_PGI_PHYS; - pginfo.num_pages = num_pages_mr; - pginfo.num_4k = 
num_pages_4k; - pginfo.num_phys_buf = 1; - pginfo.phys_buf_array = &ib_pbuf; + num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size_maxmr, + PAGE_SIZE); + num_hwpages = NUM_CHUNKS(((u64)iova_start % EHCA_PAGESIZE) + size_maxmr, + EHCA_PAGESIZE); + + memset(&pginfo, 0, sizeof(pginfo)); + pginfo.type = EHCA_MR_PGI_PHYS; + pginfo.num_kpages = num_kpages; + pginfo.num_hwpages = num_hwpages; + pginfo.u.phy.num_phys_buf = 1; + pginfo.u.phy.phys_buf_array = &ib_pbuf; ret = ehca_reg_mr(shca, e_mr, iova_start, size_maxmr, 0, e_pd, &pginfo, &e_mr->ib.ib_mr.lkey, &e_mr->ib.ib_mr.rkey); if (ret) { ehca_err(&shca->ib_device, "reg of internal max MR failed, " - "e_mr=%p iova_start=%p size_maxmr=%lx num_pages_mr=%x " - "num_pages_4k=%x", e_mr, iova_start, size_maxmr, - num_pages_mr, num_pages_4k); + "e_mr=%p iova_start=%p size_maxmr=%lx num_kpages=%x " + "num_hwpages=%x", e_mr, iova_start, size_maxmr, + num_kpages, num_hwpages); goto ehca_reg_internal_maxmr_exit1; } @@ -1524,7 +1520,7 @@ int ehca_reg_maxmr(struct ehca_shca *shca, u64 h_ret; struct ehca_mr *e_origmr = shca->maxmr; u32 hipz_acl; - struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0}; + struct ehca_mr_hipzout_parms hipzout; ehca_mrmw_map_acl(acl, &hipz_acl); ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl); @@ -1538,14 +1534,14 @@ int ehca_reg_maxmr(struct ehca_shca *shca, h_ret, e_origmr, shca->ipz_hca_handle.handle, e_origmr->ipz_mr_handle.handle, e_origmr->ib.ib_mr.lkey); - return ehca_mrmw_map_hrc_reg_smr(h_ret); + return ehca2ib_return_code(h_ret); } /* successful registration */ - e_newmr->num_pages = e_origmr->num_pages; - e_newmr->num_4k = e_origmr->num_4k; - e_newmr->start = iova_start; - e_newmr->size = e_origmr->size; - e_newmr->acl = acl; + e_newmr->num_kpages = e_origmr->num_kpages; + e_newmr->num_hwpages = e_origmr->num_hwpages; + e_newmr->start = iova_start; + e_newmr->size = e_origmr->size; + e_newmr->acl = acl; e_newmr->ipz_mr_handle = hipzout.handle; *lkey = hipzout.lkey; *rkey = hipzout.rkey; @@ 
-1677,299 +1673,187 @@ int ehca_fmr_check_page_list(struct ehca_mr *e_fmr, /*----------------------------------------------------------------------*/ -/* setup page buffer from page info */ -int ehca_set_pagebuf(struct ehca_mr *e_mr, - struct ehca_mr_pginfo *pginfo, - u32 number, - u64 *kpage) +/* PAGE_SIZE >= pginfo->hwpage_size */ +static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo, + u32 number, + u64 *kpage) { int ret = 0; struct ib_umem_chunk *prev_chunk; struct ib_umem_chunk *chunk; - struct ib_phys_buf *pbuf; - u64 *fmrlist; - u64 num4k, pgaddr, offs4k; + u64 pgaddr; u32 i = 0; u32 j = 0; - if (pginfo->type == EHCA_MR_PGI_PHYS) { - /* loop over desired phys_buf_array entries */ - while (i < number) { - pbuf = pginfo->phys_buf_array + pginfo->next_buf; - num4k = ((pbuf->addr % EHCA_PAGESIZE) + pbuf->size + - EHCA_PAGESIZE - 1) / EHCA_PAGESIZE; - offs4k = (pbuf->addr & ~PAGE_MASK) / EHCA_PAGESIZE; - while (pginfo->next_4k < offs4k + num4k) { - /* sanity check */ - if ((pginfo->page_cnt >= pginfo->num_pages) || - (pginfo->page_4k_cnt >= pginfo->num_4k)) { - ehca_gen_err("page_cnt >= num_pages, " - "page_cnt=%lx " - "num_pages=%lx " - "page_4k_cnt=%lx " - "num_4k=%lx i=%x", - pginfo->page_cnt, - pginfo->num_pages, - pginfo->page_4k_cnt, - pginfo->num_4k, i); - ret = -EFAULT; - goto ehca_set_pagebuf_exit0; - } - *kpage = phys_to_abs( - (pbuf->addr & EHCA_PAGEMASK) - + (pginfo->next_4k * EHCA_PAGESIZE)); - if ( !(*kpage) && pbuf->addr ) { - ehca_gen_err("pbuf->addr=%lx " - "pbuf->size=%lx " - "next_4k=%lx", pbuf->addr, - pbuf->size, - pginfo->next_4k); - ret = -EFAULT; - goto ehca_set_pagebuf_exit0; - } - (pginfo->page_4k_cnt)++; - (pginfo->next_4k)++; - if (pginfo->next_4k % - (PAGE_SIZE / EHCA_PAGESIZE) == 0) - (pginfo->page_cnt)++; - kpage++; - i++; - if (i >= number) break; - } - if (pginfo->next_4k >= offs4k + num4k) { - (pginfo->next_buf)++; - pginfo->next_4k = 0; - } - } - } else if (pginfo->type == EHCA_MR_PGI_USER) { - /* loop over desired 
chunk entries */ - chunk = pginfo->next_chunk; - prev_chunk = pginfo->next_chunk; - list_for_each_entry_continue(chunk, - (&(pginfo->region->chunk_list)), - list) { - for (i = pginfo->next_nmap; i < chunk->nmap; ) { - pgaddr = ( page_to_pfn(chunk->page_list[i].page) - << PAGE_SHIFT ); - *kpage = phys_to_abs(pgaddr + - (pginfo->next_4k * - EHCA_PAGESIZE)); - if ( !(*kpage) ) { - ehca_gen_err("pgaddr=%lx " - "chunk->page_list[i]=%lx " - "i=%x next_4k=%lx mr=%p", - pgaddr, - (u64)sg_dma_address( - &chunk-> - page_list[i]), - i, pginfo->next_4k, e_mr); - ret = -EFAULT; - goto ehca_set_pagebuf_exit0; - } - (pginfo->page_4k_cnt)++; - (pginfo->next_4k)++; - kpage++; - if (pginfo->next_4k % - (PAGE_SIZE / EHCA_PAGESIZE) == 0) { - (pginfo->page_cnt)++; - (pginfo->next_nmap)++; - pginfo->next_4k = 0; - i++; - } - j++; - if (j >= number) break; - } - if ((pginfo->next_nmap >= chunk->nmap) && - (j >= number)) { - pginfo->next_nmap = 0; - prev_chunk = chunk; - break; - } else if (pginfo->next_nmap >= chunk->nmap) { - pginfo->next_nmap = 0; - prev_chunk = chunk; - } else if (j >= number) - break; - else - prev_chunk = chunk; - } - pginfo->next_chunk = - list_prepare_entry(prev_chunk, - (&(pginfo->region->chunk_list)), - list); - } else if (pginfo->type == EHCA_MR_PGI_FMR) { - /* loop over desired page_list entries */ - fmrlist = pginfo->page_list + pginfo->next_listelem; - for (i = 0; i < number; i++) { - *kpage = phys_to_abs((*fmrlist & EHCA_PAGEMASK) + - pginfo->next_4k * EHCA_PAGESIZE); + /* loop over desired chunk entries */ + chunk = pginfo->u.usr.next_chunk; + prev_chunk = pginfo->u.usr.next_chunk; + list_for_each_entry_continue( + chunk, (&(pginfo->u.usr.region->chunk_list)), list) { + for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) { + pgaddr = page_to_pfn(chunk->page_list[i].page) + << PAGE_SHIFT ; + *kpage = phys_to_abs(pgaddr + + (pginfo->next_hwpage * + EHCA_PAGESIZE)); if ( !(*kpage) ) { - ehca_gen_err("*fmrlist=%lx fmrlist=%p " - "next_listelem=%lx 
next_4k=%lx", - *fmrlist, fmrlist, - pginfo->next_listelem, - pginfo->next_4k); - ret = -EFAULT; - goto ehca_set_pagebuf_exit0; + ehca_gen_err("pgaddr=%lx " + "chunk->page_list[i]=%lx " + "i=%x next_hwpage=%lx", + pgaddr, (u64)sg_dma_address( + &chunk->page_list[i]), + i, pginfo->next_hwpage); + return -EFAULT; } - (pginfo->page_4k_cnt)++; - (pginfo->next_4k)++; + (pginfo->hwpage_cnt)++; + (pginfo->next_hwpage)++; kpage++; - if (pginfo->next_4k % - (e_mr->fmr_page_size / EHCA_PAGESIZE) == 0) { - (pginfo->page_cnt)++; - (pginfo->next_listelem)++; - fmrlist++; - pginfo->next_4k = 0; + if (pginfo->next_hwpage % + (PAGE_SIZE / EHCA_PAGESIZE) == 0) { + (pginfo->kpage_cnt)++; + (pginfo->u.usr.next_nmap)++; + pginfo->next_hwpage = 0; + i++; } + j++; + if (j >= number) break; } - } else { - ehca_gen_err("bad pginfo->type=%x", pginfo->type); - ret = -EFAULT; - goto ehca_set_pagebuf_exit0; + if ((pginfo->u.usr.next_nmap >= chunk->nmap) && + (j >= number)) { + pginfo->u.usr.next_nmap = 0; + prev_chunk = chunk; + break; + } else if (pginfo->u.usr.next_nmap >= chunk->nmap) { + pginfo->u.usr.next_nmap = 0; + prev_chunk = chunk; + } else if (j >= number) + break; + else + prev_chunk = chunk; } - -ehca_set_pagebuf_exit0: - if (ret) - ehca_gen_err("ret=%x e_mr=%p pginfo=%p type=%x num_pages=%lx " - "num_4k=%lx next_buf=%lx next_4k=%lx number=%x " - "kpage=%p page_cnt=%lx page_4k_cnt=%lx i=%x " - "next_listelem=%lx region=%p next_chunk=%p " - "next_nmap=%lx", ret, e_mr, pginfo, pginfo->type, - pginfo->num_pages, pginfo->num_4k, - pginfo->next_buf, pginfo->next_4k, number, kpage, - pginfo->page_cnt, pginfo->page_4k_cnt, i, - pginfo->next_listelem, pginfo->region, - pginfo->next_chunk, pginfo->next_nmap); + pginfo->u.usr.next_chunk = + list_prepare_entry(prev_chunk, + (&(pginfo->u.usr.region->chunk_list)), + list); return ret; -} /* end ehca_set_pagebuf() */ - -/*----------------------------------------------------------------------*/ +} -/* setup 1 page from page info page buffer */ 
-int ehca_set_pagebuf_1(struct ehca_mr *e_mr, - struct ehca_mr_pginfo *pginfo, - u64 *rpage) +int ehca_set_pagebuf_phys(struct ehca_mr_pginfo *pginfo, + u32 number, + u64 *kpage) { int ret = 0; - struct ib_phys_buf *tmp_pbuf; - u64 *fmrlist; - struct ib_umem_chunk *chunk; - struct ib_umem_chunk *prev_chunk; - u64 pgaddr, num4k, offs4k; - - if (pginfo->type == EHCA_MR_PGI_PHYS) { - /* sanity check */ - if ((pginfo->page_cnt >= pginfo->num_pages) || - (pginfo->page_4k_cnt >= pginfo->num_4k)) { - ehca_gen_err("page_cnt >= num_pages, page_cnt=%lx " - "num_pages=%lx page_4k_cnt=%lx num_4k=%lx", - pginfo->page_cnt, pginfo->num_pages, - pginfo->page_4k_cnt, pginfo->num_4k); - ret = -EFAULT; - goto ehca_set_pagebuf_1_exit0; - } - tmp_pbuf = pginfo->phys_buf_array + pginfo->next_buf; - num4k = ((tmp_pbuf->addr % EHCA_PAGESIZE) + tmp_pbuf->size + - EHCA_PAGESIZE - 1) / EHCA_PAGESIZE; - offs4k = (tmp_pbuf->addr & ~PAGE_MASK) / EHCA_PAGESIZE; - *rpage = phys_to_abs((tmp_pbuf->addr & EHCA_PAGEMASK) + - (pginfo->next_4k * EHCA_PAGESIZE)); - if ( !(*rpage) && tmp_pbuf->addr ) { - ehca_gen_err("tmp_pbuf->addr=%lx" - " tmp_pbuf->size=%lx next_4k=%lx", - tmp_pbuf->addr, tmp_pbuf->size, - pginfo->next_4k); - ret = -EFAULT; - goto ehca_set_pagebuf_1_exit0; - } - (pginfo->page_4k_cnt)++; - (pginfo->next_4k)++; - if (pginfo->next_4k % (PAGE_SIZE / EHCA_PAGESIZE) == 0) - (pginfo->page_cnt)++; - if (pginfo->next_4k >= offs4k + num4k) { - (pginfo->next_buf)++; - pginfo->next_4k = 0; - } - } else if (pginfo->type == EHCA_MR_PGI_USER) { - chunk = pginfo->next_chunk; - prev_chunk = pginfo->next_chunk; - list_for_each_entry_continue(chunk, - (&(pginfo->region->chunk_list)), - list) { - pgaddr = ( page_to_pfn(chunk->page_list[ - pginfo->next_nmap].page) - << PAGE_SHIFT); - *rpage = phys_to_abs(pgaddr + - (pginfo->next_4k * EHCA_PAGESIZE)); - if ( !(*rpage) ) { - ehca_gen_err("pgaddr=%lx chunk->page_list[]=%lx" - " next_nmap=%lx next_4k=%lx mr=%p", - pgaddr, (u64)sg_dma_address( - 
&chunk->page_list[ - pginfo-> - next_nmap]), - pginfo->next_nmap, pginfo->next_4k, - e_mr); - ret = -EFAULT; - goto ehca_set_pagebuf_1_exit0; - } - (pginfo->page_4k_cnt)++; - (pginfo->next_4k)++; - if (pginfo->next_4k % - (PAGE_SIZE / EHCA_PAGESIZE) == 0) { - (pginfo->page_cnt)++; - (pginfo->next_nmap)++; - pginfo->next_4k = 0; + struct ib_phys_buf *pbuf; + u64 num_hw, offs_hw; + u32 i = 0; + + /* loop over desired phys_buf_array entries */ + while (i < number) { + pbuf = pginfo->u.phy.phys_buf_array + pginfo->u.phy.next_buf; + num_hw = NUM_CHUNKS((pbuf->addr % EHCA_PAGESIZE) + + pbuf->size, EHCA_PAGESIZE); + offs_hw = (pbuf->addr & ~PAGE_MASK) / EHCA_PAGESIZE; + while (pginfo->next_hwpage < offs_hw + num_hw) { + /* sanity check */ + if ((pginfo->kpage_cnt >= pginfo->num_kpages) || + (pginfo->hwpage_cnt >= pginfo->num_hwpages)) { + ehca_gen_err("kpage_cnt >= num_kpages, " + "kpage_cnt=%lx num_kpages=%lx " + "hwpage_cnt=%lx " + "num_hwpages=%lx i=%x", + pginfo->kpage_cnt, + pginfo->num_kpages, + pginfo->hwpage_cnt, + pginfo->num_hwpages, i); + return -EFAULT; } - if (pginfo->next_nmap >= chunk->nmap) { - pginfo->next_nmap = 0; - prev_chunk = chunk; + *kpage = phys_to_abs( + (pbuf->addr & EHCA_PAGEMASK) + + (pginfo->next_hwpage * EHCA_PAGESIZE)); + if ( !(*kpage) && pbuf->addr ) { + ehca_gen_err("pbuf->addr=%lx " + "pbuf->size=%lx " + "next_hwpage=%lx", pbuf->addr, + pbuf->size, + pginfo->next_hwpage); + return -EFAULT; } - break; + (pginfo->hwpage_cnt)++; + (pginfo->next_hwpage)++; + if (pginfo->next_hwpage % + (PAGE_SIZE / EHCA_PAGESIZE) == 0) + (pginfo->kpage_cnt)++; + kpage++; + i++; + if (i >= number) break; + } + if (pginfo->next_hwpage >= offs_hw + num_hw) { + (pginfo->u.phy.next_buf)++; + pginfo->next_hwpage = 0; } - pginfo->next_chunk = - list_prepare_entry(prev_chunk, - (&(pginfo->region->chunk_list)), - list); - } else if (pginfo->type == EHCA_MR_PGI_FMR) { - fmrlist = pginfo->page_list + pginfo->next_listelem; - *rpage = phys_to_abs((*fmrlist & 
EHCA_PAGEMASK) + - pginfo->next_4k * EHCA_PAGESIZE); - if ( !(*rpage) ) { + } + return ret; +} + +int ehca_set_pagebuf_fmr(struct ehca_mr_pginfo *pginfo, + u32 number, + u64 *kpage) +{ + int ret = 0; + u64 *fmrlist; + u32 i; + + /* loop over desired page_list entries */ + fmrlist = pginfo->u.fmr.page_list + pginfo->u.fmr.next_listelem; + for (i = 0; i < number; i++) { + *kpage = phys_to_abs((*fmrlist & EHCA_PAGEMASK) + + pginfo->next_hwpage * EHCA_PAGESIZE); + if ( !(*kpage) ) { ehca_gen_err("*fmrlist=%lx fmrlist=%p " - "next_listelem=%lx next_4k=%lx", - *fmrlist, fmrlist, pginfo->next_listelem, - pginfo->next_4k); - ret = -EFAULT; - goto ehca_set_pagebuf_1_exit0; + "next_listelem=%lx next_hwpage=%lx", + *fmrlist, fmrlist, + pginfo->u.fmr.next_listelem, + pginfo->next_hwpage); + return -EFAULT; } - (pginfo->page_4k_cnt)++; - (pginfo->next_4k)++; - if (pginfo->next_4k % - (e_mr->fmr_page_size / EHCA_PAGESIZE) == 0) { - (pginfo->page_cnt)++; - (pginfo->next_listelem)++; - pginfo->next_4k = 0; + (pginfo->hwpage_cnt)++; + (pginfo->next_hwpage)++; + kpage++; + if (pginfo->next_hwpage % + (pginfo->u.fmr.fmr_pgsize / EHCA_PAGESIZE) == 0) { + (pginfo->kpage_cnt)++; + (pginfo->u.fmr.next_listelem)++; + fmrlist++; + pginfo->next_hwpage = 0; } - } else { + } + return ret; +} + +/* setup page buffer from page info */ +int ehca_set_pagebuf(struct ehca_mr_pginfo *pginfo, + u32 number, + u64 *kpage) +{ + int ret; + + switch (pginfo->type) { + case EHCA_MR_PGI_PHYS: + ret = ehca_set_pagebuf_phys(pginfo, number, kpage); + break; + case EHCA_MR_PGI_USER: + ret = ehca_set_pagebuf_user1(pginfo, number, kpage); + break; + case EHCA_MR_PGI_FMR: + ret = ehca_set_pagebuf_fmr(pginfo, number, kpage); + break; + default: ehca_gen_err("bad pginfo->type=%x", pginfo->type); ret = -EFAULT; - goto ehca_set_pagebuf_1_exit0; + break; } - -ehca_set_pagebuf_1_exit0: - if (ret) - ehca_gen_err("ret=%x e_mr=%p pginfo=%p type=%x num_pages=%lx " - "num_4k=%lx next_buf=%lx next_4k=%lx rpage=%p " - 
"page_cnt=%lx page_4k_cnt=%lx next_listelem=%lx " - "region=%p next_chunk=%p next_nmap=%lx", ret, e_mr, - pginfo, pginfo->type, pginfo->num_pages, - pginfo->num_4k, pginfo->next_buf, pginfo->next_4k, - rpage, pginfo->page_cnt, pginfo->page_4k_cnt, - pginfo->next_listelem, pginfo->region, - pginfo->next_chunk, pginfo->next_nmap); return ret; -} /* end ehca_set_pagebuf_1() */ +} /* end ehca_set_pagebuf() */ /*----------------------------------------------------------------------*/ @@ -1982,7 +1866,7 @@ int ehca_mr_is_maxmr(u64 size, { /* a MR is treated as max-MR only if it fits following: */ if ((size == ((u64)high_memory - PAGE_OFFSET)) && - (iova_start == (void*)KERNELBASE)) { + (iova_start == (void *)KERNELBASE)) { ehca_gen_dbg("this is a max-MR"); return 1; } else @@ -2042,196 +1926,23 @@ void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl, /*----------------------------------------------------------------------*/ /* - * map HIPZ rc to IB retcodes for MR/MW allocations - * Used for hipz_mr_reg_alloc and hipz_mw_alloc. 
- */ -int ehca_mrmw_map_hrc_alloc(const u64 hipz_rc) -{ - switch (hipz_rc) { - case H_SUCCESS: /* successful completion */ - return 0; - case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */ - case H_CONSTRAINED: /* resource constraint */ - case H_NO_MEM: - return -ENOMEM; - case H_BUSY: /* long busy */ - return -EBUSY; - default: - return -EINVAL; - } -} /* end ehca_mrmw_map_hrc_alloc() */ - -/*----------------------------------------------------------------------*/ - -/* - * map HIPZ rc to IB retcodes for MR register rpage - * Used for hipz_h_register_rpage_mr at registering last page - */ -int ehca_mrmw_map_hrc_rrpg_last(const u64 hipz_rc) -{ - switch (hipz_rc) { - case H_SUCCESS: /* registration complete */ - return 0; - case H_PAGE_REGISTERED: /* page registered */ - case H_ADAPTER_PARM: /* invalid adapter handle */ - case H_RH_PARM: /* invalid resource handle */ -/* case H_QT_PARM: invalid queue type */ - case H_PARAMETER: /* - * invalid logical address, - * or count zero or greater 512 - */ - case H_TABLE_FULL: /* page table full */ - case H_HARDWARE: /* HCA not operational */ - return -EINVAL; - case H_BUSY: /* long busy */ - return -EBUSY; - default: - return -EINVAL; - } -} /* end ehca_mrmw_map_hrc_rrpg_last() */ - -/*----------------------------------------------------------------------*/ - -/* - * map HIPZ rc to IB retcodes for MR register rpage - * Used for hipz_h_register_rpage_mr at registering one page, but not last page - */ -int ehca_mrmw_map_hrc_rrpg_notlast(const u64 hipz_rc) -{ - switch (hipz_rc) { - case H_PAGE_REGISTERED: /* page registered */ - return 0; - case H_SUCCESS: /* registration complete */ - case H_ADAPTER_PARM: /* invalid adapter handle */ - case H_RH_PARM: /* invalid resource handle */ -/* case H_QT_PARM: invalid queue type */ - case H_PARAMETER: /* - * invalid logical address, - * or count zero or greater 512 - */ - case H_TABLE_FULL: /* page table full */ - case H_HARDWARE: /* HCA not operational */ - return -EINVAL; - case 
H_BUSY: /* long busy */ - return -EBUSY; - default: - return -EINVAL; - } -} /* end ehca_mrmw_map_hrc_rrpg_notlast() */ - -/*----------------------------------------------------------------------*/ - -/* map HIPZ rc to IB retcodes for MR query. Used for hipz_mr_query. */ -int ehca_mrmw_map_hrc_query_mr(const u64 hipz_rc) -{ - switch (hipz_rc) { - case H_SUCCESS: /* successful completion */ - return 0; - case H_ADAPTER_PARM: /* invalid adapter handle */ - case H_RH_PARM: /* invalid resource handle */ - return -EINVAL; - case H_BUSY: /* long busy */ - return -EBUSY; - default: - return -EINVAL; - } -} /* end ehca_mrmw_map_hrc_query_mr() */ - -/*----------------------------------------------------------------------*/ -/*----------------------------------------------------------------------*/ - -/* - * map HIPZ rc to IB retcodes for freeing MR resource - * Used for hipz_h_free_resource_mr - */ -int ehca_mrmw_map_hrc_free_mr(const u64 hipz_rc) -{ - switch (hipz_rc) { - case H_SUCCESS: /* resource freed */ - return 0; - case H_ADAPTER_PARM: /* invalid adapter handle */ - case H_RH_PARM: /* invalid resource handle */ - case H_R_STATE: /* invalid resource state */ - case H_HARDWARE: /* HCA not operational */ - return -EINVAL; - case H_RESOURCE: /* Resource in use */ - case H_BUSY: /* long busy */ - return -EBUSY; - default: - return -EINVAL; - } -} /* end ehca_mrmw_map_hrc_free_mr() */ - -/*----------------------------------------------------------------------*/ - -/* - * map HIPZ rc to IB retcodes for freeing MW resource - * Used for hipz_h_free_resource_mw - */ -int ehca_mrmw_map_hrc_free_mw(const u64 hipz_rc) -{ - switch (hipz_rc) { - case H_SUCCESS: /* resource freed */ - return 0; - case H_ADAPTER_PARM: /* invalid adapter handle */ - case H_RH_PARM: /* invalid resource handle */ - case H_R_STATE: /* invalid resource state */ - case H_HARDWARE: /* HCA not operational */ - return -EINVAL; - case H_RESOURCE: /* Resource in use */ - case H_BUSY: /* long busy */ - return 
-EBUSY; - default: - return -EINVAL; - } -} /* end ehca_mrmw_map_hrc_free_mw() */ - -/*----------------------------------------------------------------------*/ - -/* - * map HIPZ rc to IB retcodes for SMR registrations - * Used for hipz_h_register_smr. - */ -int ehca_mrmw_map_hrc_reg_smr(const u64 hipz_rc) -{ - switch (hipz_rc) { - case H_SUCCESS: /* successful completion */ - return 0; - case H_ADAPTER_PARM: /* invalid adapter handle */ - case H_RH_PARM: /* invalid resource handle */ - case H_MEM_PARM: /* invalid MR virtual address */ - case H_MEM_ACCESS_PARM: /* invalid access controls */ - case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */ - return -EINVAL; - case H_BUSY: /* long busy */ - return -EBUSY; - default: - return -EINVAL; - } -} /* end ehca_mrmw_map_hrc_reg_smr() */ - -/*----------------------------------------------------------------------*/ - -/* * MR destructor and constructor * used in Reregister MR verb, sets all fields in ehca_mr_t to 0, * except struct ib_mr and spinlock */ void ehca_mr_deletenew(struct ehca_mr *mr) { - mr->flags = 0; - mr->num_pages = 0; - mr->num_4k = 0; - mr->acl = 0; - mr->start = NULL; + mr->flags = 0; + mr->num_kpages = 0; + mr->num_hwpages = 0; + mr->acl = 0; + mr->start = NULL; mr->fmr_page_size = 0; mr->fmr_max_pages = 0; - mr->fmr_max_maps = 0; - mr->fmr_map_cnt = 0; + mr->fmr_max_maps = 0; + mr->fmr_map_cnt = 0; memset(&mr->ipz_mr_handle, 0, sizeof(mr->ipz_mr_handle)); memset(&mr->galpas, 0, sizeof(mr->galpas)); - mr->nr_of_pages = 0; - mr->pagearray = NULL; } /* end ehca_mr_deletenew() */ int ehca_init_mrmw_cache(void) diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.h b/drivers/infiniband/hw/ehca/ehca_mrmw.h index d936e40a574..24f13fe3708 100644 --- a/drivers/infiniband/hw/ehca/ehca_mrmw.h +++ b/drivers/infiniband/hw/ehca/ehca_mrmw.h @@ -101,15 +101,10 @@ int ehca_fmr_check_page_list(struct ehca_mr *e_fmr, u64 *page_list, int list_len); -int ehca_set_pagebuf(struct ehca_mr *e_mr, - struct ehca_mr_pginfo 
*pginfo, +int ehca_set_pagebuf(struct ehca_mr_pginfo *pginfo, u32 number, u64 *kpage); -int ehca_set_pagebuf_1(struct ehca_mr *e_mr, - struct ehca_mr_pginfo *pginfo, - u64 *rpage); - int ehca_mr_is_maxmr(u64 size, u64 *iova_start); @@ -121,20 +116,6 @@ void ehca_mrmw_set_pgsize_hipz_acl(u32 *hipz_acl); void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl, int *ib_acl); -int ehca_mrmw_map_hrc_alloc(const u64 hipz_rc); - -int ehca_mrmw_map_hrc_rrpg_last(const u64 hipz_rc); - -int ehca_mrmw_map_hrc_rrpg_notlast(const u64 hipz_rc); - -int ehca_mrmw_map_hrc_query_mr(const u64 hipz_rc); - -int ehca_mrmw_map_hrc_free_mr(const u64 hipz_rc); - -int ehca_mrmw_map_hrc_free_mw(const u64 hipz_rc); - -int ehca_mrmw_map_hrc_reg_smr(const u64 hipz_rc); - void ehca_mr_deletenew(struct ehca_mr *mr); #endif /*_EHCA_MRMW_H_*/ diff --git a/drivers/infiniband/hw/ehca/ehca_qes.h b/drivers/infiniband/hw/ehca/ehca_qes.h index 8707d297ce4..818803057eb 100644 --- a/drivers/infiniband/hw/ehca/ehca_qes.h +++ b/drivers/infiniband/hw/ehca/ehca_qes.h @@ -53,13 +53,13 @@ struct ehca_vsgentry { u32 length; }; -#define GRH_FLAG_MASK EHCA_BMASK_IBM(7,7) -#define GRH_IPVERSION_MASK EHCA_BMASK_IBM(0,3) -#define GRH_TCLASS_MASK EHCA_BMASK_IBM(4,12) -#define GRH_FLOWLABEL_MASK EHCA_BMASK_IBM(13,31) -#define GRH_PAYLEN_MASK EHCA_BMASK_IBM(32,47) -#define GRH_NEXTHEADER_MASK EHCA_BMASK_IBM(48,55) -#define GRH_HOPLIMIT_MASK EHCA_BMASK_IBM(56,63) +#define GRH_FLAG_MASK EHCA_BMASK_IBM( 7, 7) +#define GRH_IPVERSION_MASK EHCA_BMASK_IBM( 0, 3) +#define GRH_TCLASS_MASK EHCA_BMASK_IBM( 4, 12) +#define GRH_FLOWLABEL_MASK EHCA_BMASK_IBM(13, 31) +#define GRH_PAYLEN_MASK EHCA_BMASK_IBM(32, 47) +#define GRH_NEXTHEADER_MASK EHCA_BMASK_IBM(48, 55) +#define GRH_HOPLIMIT_MASK EHCA_BMASK_IBM(56, 63) /* * Unreliable Datagram Address Vector Format @@ -206,10 +206,10 @@ struct ehca_wqe { }; -#define WC_SEND_RECEIVE EHCA_BMASK_IBM(0,0) -#define WC_IMM_DATA EHCA_BMASK_IBM(1,1) -#define WC_GRH_PRESENT EHCA_BMASK_IBM(2,2) -#define 
WC_SE_BIT EHCA_BMASK_IBM(3,3) +#define WC_SEND_RECEIVE EHCA_BMASK_IBM(0, 0) +#define WC_IMM_DATA EHCA_BMASK_IBM(1, 1) +#define WC_GRH_PRESENT EHCA_BMASK_IBM(2, 2) +#define WC_SE_BIT EHCA_BMASK_IBM(3, 3) #define WC_STATUS_ERROR_BIT 0x80000000 #define WC_STATUS_REMOTE_ERROR_FLAGS 0x0000F800 #define WC_STATUS_PURGE_BIT 0x10 diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index 74671250303..48e9ceacd6f 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -602,10 +602,10 @@ struct ehca_qp *internal_create_qp(struct ib_pd *pd, /* UD circumvention */ parms.act_nr_send_sges -= 2; parms.act_nr_recv_sges -= 2; - swqe_size = offsetof(struct ehca_wqe, - u.ud_av.sg_list[parms.act_nr_send_sges]); - rwqe_size = offsetof(struct ehca_wqe, - u.ud_av.sg_list[parms.act_nr_recv_sges]); + swqe_size = offsetof(struct ehca_wqe, u.ud_av.sg_list[ + parms.act_nr_send_sges]); + rwqe_size = offsetof(struct ehca_wqe, u.ud_av.sg_list[ + parms.act_nr_recv_sges]); } if (IB_QPT_GSI == qp_type || IB_QPT_SMI == qp_type) { @@ -690,8 +690,8 @@ struct ehca_qp *internal_create_qp(struct ib_pd *pd, if (my_qp->send_cq) { ret = ehca_cq_assign_qp(my_qp->send_cq, my_qp); if (ret) { - ehca_err(pd->device, "Couldn't assign qp to send_cq ret=%x", - ret); + ehca_err(pd->device, + "Couldn't assign qp to send_cq ret=%x", ret); goto create_qp_exit4; } } @@ -749,7 +749,7 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd, struct ehca_qp *ret; ret = internal_create_qp(pd, qp_init_attr, NULL, udata, 0); - return IS_ERR(ret) ? (struct ib_qp *) ret : &ret->ib_qp; + return IS_ERR(ret) ? 
(struct ib_qp *)ret : &ret->ib_qp; } int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, @@ -780,7 +780,7 @@ struct ib_srq *ehca_create_srq(struct ib_pd *pd, my_qp = internal_create_qp(pd, &qp_init_attr, srq_init_attr, udata, 1); if (IS_ERR(my_qp)) - return (struct ib_srq *) my_qp; + return (struct ib_srq *)my_qp; /* copy back return values */ srq_init_attr->attr.max_wr = qp_init_attr.cap.max_recv_wr; @@ -875,7 +875,7 @@ static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca, my_qp, qp_num, h_ret); return ehca2ib_return_code(h_ret); } - bad_send_wqe_p = (void*)((u64)bad_send_wqe_p & (~(1L<<63))); + bad_send_wqe_p = (void *)((u64)bad_send_wqe_p & (~(1L << 63))); ehca_dbg(&shca->ib_device, "qp_num=%x bad_send_wqe_p=%p", qp_num, bad_send_wqe_p); /* convert wqe pointer to vadr */ @@ -890,7 +890,7 @@ static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca, } /* loop sets wqe's purge bit */ - wqe = (struct ehca_wqe*)ipz_qeit_calc(squeue, q_ofs); + wqe = (struct ehca_wqe *)ipz_qeit_calc(squeue, q_ofs); *bad_wqe_cnt = 0; while (wqe->optype != 0xff && wqe->wqef != 0xff) { if (ehca_debug_level) @@ -898,7 +898,7 @@ static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca, wqe->nr_of_data_seg = 0; /* suppress data access */ wqe->wqef = WQEF_PURGE; /* WQE to be purged */ q_ofs = ipz_queue_advance_offset(squeue, q_ofs); - wqe = (struct ehca_wqe*)ipz_qeit_calc(squeue, q_ofs); + wqe = (struct ehca_wqe *)ipz_qeit_calc(squeue, q_ofs); *bad_wqe_cnt = (*bad_wqe_cnt)+1; } /* @@ -1003,7 +1003,7 @@ static int internal_modify_qp(struct ib_qp *ibqp, goto modify_qp_exit1; } - ehca_dbg(ibqp->device,"ehca_qp=%p qp_num=%x current qp_state=%x " + ehca_dbg(ibqp->device, "ehca_qp=%p qp_num=%x current qp_state=%x " "new qp_state=%x attribute_mask=%x", my_qp, ibqp->qp_num, qp_cur_state, attr->qp_state, attr_mask); @@ -1019,7 +1019,8 @@ static int internal_modify_qp(struct ib_qp *ibqp, goto modify_qp_exit1; } - if ((mqpcb->qp_state 
= ib2ehca_qp_state(qp_new_state))) + mqpcb->qp_state = ib2ehca_qp_state(qp_new_state); + if (mqpcb->qp_state) update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1); else { ret = -EINVAL; @@ -1077,7 +1078,7 @@ static int internal_modify_qp(struct ib_qp *ibqp, spin_lock_irqsave(&my_qp->spinlock_s, flags); squeue_locked = 1; /* mark next free wqe */ - wqe = (struct ehca_wqe*) + wqe = (struct ehca_wqe *) ipz_qeit_get(&my_qp->ipz_squeue); wqe->optype = wqe->wqef = 0xff; ehca_dbg(ibqp->device, "qp_num=%x next_free_wqe=%p", @@ -1312,7 +1313,7 @@ static int internal_modify_qp(struct ib_qp *ibqp, if (h_ret != H_SUCCESS) { ret = ehca2ib_return_code(h_ret); ehca_err(ibqp->device, "hipz_h_modify_qp() failed rc=%lx " - "ehca_qp=%p qp_num=%x",h_ret, my_qp, ibqp->qp_num); + "ehca_qp=%p qp_num=%x", h_ret, my_qp, ibqp->qp_num); goto modify_qp_exit2; } @@ -1411,7 +1412,7 @@ int ehca_query_qp(struct ib_qp *qp, } if (qp_attr_mask & QP_ATTR_QUERY_NOT_SUPPORTED) { - ehca_err(qp->device,"Invalid attribute mask " + ehca_err(qp->device, "Invalid attribute mask " "ehca_qp=%p qp_num=%x qp_attr_mask=%x ", my_qp, qp->qp_num, qp_attr_mask); return -EINVAL; @@ -1419,7 +1420,7 @@ int ehca_query_qp(struct ib_qp *qp, qpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); if (!qpcb) { - ehca_err(qp->device,"Out of memory for qpcb " + ehca_err(qp->device, "Out of memory for qpcb " "ehca_qp=%p qp_num=%x", my_qp, qp->qp_num); return -ENOMEM; } @@ -1431,7 +1432,7 @@ int ehca_query_qp(struct ib_qp *qp, if (h_ret != H_SUCCESS) { ret = ehca2ib_return_code(h_ret); - ehca_err(qp->device,"hipz_h_query_qp() failed " + ehca_err(qp->device, "hipz_h_query_qp() failed " "ehca_qp=%p qp_num=%x h_ret=%lx", my_qp, qp->qp_num, h_ret); goto query_qp_exit1; @@ -1442,7 +1443,7 @@ int ehca_query_qp(struct ib_qp *qp, if (qp_attr->cur_qp_state == -EINVAL) { ret = -EINVAL; - ehca_err(qp->device,"Got invalid ehca_qp_state=%x " + ehca_err(qp->device, "Got invalid ehca_qp_state=%x " "ehca_qp=%p qp_num=%x", qpcb->qp_state, my_qp, 
qp->qp_num); goto query_qp_exit1; diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c index 61da65e6e5e..94eed70fedf 100644 --- a/drivers/infiniband/hw/ehca/ehca_reqs.c +++ b/drivers/infiniband/hw/ehca/ehca_reqs.c @@ -79,7 +79,8 @@ static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue, } if (ehca_debug_level) { - ehca_gen_dbg("RECEIVE WQE written into ipz_rqueue=%p", ipz_rqueue); + ehca_gen_dbg("RECEIVE WQE written into ipz_rqueue=%p", + ipz_rqueue); ehca_dmp( wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe"); } @@ -99,7 +100,7 @@ static void trace_send_wr_ud(const struct ib_send_wr *send_wr) struct ib_mad_hdr *mad_hdr = send_wr->wr.ud.mad_hdr; struct ib_sge *sge = send_wr->sg_list; ehca_gen_dbg("send_wr#%x wr_id=%lx num_sge=%x " - "send_flags=%x opcode=%x",idx, send_wr->wr_id, + "send_flags=%x opcode=%x", idx, send_wr->wr_id, send_wr->num_sge, send_wr->send_flags, send_wr->opcode); if (mad_hdr) { @@ -116,7 +117,7 @@ static void trace_send_wr_ud(const struct ib_send_wr *send_wr) mad_hdr->attr_mod); } for (j = 0; j < send_wr->num_sge; j++) { - u8 *data = (u8 *) abs_to_virt(sge->addr); + u8 *data = (u8 *)abs_to_virt(sge->addr); ehca_gen_dbg("send_wr#%x sge#%x addr=%p length=%x " "lkey=%x", idx, j, data, sge->length, sge->lkey); @@ -534,9 +535,11 @@ poll_cq_one_read_cqe: cqe_count++; if (unlikely(cqe->status & WC_STATUS_PURGE_BIT)) { - struct ehca_qp *qp=ehca_cq_get_qp(my_cq, cqe->local_qp_number); + struct ehca_qp *qp; int purgeflag; unsigned long flags; + + qp = ehca_cq_get_qp(my_cq, cqe->local_qp_number); if (!qp) { ehca_err(cq->device, "cq_num=%x qp_num=%x " "could not find qp -> ignore cqe", @@ -551,8 +554,8 @@ poll_cq_one_read_cqe: spin_unlock_irqrestore(&qp->spinlock_s, flags); if (purgeflag) { - ehca_dbg(cq->device, "Got CQE with purged bit qp_num=%x " - "src_qp=%x", + ehca_dbg(cq->device, + "Got CQE with purged bit qp_num=%x src_qp=%x", cqe->local_qp_number, cqe->remote_qp_number); if (ehca_debug_level) 
ehca_dmp(cqe, 64, "qp_num=%x src_qp=%x", diff --git a/drivers/infiniband/hw/ehca/ehca_tools.h b/drivers/infiniband/hw/ehca/ehca_tools.h index 03b185f873d..678b8139186 100644 --- a/drivers/infiniband/hw/ehca/ehca_tools.h +++ b/drivers/infiniband/hw/ehca/ehca_tools.h @@ -93,14 +93,14 @@ extern int ehca_debug_level; #define ehca_gen_dbg(format, arg...) \ do { \ if (unlikely(ehca_debug_level)) \ - printk(KERN_DEBUG "PU%04x EHCA_DBG:%s " format "\n",\ + printk(KERN_DEBUG "PU%04x EHCA_DBG:%s " format "\n", \ get_paca()->paca_index, __FUNCTION__, ## arg); \ } while (0) #define ehca_gen_warn(format, arg...) \ do { \ if (unlikely(ehca_debug_level)) \ - printk(KERN_INFO "PU%04x EHCA_WARN:%s " format "\n",\ + printk(KERN_INFO "PU%04x EHCA_WARN:%s " format "\n", \ get_paca()->paca_index, __FUNCTION__, ## arg); \ } while (0) @@ -114,12 +114,12 @@ extern int ehca_debug_level; * <format string> adr=X ofs=Y <8 bytes hex> <8 bytes hex> */ #define ehca_dmp(adr, len, format, args...) \ - do { \ - unsigned int x; \ + do { \ + unsigned int x; \ unsigned int l = (unsigned int)(len); \ - unsigned char *deb = (unsigned char*)(adr); \ + unsigned char *deb = (unsigned char *)(adr); \ for (x = 0; x < l; x += 16) { \ - printk("EHCA_DMP:%s " format \ + printk(KERN_INFO "EHCA_DMP:%s " format \ " adr=%p ofs=%04x %016lx %016lx\n", \ __FUNCTION__, ##args, deb, x, \ *((u64 *)&deb[0]), *((u64 *)&deb[8])); \ @@ -128,16 +128,16 @@ extern int ehca_debug_level; } while (0) /* define a bitmask, little endian version */ -#define EHCA_BMASK(pos,length) (((pos)<<16)+(length)) +#define EHCA_BMASK(pos, length) (((pos) << 16) + (length)) /* define a bitmask, the ibm way... 
*/ -#define EHCA_BMASK_IBM(from,to) (((63-to)<<16)+((to)-(from)+1)) +#define EHCA_BMASK_IBM(from, to) (((63 - to) << 16) + ((to) - (from) + 1)) /* internal function, don't use */ -#define EHCA_BMASK_SHIFTPOS(mask) (((mask)>>16)&0xffff) +#define EHCA_BMASK_SHIFTPOS(mask) (((mask) >> 16) & 0xffff) /* internal function, don't use */ -#define EHCA_BMASK_MASK(mask) (0xffffffffffffffffULL >> ((64-(mask))&0xffff)) +#define EHCA_BMASK_MASK(mask) (~0ULL >> ((64 - (mask)) & 0xffff)) /** * EHCA_BMASK_SET - return value shifted and masked by mask @@ -145,14 +145,14 @@ extern int ehca_debug_level; * variable&=~EHCA_BMASK_SET(MY_MASK,-1) clears the bits from the mask * in variable */ -#define EHCA_BMASK_SET(mask,value) \ - ((EHCA_BMASK_MASK(mask) & ((u64)(value)))<<EHCA_BMASK_SHIFTPOS(mask)) +#define EHCA_BMASK_SET(mask, value) \ + ((EHCA_BMASK_MASK(mask) & ((u64)(value))) << EHCA_BMASK_SHIFTPOS(mask)) /** * EHCA_BMASK_GET - extract a parameter from value by mask */ -#define EHCA_BMASK_GET(mask,value) \ - (EHCA_BMASK_MASK(mask)& (((u64)(value))>>EHCA_BMASK_SHIFTPOS(mask))) +#define EHCA_BMASK_GET(mask, value) \ + (EHCA_BMASK_MASK(mask) & (((u64)(value)) >> EHCA_BMASK_SHIFTPOS(mask))) /* Converts ehca to ib return code */ @@ -161,8 +161,11 @@ static inline int ehca2ib_return_code(u64 ehca_rc) switch (ehca_rc) { case H_SUCCESS: return 0; + case H_RESOURCE: /* Resource in use */ case H_BUSY: return -EBUSY; + case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */ + case H_CONSTRAINED: /* resource constraint */ case H_NO_MEM: return -ENOMEM; default: diff --git a/drivers/infiniband/hw/ehca/ehca_uverbs.c b/drivers/infiniband/hw/ehca/ehca_uverbs.c index 3031b3bb56f..05c415744e3 100644 --- a/drivers/infiniband/hw/ehca/ehca_uverbs.c +++ b/drivers/infiniband/hw/ehca/ehca_uverbs.c @@ -70,7 +70,7 @@ int ehca_dealloc_ucontext(struct ib_ucontext *context) static void ehca_mm_open(struct vm_area_struct *vma) { - u32 *count = (u32*)vma->vm_private_data; + u32 *count = (u32 
*)vma->vm_private_data; if (!count) { ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx", vma->vm_start, vma->vm_end); @@ -86,7 +86,7 @@ static void ehca_mm_open(struct vm_area_struct *vma) static void ehca_mm_close(struct vm_area_struct *vma) { - u32 *count = (u32*)vma->vm_private_data; + u32 *count = (u32 *)vma->vm_private_data; if (!count) { ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx", vma->vm_start, vma->vm_end); @@ -215,7 +215,8 @@ static int ehca_mmap_qp(struct vm_area_struct *vma, struct ehca_qp *qp, case 2: /* qp rqueue_addr */ ehca_dbg(qp->ib_qp.device, "qp_num=%x rqueue", qp->ib_qp.qp_num); - ret = ehca_mmap_queue(vma, &qp->ipz_rqueue, &qp->mm_count_rqueue); + ret = ehca_mmap_queue(vma, &qp->ipz_rqueue, + &qp->mm_count_rqueue); if (unlikely(ret)) { ehca_err(qp->ib_qp.device, "ehca_mmap_queue(rq) failed rc=%x qp_num=%x", @@ -227,7 +228,8 @@ static int ehca_mmap_qp(struct vm_area_struct *vma, struct ehca_qp *qp, case 3: /* qp squeue_addr */ ehca_dbg(qp->ib_qp.device, "qp_num=%x squeue", qp->ib_qp.qp_num); - ret = ehca_mmap_queue(vma, &qp->ipz_squeue, &qp->mm_count_squeue); + ret = ehca_mmap_queue(vma, &qp->ipz_squeue, + &qp->mm_count_squeue); if (unlikely(ret)) { ehca_err(qp->ib_qp.device, "ehca_mmap_queue(sq) failed rc=%x qp_num=%x", diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c index 4776a8b0fee..3394e05f4b4 100644 --- a/drivers/infiniband/hw/ehca/hcp_if.c +++ b/drivers/infiniband/hw/ehca/hcp_if.c @@ -501,8 +501,8 @@ u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle, return H_PARAMETER; } - return hipz_h_register_rpage(adapter_handle,pagesize,queue_type, - qp_handle.handle,logical_address_of_page, + return hipz_h_register_rpage(adapter_handle, pagesize, queue_type, + qp_handle.handle, logical_address_of_page, count); } @@ -522,9 +522,9 @@ u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle, qp_handle.handle, /* r6 */ 0, 0, 0, 0, 0, 0); if 
(log_addr_next_sq_wqe2processed) - *log_addr_next_sq_wqe2processed = (void*)outs[0]; + *log_addr_next_sq_wqe2processed = (void *)outs[0]; if (log_addr_next_rq_wqe2processed) - *log_addr_next_rq_wqe2processed = (void*)outs[1]; + *log_addr_next_rq_wqe2processed = (void *)outs[1]; return ret; } diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.c b/drivers/infiniband/hw/ehca/hcp_phyp.c index 0b1a4772c78..214821095cb 100644 --- a/drivers/infiniband/hw/ehca/hcp_phyp.c +++ b/drivers/infiniband/hw/ehca/hcp_phyp.c @@ -50,7 +50,7 @@ int hcall_map_page(u64 physaddr, u64 *mapaddr) int hcall_unmap_page(u64 mapaddr) { - iounmap((volatile void __iomem*)mapaddr); + iounmap((volatile void __iomem *) mapaddr); return 0; } diff --git a/drivers/infiniband/hw/ehca/hipz_fns_core.h b/drivers/infiniband/hw/ehca/hipz_fns_core.h index 20898a15344..868735fd318 100644 --- a/drivers/infiniband/hw/ehca/hipz_fns_core.h +++ b/drivers/infiniband/hw/ehca/hipz_fns_core.h @@ -53,10 +53,10 @@ #define hipz_galpa_load_cq(gal, offset) \ hipz_galpa_load(gal, CQTEMM_OFFSET(offset)) -#define hipz_galpa_store_qp(gal,offset, value) \ +#define hipz_galpa_store_qp(gal, offset, value) \ hipz_galpa_store(gal, QPTEMM_OFFSET(offset), value) #define hipz_galpa_load_qp(gal, offset) \ - hipz_galpa_load(gal,QPTEMM_OFFSET(offset)) + hipz_galpa_load(gal, QPTEMM_OFFSET(offset)) static inline void hipz_update_sqa(struct ehca_qp *qp, u16 nr_wqes) { diff --git a/drivers/infiniband/hw/ehca/hipz_hw.h b/drivers/infiniband/hw/ehca/hipz_hw.h index dad6dea5636..d9739e55451 100644 --- a/drivers/infiniband/hw/ehca/hipz_hw.h +++ b/drivers/infiniband/hw/ehca/hipz_hw.h @@ -161,11 +161,11 @@ struct hipz_qptemm { /* 0x1000 */ }; -#define QPX_SQADDER EHCA_BMASK_IBM(48,63) -#define QPX_RQADDER EHCA_BMASK_IBM(48,63) -#define QPX_AAELOG_RESET_SRQ_LIMIT EHCA_BMASK_IBM(3,3) +#define QPX_SQADDER EHCA_BMASK_IBM(48, 63) +#define QPX_RQADDER EHCA_BMASK_IBM(48, 63) +#define QPX_AAELOG_RESET_SRQ_LIMIT EHCA_BMASK_IBM(3, 3) -#define QPTEMM_OFFSET(x) 
offsetof(struct hipz_qptemm,x) +#define QPTEMM_OFFSET(x) offsetof(struct hipz_qptemm, x) /* MRMWPT Entry Memory Map */ struct hipz_mrmwmm { @@ -187,7 +187,7 @@ struct hipz_mrmwmm { }; -#define MRMWMM_OFFSET(x) offsetof(struct hipz_mrmwmm,x) +#define MRMWMM_OFFSET(x) offsetof(struct hipz_mrmwmm, x) struct hipz_qpedmm { /* 0x00 */ @@ -238,7 +238,7 @@ struct hipz_qpedmm { u64 qpedx_rrva3; }; -#define QPEDMM_OFFSET(x) offsetof(struct hipz_qpedmm,x) +#define QPEDMM_OFFSET(x) offsetof(struct hipz_qpedmm, x) /* CQ Table Entry Memory Map */ struct hipz_cqtemm { @@ -263,12 +263,12 @@ struct hipz_cqtemm { /* 0x1000 */ }; -#define CQX_FEC_CQE_CNT EHCA_BMASK_IBM(32,63) -#define CQX_FECADDER EHCA_BMASK_IBM(32,63) -#define CQX_N0_GENERATE_SOLICITED_COMP_EVENT EHCA_BMASK_IBM(0,0) -#define CQX_N1_GENERATE_COMP_EVENT EHCA_BMASK_IBM(0,0) +#define CQX_FEC_CQE_CNT EHCA_BMASK_IBM(32, 63) +#define CQX_FECADDER EHCA_BMASK_IBM(32, 63) +#define CQX_N0_GENERATE_SOLICITED_COMP_EVENT EHCA_BMASK_IBM(0, 0) +#define CQX_N1_GENERATE_COMP_EVENT EHCA_BMASK_IBM(0, 0) -#define CQTEMM_OFFSET(x) offsetof(struct hipz_cqtemm,x) +#define CQTEMM_OFFSET(x) offsetof(struct hipz_cqtemm, x) /* EQ Table Entry Memory Map */ struct hipz_eqtemm { @@ -293,7 +293,7 @@ struct hipz_eqtemm { }; -#define EQTEMM_OFFSET(x) offsetof(struct hipz_eqtemm,x) +#define EQTEMM_OFFSET(x) offsetof(struct hipz_eqtemm, x) /* access control defines for MR/MW */ #define HIPZ_ACCESSCTRL_L_WRITE 0x00800000 diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.c b/drivers/infiniband/hw/ehca/ipz_pt_fn.c index bf7a40088f6..9606f13ed09 100644 --- a/drivers/infiniband/hw/ehca/ipz_pt_fn.c +++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.c @@ -114,7 +114,7 @@ int ipz_queue_ctor(struct ipz_queue *queue, */ f = 0; while (f < nr_of_pages) { - u8 *kpage = (u8*)get_zeroed_page(GFP_KERNEL); + u8 *kpage = (u8 *)get_zeroed_page(GFP_KERNEL); int k; if (!kpage) goto ipz_queue_ctor_exit0; /*NOMEM*/ diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.h 
b/drivers/infiniband/hw/ehca/ipz_pt_fn.h index 007f0882fd4..39a4f64aff4 100644 --- a/drivers/infiniband/hw/ehca/ipz_pt_fn.h +++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.h @@ -240,7 +240,7 @@ void *ipz_qeit_eq_get_inc(struct ipz_queue *queue); static inline void *ipz_eqit_eq_get_inc_valid(struct ipz_queue *queue) { void *ret = ipz_qeit_get(queue); - u32 qe = *(u8 *) ret; + u32 qe = *(u8 *)ret; if ((qe >> 7) != (queue->toggle_state & 1)) return NULL; ipz_qeit_eq_get_inc(queue); /* this is a good one */ @@ -250,7 +250,7 @@ static inline void *ipz_eqit_eq_get_inc_valid(struct ipz_queue *queue) static inline void *ipz_eqit_eq_peek_valid(struct ipz_queue *queue) { void *ret = ipz_qeit_get(queue); - u32 qe = *(u8 *) ret; + u32 qe = *(u8 *)ret; if ((qe >> 7) != (queue->toggle_state & 1)) return NULL; return ret; diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index 9361f5ab8bd..09c5fd84b1e 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -1889,7 +1889,7 @@ void ipath_write_kreg_port(const struct ipath_devdata *dd, ipath_kreg regno, /* Below is "non-zero" to force override, but both actual LEDs are off */ #define LED_OVER_BOTH_OFF (8) -void ipath_run_led_override(unsigned long opaque) +static void ipath_run_led_override(unsigned long opaque) { struct ipath_devdata *dd = (struct ipath_devdata *)opaque; int timeoff; diff --git a/drivers/infiniband/hw/ipath/ipath_eeprom.c b/drivers/infiniband/hw/ipath/ipath_eeprom.c index 6b9147964a4..b4503e9c1e9 100644 --- a/drivers/infiniband/hw/ipath/ipath_eeprom.c +++ b/drivers/infiniband/hw/ipath/ipath_eeprom.c @@ -426,8 +426,8 @@ bail: * @buffer: data to write * @len: number of bytes to write */ -int ipath_eeprom_internal_write(struct ipath_devdata *dd, u8 eeprom_offset, - const void *buffer, int len) +static int ipath_eeprom_internal_write(struct ipath_devdata *dd, u8 eeprom_offset, + const void *buffer, int len) { u8 
single_byte; int sub_len; diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c index 47aa43428fb..1fd91c59f24 100644 --- a/drivers/infiniband/hw/ipath/ipath_intr.c +++ b/drivers/infiniband/hw/ipath/ipath_intr.c @@ -70,7 +70,7 @@ static void ipath_clrpiobuf(struct ipath_devdata *dd, u32 pnum) * If rewrite is true, and bits are set in the sendbufferror registers, * we'll write to the buffer, for error recovery on parity errors. */ -void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite) +static void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite) { u32 piobcnt; unsigned long sbuf[4]; diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index 3105005fc9d..ace63ef78e6 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h @@ -776,7 +776,6 @@ void ipath_get_eeprom_info(struct ipath_devdata *); int ipath_update_eeprom_log(struct ipath_devdata *dd); void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr); u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg); -void ipath_disarm_senderrbufs(struct ipath_devdata *, int); /* * Set LED override, only the two LSBs have "public" meaning, but @@ -820,7 +819,6 @@ static inline u64 ipath_mdio_req(int cmd, int dev, int reg, int data) #define IPATH_MDIO_CTRL_8355_REG_10 0x1D int ipath_get_user_pages(unsigned long, size_t, struct page **); -int ipath_get_user_pages_nocopy(unsigned long, struct page **); void ipath_release_user_pages(struct page **, size_t); void ipath_release_user_pages_on_close(struct page **, size_t); int ipath_eeprom_read(struct ipath_devdata *, u8, void *, int); diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c index 85256747d8a..c69c2523944 100644 --- a/drivers/infiniband/hw/ipath/ipath_ruc.c +++ b/drivers/infiniband/hw/ipath/ipath_ruc.c @@ -507,7 +507,7 @@ static int 
want_buffer(struct ipath_devdata *dd) * * Called when we run out of PIO buffers. */ -void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev) +static void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev) { unsigned long flags; diff --git a/drivers/infiniband/hw/ipath/ipath_user_pages.c b/drivers/infiniband/hw/ipath/ipath_user_pages.c index 27034d38b3d..0190edc8044 100644 --- a/drivers/infiniband/hw/ipath/ipath_user_pages.c +++ b/drivers/infiniband/hw/ipath/ipath_user_pages.c @@ -171,32 +171,6 @@ int ipath_get_user_pages(unsigned long start_page, size_t num_pages, return ret; } -/** - * ipath_get_user_pages_nocopy - lock a single page for I/O and mark shared - * @start_page: the page to lock - * @p: the output page structure - * - * This is similar to ipath_get_user_pages, but it's always one page, and we - * mark the page as locked for I/O, and shared. This is used for the user - * process page that contains the destination address for the rcvhdrq tail - * update, so we need to have the vma. If we don't do this, the page can be - * taken away from us on fork, even if the child never touches it, and then - * the user process never sees the tail register updates. - */ -int ipath_get_user_pages_nocopy(unsigned long page, struct page **p) -{ - struct vm_area_struct *vma; - int ret; - - down_write(&current->mm->mmap_sem); - - ret = __get_user_pages(page, 1, p, &vma); - - up_write(&current->mm->mmap_sem); - - return ret; -} - void ipath_release_user_pages(struct page **p, size_t num_pages) { down_write(&current->mm->mmap_sem); diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index 65f7181e9cf..16aa61fd808 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -488,7 +488,7 @@ bail:; * This is called from ipath_do_rcv_timer() at interrupt level to check for * QPs which need retransmits and to collect performance numbers. 
*/ -void ipath_ib_timer(struct ipath_ibdev *dev) +static void ipath_ib_timer(struct ipath_ibdev *dev) { struct ipath_qp *resend = NULL; struct list_head *last; diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h index f3d1f2cee6f..9bbe81967f1 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.h +++ b/drivers/infiniband/hw/ipath/ipath_verbs.h @@ -782,8 +782,6 @@ void ipath_update_mmap_info(struct ipath_ibdev *dev, int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); -void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev); - void ipath_insert_rnr_queue(struct ipath_qp *qp); int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only); @@ -807,8 +805,6 @@ void ipath_ib_rcv(struct ipath_ibdev *, void *, void *, u32); int ipath_ib_piobufavail(struct ipath_ibdev *); -void ipath_ib_timer(struct ipath_ibdev *); - unsigned ipath_get_npkeys(struct ipath_devdata *); u32 ipath_get_cr_errpkey(struct ipath_devdata *); diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 40042184ad5..b5a24fbef70 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1183,6 +1183,43 @@ static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq return cur + nreq >= wq->max_post; } +static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg, + u64 remote_addr, u32 rkey) +{ + rseg->raddr = cpu_to_be64(remote_addr); + rseg->rkey = cpu_to_be32(rkey); + rseg->reserved = 0; +} + +static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ib_send_wr *wr) +{ + if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { + aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap); + aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add); + } else { + aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add); + aseg->compare = 0; + } + +} + +static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg, + struct ib_send_wr *wr) +{ + 
memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av)); + dseg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn); + dseg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey); + +} + +static void set_data_seg(struct mlx4_wqe_data_seg *dseg, + struct ib_sge *sg) +{ + dseg->byte_count = cpu_to_be32(sg->length); + dseg->lkey = cpu_to_be32(sg->lkey); + dseg->addr = cpu_to_be64(sg->addr); +} + int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) { @@ -1238,26 +1275,13 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, switch (wr->opcode) { case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: - ((struct mlx4_wqe_raddr_seg *) wqe)->raddr = - cpu_to_be64(wr->wr.atomic.remote_addr); - ((struct mlx4_wqe_raddr_seg *) wqe)->rkey = - cpu_to_be32(wr->wr.atomic.rkey); - ((struct mlx4_wqe_raddr_seg *) wqe)->reserved = 0; - + set_raddr_seg(wqe, wr->wr.atomic.remote_addr, + wr->wr.atomic.rkey); wqe += sizeof (struct mlx4_wqe_raddr_seg); - if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { - ((struct mlx4_wqe_atomic_seg *) wqe)->swap_add = - cpu_to_be64(wr->wr.atomic.swap); - ((struct mlx4_wqe_atomic_seg *) wqe)->compare = - cpu_to_be64(wr->wr.atomic.compare_add); - } else { - ((struct mlx4_wqe_atomic_seg *) wqe)->swap_add = - cpu_to_be64(wr->wr.atomic.compare_add); - ((struct mlx4_wqe_atomic_seg *) wqe)->compare = 0; - } - + set_atomic_seg(wqe, wr); wqe += sizeof (struct mlx4_wqe_atomic_seg); + size += (sizeof (struct mlx4_wqe_raddr_seg) + sizeof (struct mlx4_wqe_atomic_seg)) / 16; @@ -1266,15 +1290,10 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, case IB_WR_RDMA_READ: case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: - ((struct mlx4_wqe_raddr_seg *) wqe)->raddr = - cpu_to_be64(wr->wr.rdma.remote_addr); - ((struct mlx4_wqe_raddr_seg *) wqe)->rkey = - cpu_to_be32(wr->wr.rdma.rkey); - ((struct mlx4_wqe_raddr_seg *) wqe)->reserved = 0; - + set_raddr_seg(wqe, wr->wr.rdma.remote_addr, + 
wr->wr.rdma.rkey); wqe += sizeof (struct mlx4_wqe_raddr_seg); size += sizeof (struct mlx4_wqe_raddr_seg) / 16; - break; default: @@ -1284,13 +1303,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; case IB_QPT_UD: - memcpy(((struct mlx4_wqe_datagram_seg *) wqe)->av, - &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av)); - ((struct mlx4_wqe_datagram_seg *) wqe)->dqpn = - cpu_to_be32(wr->wr.ud.remote_qpn); - ((struct mlx4_wqe_datagram_seg *) wqe)->qkey = - cpu_to_be32(wr->wr.ud.remote_qkey); - + set_datagram_seg(wqe, wr); wqe += sizeof (struct mlx4_wqe_datagram_seg); size += sizeof (struct mlx4_wqe_datagram_seg) / 16; break; @@ -1313,12 +1326,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } for (i = 0; i < wr->num_sge; ++i) { - ((struct mlx4_wqe_data_seg *) wqe)->byte_count = - cpu_to_be32(wr->sg_list[i].length); - ((struct mlx4_wqe_data_seg *) wqe)->lkey = - cpu_to_be32(wr->sg_list[i].lkey); - ((struct mlx4_wqe_data_seg *) wqe)->addr = - cpu_to_be64(wr->sg_list[i].addr); + set_data_seg(wqe, wr->sg_list + i); wqe += sizeof (struct mlx4_wqe_data_seg); size += sizeof (struct mlx4_wqe_data_seg) / 16; @@ -1498,7 +1506,7 @@ static int to_ib_qp_access_flags(int mlx4_flags) static void to_ib_ah_attr(struct mlx4_dev *dev, struct ib_ah_attr *ib_ah_attr, struct mlx4_qp_path *path) { - memset(ib_ah_attr, 0, sizeof *path); + memset(ib_ah_attr, 0, sizeof *ib_ah_attr); ib_ah_attr->port_num = path->sched_queue & 0x40 ? 
2 : 1; if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->caps.num_ports) @@ -1515,7 +1523,7 @@ static void to_ib_ah_attr(struct mlx4_dev *dev, struct ib_ah_attr *ib_ah_attr, ib_ah_attr->grh.traffic_class = (be32_to_cpu(path->tclass_flowlabel) >> 20) & 0xff; ib_ah_attr->grh.flow_label = - be32_to_cpu(path->tclass_flowlabel) & 0xffffff; + be32_to_cpu(path->tclass_flowlabel) & 0xfffff; memcpy(ib_ah_attr->grh.dgid.raw, path->rgid, sizeof ib_ah_attr->grh.dgid.raw); } @@ -1560,7 +1568,10 @@ int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr } qp_attr->pkey_index = context.pri_path.pkey_index & 0x7f; - qp_attr->port_num = context.pri_path.sched_queue & 0x40 ? 2 : 1; + if (qp_attr->qp_state == IB_QPS_INIT) + qp_attr->port_num = qp->port; + else + qp_attr->port_num = context.pri_path.sched_queue & 0x40 ? 2 : 1; /* qp_attr->en_sqd_async_notify is only applicable in modify qp */ qp_attr->sq_draining = mlx4_state == MLX4_QP_STATE_SQ_DRAINING; @@ -1578,17 +1589,25 @@ int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr done: qp_attr->cur_qp_state = qp_attr->qp_state; + qp_attr->cap.max_recv_wr = qp->rq.wqe_cnt; + qp_attr->cap.max_recv_sge = qp->rq.max_gs; + if (!ibqp->uobject) { - qp_attr->cap.max_send_wr = qp->sq.wqe_cnt; - qp_attr->cap.max_recv_wr = qp->rq.wqe_cnt; - qp_attr->cap.max_send_sge = qp->sq.max_gs; - qp_attr->cap.max_recv_sge = qp->rq.max_gs; - qp_attr->cap.max_inline_data = (1 << qp->sq.wqe_shift) - - send_wqe_overhead(qp->ibqp.qp_type) - - sizeof (struct mlx4_wqe_inline_seg); - qp_init_attr->cap = qp_attr->cap; + qp_attr->cap.max_send_wr = qp->sq.wqe_cnt; + qp_attr->cap.max_send_sge = qp->sq.max_gs; + } else { + qp_attr->cap.max_send_wr = 0; + qp_attr->cap.max_send_sge = 0; } + /* + * We don't support inline sends for kernel QPs (yet), and we + * don't know what userspace's value should be. 
+ */ + qp_attr->cap.max_inline_data = 0; + + qp_init_attr->cap = qp_attr->cap; + return 0; } diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index aa563e61de6..76fed7545c5 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -67,7 +67,7 @@ MODULE_PARM_DESC(msi_x, "attempt to use MSI-X if nonzero"); static int msi = 0; module_param(msi, int, 0444); -MODULE_PARM_DESC(msi, "attempt to use MSI if nonzero"); +MODULE_PARM_DESC(msi, "attempt to use MSI if nonzero (deprecated, use MSI-X instead)"); #else /* CONFIG_PCI_MSI */ @@ -1117,9 +1117,21 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type) if (msi_x && !mthca_enable_msi_x(mdev)) mdev->mthca_flags |= MTHCA_FLAG_MSI_X; - if (msi && !(mdev->mthca_flags & MTHCA_FLAG_MSI_X) && - !pci_enable_msi(pdev)) - mdev->mthca_flags |= MTHCA_FLAG_MSI; + else if (msi) { + static int warned; + + if (!warned) { + printk(KERN_WARNING PFX "WARNING: MSI support will be " + "removed from the ib_mthca driver in January 2008.\n"); + printk(KERN_WARNING " If you are using MSI and cannot " + "switch to MSI-X, please tell " + "<general@lists.openfabrics.org>.\n"); + ++warned; + } + + if (!pci_enable_msi(pdev)) + mdev->mthca_flags |= MTHCA_FLAG_MSI; + } if (mthca_cmd_init(mdev)) { mthca_err(mdev, "Failed to init command interface, aborting.\n"); @@ -1135,7 +1147,7 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type) goto err_cmd; if (mdev->fw_ver < mthca_hca_table[hca_type].latest_fw) { - mthca_warn(mdev, "HCA FW version %d.%d.%3d is old (%d.%d.%3d is current).\n", + mthca_warn(mdev, "HCA FW version %d.%d.%03d is old (%d.%d.%03d is current).\n", (int) (mdev->fw_ver >> 32), (int) (mdev->fw_ver >> 16) & 0xffff, (int) (mdev->fw_ver & 0xffff), (int) (mthca_hca_table[hca_type].latest_fw >> 32), diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 11f1d99db40..df01b2026a6 100644 
--- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -1578,6 +1578,45 @@ static inline int mthca_wq_overflow(struct mthca_wq *wq, int nreq, return cur + nreq >= wq->max; } +static __always_inline void set_raddr_seg(struct mthca_raddr_seg *rseg, + u64 remote_addr, u32 rkey) +{ + rseg->raddr = cpu_to_be64(remote_addr); + rseg->rkey = cpu_to_be32(rkey); + rseg->reserved = 0; +} + +static __always_inline void set_atomic_seg(struct mthca_atomic_seg *aseg, + struct ib_send_wr *wr) +{ + if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { + aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap); + aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add); + } else { + aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add); + aseg->compare = 0; + } + +} + +static void set_tavor_ud_seg(struct mthca_tavor_ud_seg *useg, + struct ib_send_wr *wr) +{ + useg->lkey = cpu_to_be32(to_mah(wr->wr.ud.ah)->key); + useg->av_addr = cpu_to_be64(to_mah(wr->wr.ud.ah)->avdma); + useg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn); + useg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey); + +} + +static void set_arbel_ud_seg(struct mthca_arbel_ud_seg *useg, + struct ib_send_wr *wr) +{ + memcpy(useg->av, to_mah(wr->wr.ud.ah)->av, MTHCA_AV_SIZE); + useg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn); + useg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey); +} + int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) { @@ -1590,8 +1629,15 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, int nreq; int i; int size; - int size0 = 0; - u32 f0 = 0; + /* + * f0 and size0 are only used if nreq != 0, and they will + * always be initialized the first time through the main loop + * before nreq is incremented. So nreq cannot become non-zero + * without initializing f0 and size0, and they are in fact + * never used uninitialized. 
+ */ + int uninitialized_var(size0); + u32 uninitialized_var(f0); int ind; u8 op0 = 0; @@ -1636,25 +1682,11 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, switch (wr->opcode) { case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: - ((struct mthca_raddr_seg *) wqe)->raddr = - cpu_to_be64(wr->wr.atomic.remote_addr); - ((struct mthca_raddr_seg *) wqe)->rkey = - cpu_to_be32(wr->wr.atomic.rkey); - ((struct mthca_raddr_seg *) wqe)->reserved = 0; - + set_raddr_seg(wqe, wr->wr.atomic.remote_addr, + wr->wr.atomic.rkey); wqe += sizeof (struct mthca_raddr_seg); - if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { - ((struct mthca_atomic_seg *) wqe)->swap_add = - cpu_to_be64(wr->wr.atomic.swap); - ((struct mthca_atomic_seg *) wqe)->compare = - cpu_to_be64(wr->wr.atomic.compare_add); - } else { - ((struct mthca_atomic_seg *) wqe)->swap_add = - cpu_to_be64(wr->wr.atomic.compare_add); - ((struct mthca_atomic_seg *) wqe)->compare = 0; - } - + set_atomic_seg(wqe, wr); wqe += sizeof (struct mthca_atomic_seg); size += (sizeof (struct mthca_raddr_seg) + sizeof (struct mthca_atomic_seg)) / 16; @@ -1663,12 +1695,9 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: case IB_WR_RDMA_READ: - ((struct mthca_raddr_seg *) wqe)->raddr = - cpu_to_be64(wr->wr.rdma.remote_addr); - ((struct mthca_raddr_seg *) wqe)->rkey = - cpu_to_be32(wr->wr.rdma.rkey); - ((struct mthca_raddr_seg *) wqe)->reserved = 0; - wqe += sizeof (struct mthca_raddr_seg); + set_raddr_seg(wqe, wr->wr.rdma.remote_addr, + wr->wr.rdma.rkey); + wqe += sizeof (struct mthca_raddr_seg); size += sizeof (struct mthca_raddr_seg) / 16; break; @@ -1683,12 +1712,9 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, switch (wr->opcode) { case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: - ((struct mthca_raddr_seg *) wqe)->raddr = - cpu_to_be64(wr->wr.rdma.remote_addr); - ((struct mthca_raddr_seg *) wqe)->rkey = 
- cpu_to_be32(wr->wr.rdma.rkey); - ((struct mthca_raddr_seg *) wqe)->reserved = 0; - wqe += sizeof (struct mthca_raddr_seg); + set_raddr_seg(wqe, wr->wr.rdma.remote_addr, + wr->wr.rdma.rkey); + wqe += sizeof (struct mthca_raddr_seg); size += sizeof (struct mthca_raddr_seg) / 16; break; @@ -1700,16 +1726,8 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; case UD: - ((struct mthca_tavor_ud_seg *) wqe)->lkey = - cpu_to_be32(to_mah(wr->wr.ud.ah)->key); - ((struct mthca_tavor_ud_seg *) wqe)->av_addr = - cpu_to_be64(to_mah(wr->wr.ud.ah)->avdma); - ((struct mthca_tavor_ud_seg *) wqe)->dqpn = - cpu_to_be32(wr->wr.ud.remote_qpn); - ((struct mthca_tavor_ud_seg *) wqe)->qkey = - cpu_to_be32(wr->wr.ud.remote_qkey); - - wqe += sizeof (struct mthca_tavor_ud_seg); + set_tavor_ud_seg(wqe, wr); + wqe += sizeof (struct mthca_tavor_ud_seg); size += sizeof (struct mthca_tavor_ud_seg) / 16; break; @@ -1734,13 +1752,8 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } for (i = 0; i < wr->num_sge; ++i) { - ((struct mthca_data_seg *) wqe)->byte_count = - cpu_to_be32(wr->sg_list[i].length); - ((struct mthca_data_seg *) wqe)->lkey = - cpu_to_be32(wr->sg_list[i].lkey); - ((struct mthca_data_seg *) wqe)->addr = - cpu_to_be64(wr->sg_list[i].addr); - wqe += sizeof (struct mthca_data_seg); + mthca_set_data_seg(wqe, wr->sg_list + i); + wqe += sizeof (struct mthca_data_seg); size += sizeof (struct mthca_data_seg) / 16; } @@ -1768,11 +1781,11 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, mthca_opcode[wr->opcode]); wmb(); ((struct mthca_next_seg *) prev_wqe)->ee_nds = - cpu_to_be32((size0 ? 0 : MTHCA_NEXT_DBD) | size | + cpu_to_be32((nreq ? 0 : MTHCA_NEXT_DBD) | size | ((wr->send_flags & IB_SEND_FENCE) ? MTHCA_NEXT_FENCE : 0)); - if (!size0) { + if (!nreq) { size0 = size; op0 = mthca_opcode[wr->opcode]; f0 = wr->send_flags & IB_SEND_FENCE ? 
@@ -1822,7 +1835,14 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, int nreq; int i; int size; - int size0 = 0; + /* + * size0 is only used if nreq != 0, and it will always be + * initialized the first time through the main loop before + * nreq is incremented. So nreq cannot become non-zero + * without initializing size0, and it is in fact never used + * uninitialized. + */ + int uninitialized_var(size0); int ind; void *wqe; void *prev_wqe; @@ -1863,13 +1883,8 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, } for (i = 0; i < wr->num_sge; ++i) { - ((struct mthca_data_seg *) wqe)->byte_count = - cpu_to_be32(wr->sg_list[i].length); - ((struct mthca_data_seg *) wqe)->lkey = - cpu_to_be32(wr->sg_list[i].lkey); - ((struct mthca_data_seg *) wqe)->addr = - cpu_to_be64(wr->sg_list[i].addr); - wqe += sizeof (struct mthca_data_seg); + mthca_set_data_seg(wqe, wr->sg_list + i); + wqe += sizeof (struct mthca_data_seg); size += sizeof (struct mthca_data_seg) / 16; } @@ -1881,7 +1896,7 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, ((struct mthca_next_seg *) prev_wqe)->ee_nds = cpu_to_be32(MTHCA_NEXT_DBD | size); - if (!size0) + if (!nreq) size0 = size; ++ind; @@ -1903,7 +1918,6 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, qp->rq.next_ind = ind; qp->rq.head += MTHCA_TAVOR_MAX_WQES_PER_RECV_DB; - size0 = 0; } } @@ -1945,8 +1959,15 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, int nreq; int i; int size; - int size0 = 0; - u32 f0 = 0; + /* + * f0 and size0 are only used if nreq != 0, and they will + * always be initialized the first time through the main loop + * before nreq is incremented. So nreq cannot become non-zero + * without initializing f0 and size0, and they are in fact + * never used uninitialized. 
+ */ + int uninitialized_var(size0); + u32 uninitialized_var(f0); int ind; u8 op0 = 0; @@ -1966,7 +1987,6 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, doorbell[1] = cpu_to_be32((qp->qpn << 8) | size0); qp->sq.head += MTHCA_ARBEL_MAX_WQES_PER_SEND_DB; - size0 = 0; /* * Make sure that descriptors are written before @@ -2017,26 +2037,12 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, switch (wr->opcode) { case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: - ((struct mthca_raddr_seg *) wqe)->raddr = - cpu_to_be64(wr->wr.atomic.remote_addr); - ((struct mthca_raddr_seg *) wqe)->rkey = - cpu_to_be32(wr->wr.atomic.rkey); - ((struct mthca_raddr_seg *) wqe)->reserved = 0; - + set_raddr_seg(wqe, wr->wr.atomic.remote_addr, + wr->wr.atomic.rkey); wqe += sizeof (struct mthca_raddr_seg); - if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { - ((struct mthca_atomic_seg *) wqe)->swap_add = - cpu_to_be64(wr->wr.atomic.swap); - ((struct mthca_atomic_seg *) wqe)->compare = - cpu_to_be64(wr->wr.atomic.compare_add); - } else { - ((struct mthca_atomic_seg *) wqe)->swap_add = - cpu_to_be64(wr->wr.atomic.compare_add); - ((struct mthca_atomic_seg *) wqe)->compare = 0; - } - - wqe += sizeof (struct mthca_atomic_seg); + set_atomic_seg(wqe, wr); + wqe += sizeof (struct mthca_atomic_seg); size += (sizeof (struct mthca_raddr_seg) + sizeof (struct mthca_atomic_seg)) / 16; break; @@ -2044,12 +2050,9 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, case IB_WR_RDMA_READ: case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: - ((struct mthca_raddr_seg *) wqe)->raddr = - cpu_to_be64(wr->wr.rdma.remote_addr); - ((struct mthca_raddr_seg *) wqe)->rkey = - cpu_to_be32(wr->wr.rdma.rkey); - ((struct mthca_raddr_seg *) wqe)->reserved = 0; - wqe += sizeof (struct mthca_raddr_seg); + set_raddr_seg(wqe, wr->wr.rdma.remote_addr, + wr->wr.rdma.rkey); + wqe += sizeof (struct mthca_raddr_seg); size += sizeof (struct mthca_raddr_seg) / 
16; break; @@ -2064,12 +2067,9 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, switch (wr->opcode) { case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: - ((struct mthca_raddr_seg *) wqe)->raddr = - cpu_to_be64(wr->wr.rdma.remote_addr); - ((struct mthca_raddr_seg *) wqe)->rkey = - cpu_to_be32(wr->wr.rdma.rkey); - ((struct mthca_raddr_seg *) wqe)->reserved = 0; - wqe += sizeof (struct mthca_raddr_seg); + set_raddr_seg(wqe, wr->wr.rdma.remote_addr, + wr->wr.rdma.rkey); + wqe += sizeof (struct mthca_raddr_seg); size += sizeof (struct mthca_raddr_seg) / 16; break; @@ -2081,14 +2081,8 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; case UD: - memcpy(((struct mthca_arbel_ud_seg *) wqe)->av, - to_mah(wr->wr.ud.ah)->av, MTHCA_AV_SIZE); - ((struct mthca_arbel_ud_seg *) wqe)->dqpn = - cpu_to_be32(wr->wr.ud.remote_qpn); - ((struct mthca_arbel_ud_seg *) wqe)->qkey = - cpu_to_be32(wr->wr.ud.remote_qkey); - - wqe += sizeof (struct mthca_arbel_ud_seg); + set_arbel_ud_seg(wqe, wr); + wqe += sizeof (struct mthca_arbel_ud_seg); size += sizeof (struct mthca_arbel_ud_seg) / 16; break; @@ -2113,13 +2107,8 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } for (i = 0; i < wr->num_sge; ++i) { - ((struct mthca_data_seg *) wqe)->byte_count = - cpu_to_be32(wr->sg_list[i].length); - ((struct mthca_data_seg *) wqe)->lkey = - cpu_to_be32(wr->sg_list[i].lkey); - ((struct mthca_data_seg *) wqe)->addr = - cpu_to_be64(wr->sg_list[i].addr); - wqe += sizeof (struct mthca_data_seg); + mthca_set_data_seg(wqe, wr->sg_list + i); + wqe += sizeof (struct mthca_data_seg); size += sizeof (struct mthca_data_seg) / 16; } @@ -2151,7 +2140,7 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, ((wr->send_flags & IB_SEND_FENCE) ? MTHCA_NEXT_FENCE : 0)); - if (!size0) { + if (!nreq) { size0 = size; op0 = mthca_opcode[wr->opcode]; f0 = wr->send_flags & IB_SEND_FENCE ? 
@@ -2241,20 +2230,12 @@ int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, } for (i = 0; i < wr->num_sge; ++i) { - ((struct mthca_data_seg *) wqe)->byte_count = - cpu_to_be32(wr->sg_list[i].length); - ((struct mthca_data_seg *) wqe)->lkey = - cpu_to_be32(wr->sg_list[i].lkey); - ((struct mthca_data_seg *) wqe)->addr = - cpu_to_be64(wr->sg_list[i].addr); + mthca_set_data_seg(wqe, wr->sg_list + i); wqe += sizeof (struct mthca_data_seg); } - if (i < qp->rq.max_gs) { - ((struct mthca_data_seg *) wqe)->byte_count = 0; - ((struct mthca_data_seg *) wqe)->lkey = cpu_to_be32(MTHCA_INVAL_LKEY); - ((struct mthca_data_seg *) wqe)->addr = 0; - } + if (i < qp->rq.max_gs) + mthca_set_data_seg_inval(wqe); qp->wrid[ind] = wr->wr_id; diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c index b8f05a52667..88d219e730a 100644 --- a/drivers/infiniband/hw/mthca/mthca_srq.c +++ b/drivers/infiniband/hw/mthca/mthca_srq.c @@ -543,20 +543,12 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, } for (i = 0; i < wr->num_sge; ++i) { - ((struct mthca_data_seg *) wqe)->byte_count = - cpu_to_be32(wr->sg_list[i].length); - ((struct mthca_data_seg *) wqe)->lkey = - cpu_to_be32(wr->sg_list[i].lkey); - ((struct mthca_data_seg *) wqe)->addr = - cpu_to_be64(wr->sg_list[i].addr); + mthca_set_data_seg(wqe, wr->sg_list + i); wqe += sizeof (struct mthca_data_seg); } - if (i < srq->max_gs) { - ((struct mthca_data_seg *) wqe)->byte_count = 0; - ((struct mthca_data_seg *) wqe)->lkey = cpu_to_be32(MTHCA_INVAL_LKEY); - ((struct mthca_data_seg *) wqe)->addr = 0; - } + if (i < srq->max_gs) + mthca_set_data_seg_inval(wqe); ((struct mthca_next_seg *) prev_wqe)->nda_op = cpu_to_be32((ind << srq->wqe_shift) | 1); @@ -662,20 +654,12 @@ int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, } for (i = 0; i < wr->num_sge; ++i) { - ((struct mthca_data_seg *) wqe)->byte_count = - cpu_to_be32(wr->sg_list[i].length); - 
((struct mthca_data_seg *) wqe)->lkey = - cpu_to_be32(wr->sg_list[i].lkey); - ((struct mthca_data_seg *) wqe)->addr = - cpu_to_be64(wr->sg_list[i].addr); + mthca_set_data_seg(wqe, wr->sg_list + i); wqe += sizeof (struct mthca_data_seg); } - if (i < srq->max_gs) { - ((struct mthca_data_seg *) wqe)->byte_count = 0; - ((struct mthca_data_seg *) wqe)->lkey = cpu_to_be32(MTHCA_INVAL_LKEY); - ((struct mthca_data_seg *) wqe)->addr = 0; - } + if (i < srq->max_gs) + mthca_set_data_seg_inval(wqe); srq->wrid[ind] = wr->wr_id; srq->first_free = next_ind; diff --git a/drivers/infiniband/hw/mthca/mthca_wqe.h b/drivers/infiniband/hw/mthca/mthca_wqe.h index e7d2c1e8619..f6a66fe78e4 100644 --- a/drivers/infiniband/hw/mthca/mthca_wqe.h +++ b/drivers/infiniband/hw/mthca/mthca_wqe.h @@ -113,4 +113,19 @@ struct mthca_mlx_seg { __be16 vcrc; }; +static __always_inline void mthca_set_data_seg(struct mthca_data_seg *dseg, + struct ib_sge *sg) +{ + dseg->byte_count = cpu_to_be32(sg->length); + dseg->lkey = cpu_to_be32(sg->lkey); + dseg->addr = cpu_to_be64(sg->addr); +} + +static __always_inline void mthca_set_data_seg_inval(struct mthca_data_seg *dseg) +{ + dseg->byte_count = 0; + dseg->lkey = cpu_to_be32(MTHCA_INVAL_LKEY); + dseg->addr = 0; +} + #endif /* MTHCA_WQE_H */ diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index e2353701e8b..1ee867b1b34 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -310,8 +310,6 @@ int iser_conn_init(struct iser_conn **ib_conn); void iser_conn_terminate(struct iser_conn *ib_conn); -void iser_conn_release(struct iser_conn *ib_conn); - void iser_rcv_completion(struct iser_desc *desc, unsigned long dto_xfer_len); @@ -329,9 +327,6 @@ void iser_reg_single(struct iser_device *device, struct iser_regd_buf *regd_buf, enum dma_data_direction direction); -int iser_start_rdma_unaligned_sg(struct iscsi_iser_cmd_task *ctask, - enum iser_data_dir cmd_dir); - void 
iser_finalize_rdma_unaligned_sg(struct iscsi_iser_cmd_task *ctask, enum iser_data_dir cmd_dir); diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index fc9f1fd0ae5..36cdf77ae92 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -103,8 +103,8 @@ void iser_reg_single(struct iser_device *device, /** * iser_start_rdma_unaligned_sg */ -int iser_start_rdma_unaligned_sg(struct iscsi_iser_cmd_task *iser_ctask, - enum iser_data_dir cmd_dir) +static int iser_start_rdma_unaligned_sg(struct iscsi_iser_cmd_task *iser_ctask, + enum iser_data_dir cmd_dir) { int dma_nents; struct ib_device *dev; diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 2044de1164a..d42ec0156ee 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -311,6 +311,29 @@ static int iser_conn_state_comp_exch(struct iser_conn *ib_conn, } /** + * Frees all conn objects and deallocs conn descriptor + */ +static void iser_conn_release(struct iser_conn *ib_conn) +{ + struct iser_device *device = ib_conn->device; + + BUG_ON(ib_conn->state != ISER_CONN_DOWN); + + mutex_lock(&ig.connlist_mutex); + list_del(&ib_conn->conn_list); + mutex_unlock(&ig.connlist_mutex); + + iser_free_ib_conn_res(ib_conn); + ib_conn->device = NULL; + /* on EVENT_ADDR_ERROR there's no device yet for this conn */ + if (device != NULL) + iser_device_try_release(device); + if (ib_conn->iser_conn) + ib_conn->iser_conn->ib_conn = NULL; + kfree(ib_conn); +} + +/** * triggers start of the disconnect procedures and wait for them to be done */ void iser_conn_terminate(struct iser_conn *ib_conn) @@ -550,30 +573,6 @@ connect_failure: } /** - * Frees all conn objects and deallocs conn descriptor - */ -void iser_conn_release(struct iser_conn *ib_conn) -{ - struct iser_device *device = ib_conn->device; - - BUG_ON(ib_conn->state != ISER_CONN_DOWN); - - 
mutex_lock(&ig.connlist_mutex); - list_del(&ib_conn->conn_list); - mutex_unlock(&ig.connlist_mutex); - - iser_free_ib_conn_res(ib_conn); - ib_conn->device = NULL; - /* on EVENT_ADDR_ERROR there's no device yet for this conn */ - if (device != NULL) - iser_device_try_release(device); - if (ib_conn->iser_conn) - ib_conn->iser_conn->ib_conn = NULL; - kfree(ib_conn); -} - - -/** * iser_reg_page_vec - Register physical memory * * returns: 0 on success, errno code on failure diff --git a/drivers/input/serio/ambakmi.c b/drivers/input/serio/ambakmi.c index 5a7b49c3553..b10ffae7c39 100644 --- a/drivers/input/serio/ambakmi.c +++ b/drivers/input/serio/ambakmi.c @@ -117,15 +117,13 @@ static int amba_kmi_probe(struct amba_device *dev, void *id) if (ret) return ret; - kmi = kmalloc(sizeof(struct amba_kmi_port), GFP_KERNEL); - io = kmalloc(sizeof(struct serio), GFP_KERNEL); + kmi = kzalloc(sizeof(struct amba_kmi_port), GFP_KERNEL); + io = kzalloc(sizeof(struct serio), GFP_KERNEL); if (!kmi || !io) { ret = -ENOMEM; goto out; } - memset(kmi, 0, sizeof(struct amba_kmi_port)); - memset(io, 0, sizeof(struct serio)); io->id.type = SERIO_8042; io->write = amba_kmi_write; diff --git a/drivers/input/serio/pcips2.c b/drivers/input/serio/pcips2.c index ea5e3c6ddb6..1b404f9e3bf 100644 --- a/drivers/input/serio/pcips2.c +++ b/drivers/input/serio/pcips2.c @@ -140,15 +140,13 @@ static int __devinit pcips2_probe(struct pci_dev *dev, const struct pci_device_i if (ret) goto disable; - ps2if = kmalloc(sizeof(struct pcips2_data), GFP_KERNEL); - serio = kmalloc(sizeof(struct serio), GFP_KERNEL); + ps2if = kzalloc(sizeof(struct pcips2_data), GFP_KERNEL); + serio = kzalloc(sizeof(struct serio), GFP_KERNEL); if (!ps2if || !serio) { ret = -ENOMEM; goto release; } - memset(ps2if, 0, sizeof(struct pcips2_data)); - memset(serio, 0, sizeof(struct serio)); serio->id.type = SERIO_8042; serio->write = pcips2_write; diff --git a/drivers/input/serio/sa1111ps2.c b/drivers/input/serio/sa1111ps2.c index 
d31ece8f68e..2ad88780a17 100644 --- a/drivers/input/serio/sa1111ps2.c +++ b/drivers/input/serio/sa1111ps2.c @@ -234,15 +234,13 @@ static int __devinit ps2_probe(struct sa1111_dev *dev) struct serio *serio; int ret; - ps2if = kmalloc(sizeof(struct ps2if), GFP_KERNEL); - serio = kmalloc(sizeof(struct serio), GFP_KERNEL); + ps2if = kzalloc(sizeof(struct ps2if), GFP_KERNEL); + serio = kzalloc(sizeof(struct serio), GFP_KERNEL); if (!ps2if || !serio) { ret = -ENOMEM; goto free; } - memset(ps2if, 0, sizeof(struct ps2if)); - memset(serio, 0, sizeof(struct serio)); serio->id.type = SERIO_8042; serio->write = ps2_write; diff --git a/drivers/isdn/hisax/config.c b/drivers/isdn/hisax/config.c index 5f7907e5709..97097ef3491 100644 --- a/drivers/isdn/hisax/config.c +++ b/drivers/isdn/hisax/config.c @@ -1146,14 +1146,12 @@ static int hisax_cs_setup(int cardnr, struct IsdnCard *card, } if (ret) { closecard(cardnr); - ret = 0; goto outf_cs; } init_tei(cs, cs->protocol); ret = CallcNewChan(cs); if (ret) { closecard(cardnr); - ret = 0; goto outf_cs; } /* ISAR needs firmware download first */ @@ -1165,7 +1163,7 @@ static int hisax_cs_setup(int cardnr, struct IsdnCard *card, outf_cs: kfree(cs); card->cs = NULL; - return ret; + return 0; } static int checkcard(int cardnr, char *id, int *busy_flag, struct module *lockowner) diff --git a/drivers/isdn/sc/card.h b/drivers/isdn/sc/card.h index 4fbfa825c3a..5992f63c383 100644 --- a/drivers/isdn/sc/card.h +++ b/drivers/isdn/sc/card.h @@ -125,7 +125,7 @@ int sendmessage(int card, unsigned int procid, unsigned int type, int receivemessage(int card, RspMessage *rspmsg); int sc_ioctl(int card, scs_ioctl *data); int setup_buffers(int card, int c); -void check_reset(unsigned long data); +void sc_check_reset(unsigned long data); void check_phystat(unsigned long data); #endif /* CARD_H */ diff --git a/drivers/isdn/sc/command.c b/drivers/isdn/sc/command.c index b7bb7cbcf50..0e4969c2ef9 100644 --- a/drivers/isdn/sc/command.c +++ 
b/drivers/isdn/sc/command.c @@ -344,7 +344,7 @@ int reset(int card) spin_lock_irqsave(&sc_adapter[card]->lock, flags); init_timer(&sc_adapter[card]->reset_timer); - sc_adapter[card]->reset_timer.function = check_reset; + sc_adapter[card]->reset_timer.function = sc_check_reset; sc_adapter[card]->reset_timer.data = card; sc_adapter[card]->reset_timer.expires = jiffies + CHECKRESET_TIME; add_timer(&sc_adapter[card]->reset_timer); diff --git a/drivers/isdn/sc/timer.c b/drivers/isdn/sc/timer.c index cc1b8861be2..91fbe0dc28e 100644 --- a/drivers/isdn/sc/timer.c +++ b/drivers/isdn/sc/timer.c @@ -43,7 +43,7 @@ static void setup_ports(int card) * Then, check to see if the signate has been set. Next, set the * signature to a known value and issue a startproc if needed. */ -void check_reset(unsigned long data) +void sc_check_reset(unsigned long data) { unsigned long flags; unsigned long sig; diff --git a/drivers/lguest/Kconfig b/drivers/lguest/Kconfig new file mode 100644 index 00000000000..43d901fdc77 --- /dev/null +++ b/drivers/lguest/Kconfig @@ -0,0 +1,20 @@ +config LGUEST + tristate "Linux hypervisor example code" + depends on X86 && PARAVIRT && NET && EXPERIMENTAL && !X86_PAE + select LGUEST_GUEST + select HVC_DRIVER + ---help--- + This is a very simple module which allows you to run + multiple instances of the same Linux kernel, using the + "lguest" command found in the Documentation/lguest directory. + Note that "lguest" is pronounced to rhyme with "fell quest", + not "rustyvisor". See Documentation/lguest/lguest.txt. + + If unsure, say N. If curious, say M. If masochistic, say Y. + +config LGUEST_GUEST + bool + help + The guest needs code built-in, even if the host has lguest + support as a module. The drivers are tiny, so we build them + in too. 
diff --git a/drivers/lguest/Makefile b/drivers/lguest/Makefile new file mode 100644 index 00000000000..55382c7d799 --- /dev/null +++ b/drivers/lguest/Makefile @@ -0,0 +1,7 @@ +# Guest requires the paravirt_ops replacement and the bus driver. +obj-$(CONFIG_LGUEST_GUEST) += lguest.o lguest_asm.o lguest_bus.o + +# Host requires the other files, which can be a module. +obj-$(CONFIG_LGUEST) += lg.o +lg-y := core.o hypercalls.o page_tables.o interrupts_and_traps.o \ + segments.o io.o lguest_user.o switcher.o diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c new file mode 100644 index 00000000000..ce909ec5749 --- /dev/null +++ b/drivers/lguest/core.c @@ -0,0 +1,462 @@ +/* World's simplest hypervisor, to test paravirt_ops and show + * unbelievers that virtualization is the future. Plus, it's fun! */ +#include <linux/module.h> +#include <linux/stringify.h> +#include <linux/stddef.h> +#include <linux/io.h> +#include <linux/mm.h> +#include <linux/vmalloc.h> +#include <linux/cpu.h> +#include <linux/freezer.h> +#include <asm/paravirt.h> +#include <asm/desc.h> +#include <asm/pgtable.h> +#include <asm/uaccess.h> +#include <asm/poll.h> +#include <asm/highmem.h> +#include <asm/asm-offsets.h> +#include <asm/i387.h> +#include "lg.h" + +/* Found in switcher.S */ +extern char start_switcher_text[], end_switcher_text[], switch_to_guest[]; +extern unsigned long default_idt_entries[]; + +/* Every guest maps the core switcher code. */ +#define SHARED_SWITCHER_PAGES \ + DIV_ROUND_UP(end_switcher_text - start_switcher_text, PAGE_SIZE) +/* Pages for switcher itself, then two pages per cpu */ +#define TOTAL_SWITCHER_PAGES (SHARED_SWITCHER_PAGES + 2 * NR_CPUS) + +/* We map at -4M for ease of mapping into the guest (one PTE page). 
*/ +#define SWITCHER_ADDR 0xFFC00000 + +static struct vm_struct *switcher_vma; +static struct page **switcher_page; + +static int cpu_had_pge; +static struct { + unsigned long offset; + unsigned short segment; +} lguest_entry; + +/* This One Big lock protects all inter-guest data structures. */ +DEFINE_MUTEX(lguest_lock); +static DEFINE_PER_CPU(struct lguest *, last_guest); + +/* FIXME: Make dynamic. */ +#define MAX_LGUEST_GUESTS 16 +struct lguest lguests[MAX_LGUEST_GUESTS]; + +/* Offset from where switcher.S was compiled to where we've copied it */ +static unsigned long switcher_offset(void) +{ + return SWITCHER_ADDR - (unsigned long)start_switcher_text; +} + +/* This cpu's struct lguest_pages. */ +static struct lguest_pages *lguest_pages(unsigned int cpu) +{ + return &(((struct lguest_pages *) + (SWITCHER_ADDR + SHARED_SWITCHER_PAGES*PAGE_SIZE))[cpu]); +} + +static __init int map_switcher(void) +{ + int i, err; + struct page **pagep; + + switcher_page = kmalloc(sizeof(switcher_page[0])*TOTAL_SWITCHER_PAGES, + GFP_KERNEL); + if (!switcher_page) { + err = -ENOMEM; + goto out; + } + + for (i = 0; i < TOTAL_SWITCHER_PAGES; i++) { + unsigned long addr = get_zeroed_page(GFP_KERNEL); + if (!addr) { + err = -ENOMEM; + goto free_some_pages; + } + switcher_page[i] = virt_to_page(addr); + } + + switcher_vma = __get_vm_area(TOTAL_SWITCHER_PAGES * PAGE_SIZE, + VM_ALLOC, SWITCHER_ADDR, VMALLOC_END); + if (!switcher_vma) { + err = -ENOMEM; + printk("lguest: could not map switcher pages high\n"); + goto free_pages; + } + + pagep = switcher_page; + err = map_vm_area(switcher_vma, PAGE_KERNEL, &pagep); + if (err) { + printk("lguest: map_vm_area failed: %i\n", err); + goto free_vma; + } + memcpy(switcher_vma->addr, start_switcher_text, + end_switcher_text - start_switcher_text); + + /* Fix up IDT entries to point into copied text. 
*/ + for (i = 0; i < IDT_ENTRIES; i++) + default_idt_entries[i] += switcher_offset(); + + for_each_possible_cpu(i) { + struct lguest_pages *pages = lguest_pages(i); + struct lguest_ro_state *state = &pages->state; + + /* These fields are static: rest done in copy_in_guest_info */ + state->host_gdt_desc.size = GDT_SIZE-1; + state->host_gdt_desc.address = (long)get_cpu_gdt_table(i); + store_idt(&state->host_idt_desc); + state->guest_idt_desc.size = sizeof(state->guest_idt)-1; + state->guest_idt_desc.address = (long)&state->guest_idt; + state->guest_gdt_desc.size = sizeof(state->guest_gdt)-1; + state->guest_gdt_desc.address = (long)&state->guest_gdt; + state->guest_tss.esp0 = (long)(&pages->regs + 1); + state->guest_tss.ss0 = LGUEST_DS; + /* No I/O for you! */ + state->guest_tss.io_bitmap_base = sizeof(state->guest_tss); + setup_default_gdt_entries(state); + setup_default_idt_entries(state, default_idt_entries); + + /* Setup LGUEST segments on all cpus */ + get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT; + get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT; + } + + /* Initialize entry point into switcher. */ + lguest_entry.offset = (long)switch_to_guest + switcher_offset(); + lguest_entry.segment = LGUEST_CS; + + printk(KERN_INFO "lguest: mapped switcher at %p\n", + switcher_vma->addr); + return 0; + +free_vma: + vunmap(switcher_vma->addr); +free_pages: + i = TOTAL_SWITCHER_PAGES; +free_some_pages: + for (--i; i >= 0; i--) + __free_pages(switcher_page[i], 0); + kfree(switcher_page); +out: + return err; +} + +static void unmap_switcher(void) +{ + unsigned int i; + + vunmap(switcher_vma->addr); + for (i = 0; i < TOTAL_SWITCHER_PAGES; i++) + __free_pages(switcher_page[i], 0); +} + +/* IN/OUT insns: enough to get us past boot-time probing. 
*/ +static int emulate_insn(struct lguest *lg) +{ + u8 insn; + unsigned int insnlen = 0, in = 0, shift = 0; + unsigned long physaddr = guest_pa(lg, lg->regs->eip); + + /* This only works for addresses in linear mapping... */ + if (lg->regs->eip < lg->page_offset) + return 0; + lgread(lg, &insn, physaddr, 1); + + /* Operand size prefix means it's actually for ax. */ + if (insn == 0x66) { + shift = 16; + insnlen = 1; + lgread(lg, &insn, physaddr + insnlen, 1); + } + + switch (insn & 0xFE) { + case 0xE4: /* in <next byte>,%al */ + insnlen += 2; + in = 1; + break; + case 0xEC: /* in (%dx),%al */ + insnlen += 1; + in = 1; + break; + case 0xE6: /* out %al,<next byte> */ + insnlen += 2; + break; + case 0xEE: /* out %al,(%dx) */ + insnlen += 1; + break; + default: + return 0; + } + + if (in) { + /* Lower bit tells is whether it's a 16 or 32 bit access */ + if (insn & 0x1) + lg->regs->eax = 0xFFFFFFFF; + else + lg->regs->eax |= (0xFFFF << shift); + } + lg->regs->eip += insnlen; + return 1; +} + +int lguest_address_ok(const struct lguest *lg, + unsigned long addr, unsigned long len) +{ + return (addr+len) / PAGE_SIZE < lg->pfn_limit && (addr+len >= addr); +} + +/* Just like get_user, but don't let guest access lguest binary. */ +u32 lgread_u32(struct lguest *lg, unsigned long addr) +{ + u32 val = 0; + + /* Don't let them access lguest binary */ + if (!lguest_address_ok(lg, addr, sizeof(val)) + || get_user(val, (u32 __user *)addr) != 0) + kill_guest(lg, "bad read address %#lx", addr); + return val; +} + +void lgwrite_u32(struct lguest *lg, unsigned long addr, u32 val) +{ + if (!lguest_address_ok(lg, addr, sizeof(val)) + || put_user(val, (u32 __user *)addr) != 0) + kill_guest(lg, "bad write address %#lx", addr); +} + +void lgread(struct lguest *lg, void *b, unsigned long addr, unsigned bytes) +{ + if (!lguest_address_ok(lg, addr, bytes) + || copy_from_user(b, (void __user *)addr, bytes) != 0) { + /* copy_from_user should do this, but as we rely on it... 
*/ + memset(b, 0, bytes); + kill_guest(lg, "bad read address %#lx len %u", addr, bytes); + } +} + +void lgwrite(struct lguest *lg, unsigned long addr, const void *b, + unsigned bytes) +{ + if (!lguest_address_ok(lg, addr, bytes) + || copy_to_user((void __user *)addr, b, bytes) != 0) + kill_guest(lg, "bad write address %#lx len %u", addr, bytes); +} + +static void set_ts(void) +{ + u32 cr0; + + cr0 = read_cr0(); + if (!(cr0 & 8)) + write_cr0(cr0|8); +} + +static void copy_in_guest_info(struct lguest *lg, struct lguest_pages *pages) +{ + if (__get_cpu_var(last_guest) != lg || lg->last_pages != pages) { + __get_cpu_var(last_guest) = lg; + lg->last_pages = pages; + lg->changed = CHANGED_ALL; + } + + /* These are pretty cheap, so we do them unconditionally. */ + pages->state.host_cr3 = __pa(current->mm->pgd); + map_switcher_in_guest(lg, pages); + pages->state.guest_tss.esp1 = lg->esp1; + pages->state.guest_tss.ss1 = lg->ss1; + + /* Copy direct trap entries. */ + if (lg->changed & CHANGED_IDT) + copy_traps(lg, pages->state.guest_idt, default_idt_entries); + + /* Copy all GDT entries but the TSS. */ + if (lg->changed & CHANGED_GDT) + copy_gdt(lg, pages->state.guest_gdt); + /* If only the TLS entries have changed, copy them. */ + else if (lg->changed & CHANGED_GDT_TLS) + copy_gdt_tls(lg, pages->state.guest_gdt); + + lg->changed = 0; +} + +static void run_guest_once(struct lguest *lg, struct lguest_pages *pages) +{ + unsigned int clobber; + + copy_in_guest_info(lg, pages); + + /* Put eflags on stack, lcall does rest: suitable for iret return. 
*/ + asm volatile("pushf; lcall *lguest_entry" + : "=a"(clobber), "=b"(clobber) + : "0"(pages), "1"(__pa(lg->pgdirs[lg->pgdidx].pgdir)) + : "memory", "%edx", "%ecx", "%edi", "%esi"); +} + +int run_guest(struct lguest *lg, unsigned long __user *user) +{ + while (!lg->dead) { + unsigned int cr2 = 0; /* Damn gcc */ + + /* Hypercalls first: we might have been out to userspace */ + do_hypercalls(lg); + if (lg->dma_is_pending) { + if (put_user(lg->pending_dma, user) || + put_user(lg->pending_key, user+1)) + return -EFAULT; + return sizeof(unsigned long)*2; + } + + if (signal_pending(current)) + return -ERESTARTSYS; + + /* If Waker set break_out, return to Launcher. */ + if (lg->break_out) + return -EAGAIN; + + maybe_do_interrupt(lg); + + try_to_freeze(); + + if (lg->dead) + break; + + if (lg->halted) { + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + continue; + } + + local_irq_disable(); + + /* Even if *we* don't want FPU trap, guest might... */ + if (lg->ts) + set_ts(); + + /* Don't let Guest do SYSENTER: we can't handle it. */ + if (boot_cpu_has(X86_FEATURE_SEP)) + wrmsr(MSR_IA32_SYSENTER_CS, 0, 0); + + run_guest_once(lg, lguest_pages(raw_smp_processor_id())); + + /* Save cr2 now if we page-faulted. */ + if (lg->regs->trapnum == 14) + cr2 = read_cr2(); + else if (lg->regs->trapnum == 7) + math_state_restore(); + + if (boot_cpu_has(X86_FEATURE_SEP)) + wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); + local_irq_enable(); + + switch (lg->regs->trapnum) { + case 13: /* We've intercepted a GPF. */ + if (lg->regs->errcode == 0) { + if (emulate_insn(lg)) + continue; + } + break; + case 14: /* We've intercepted a page fault. */ + if (demand_page(lg, cr2, lg->regs->errcode)) + continue; + + /* If lguest_data is NULL, this won't hurt. */ + if (put_user(cr2, &lg->lguest_data->cr2)) + kill_guest(lg, "Writing cr2"); + break; + case 7: /* We've intercepted a Device Not Available fault. */ + /* If they don't want to know, just absorb it. 
*/ + if (!lg->ts) + continue; + break; + case 32 ... 255: /* Real interrupt, fall thru */ + cond_resched(); + case LGUEST_TRAP_ENTRY: /* Handled at top of loop */ + continue; + } + + if (deliver_trap(lg, lg->regs->trapnum)) + continue; + + kill_guest(lg, "unhandled trap %li at %#lx (%#lx)", + lg->regs->trapnum, lg->regs->eip, + lg->regs->trapnum == 14 ? cr2 : lg->regs->errcode); + } + return -ENOENT; +} + +int find_free_guest(void) +{ + unsigned int i; + for (i = 0; i < MAX_LGUEST_GUESTS; i++) + if (!lguests[i].tsk) + return i; + return -1; +} + +static void adjust_pge(void *on) +{ + if (on) + write_cr4(read_cr4() | X86_CR4_PGE); + else + write_cr4(read_cr4() & ~X86_CR4_PGE); +} + +static int __init init(void) +{ + int err; + + if (paravirt_enabled()) { + printk("lguest is afraid of %s\n", paravirt_ops.name); + return -EPERM; + } + + err = map_switcher(); + if (err) + return err; + + err = init_pagetables(switcher_page, SHARED_SWITCHER_PAGES); + if (err) { + unmap_switcher(); + return err; + } + lguest_io_init(); + + err = lguest_device_init(); + if (err) { + free_pagetables(); + unmap_switcher(); + return err; + } + lock_cpu_hotplug(); + if (cpu_has_pge) { /* We have a broader idea of "global". 
*/ + cpu_had_pge = 1; + on_each_cpu(adjust_pge, (void *)0, 0, 1); + clear_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability); + } + unlock_cpu_hotplug(); + return 0; +} + +static void __exit fini(void) +{ + lguest_device_remove(); + free_pagetables(); + unmap_switcher(); + lock_cpu_hotplug(); + if (cpu_had_pge) { + set_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability); + on_each_cpu(adjust_pge, (void *)1, 0, 1); + } + unlock_cpu_hotplug(); +} + +module_init(init); +module_exit(fini); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>"); diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c new file mode 100644 index 00000000000..ea52ca451f7 --- /dev/null +++ b/drivers/lguest/hypercalls.c @@ -0,0 +1,192 @@ +/* Actual hypercalls, which allow guests to actually do something. + Copyright (C) 2006 Rusty Russell IBM Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +*/ +#include <linux/uaccess.h> +#include <linux/syscalls.h> +#include <linux/mm.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <irq_vectors.h> +#include "lg.h" + +static void do_hcall(struct lguest *lg, struct lguest_regs *regs) +{ + switch (regs->eax) { + case LHCALL_FLUSH_ASYNC: + break; + case LHCALL_LGUEST_INIT: + kill_guest(lg, "already have lguest_data"); + break; + case LHCALL_CRASH: { + char msg[128]; + lgread(lg, msg, regs->edx, sizeof(msg)); + msg[sizeof(msg)-1] = '\0'; + kill_guest(lg, "CRASH: %s", msg); + break; + } + case LHCALL_FLUSH_TLB: + if (regs->edx) + guest_pagetable_clear_all(lg); + else + guest_pagetable_flush_user(lg); + break; + case LHCALL_GET_WALLCLOCK: { + struct timespec ts; + ktime_get_real_ts(&ts); + regs->eax = ts.tv_sec; + break; + } + case LHCALL_BIND_DMA: + regs->eax = bind_dma(lg, regs->edx, regs->ebx, + regs->ecx >> 8, regs->ecx & 0xFF); + break; + case LHCALL_SEND_DMA: + send_dma(lg, regs->edx, regs->ebx); + break; + case LHCALL_LOAD_GDT: + load_guest_gdt(lg, regs->edx, regs->ebx); + break; + case LHCALL_LOAD_IDT_ENTRY: + load_guest_idt_entry(lg, regs->edx, regs->ebx, regs->ecx); + break; + case LHCALL_NEW_PGTABLE: + guest_new_pagetable(lg, regs->edx); + break; + case LHCALL_SET_STACK: + guest_set_stack(lg, regs->edx, regs->ebx, regs->ecx); + break; + case LHCALL_SET_PTE: + guest_set_pte(lg, regs->edx, regs->ebx, mkgpte(regs->ecx)); + break; + case LHCALL_SET_PMD: + guest_set_pmd(lg, regs->edx, regs->ebx); + break; + case LHCALL_LOAD_TLS: + guest_load_tls(lg, regs->edx); + break; + case LHCALL_SET_CLOCKEVENT: + guest_set_clockevent(lg, regs->edx); + break; + case LHCALL_TS: + lg->ts = regs->edx; + break; + case LHCALL_HALT: + lg->halted = 1; + break; + default: + kill_guest(lg, "Bad hypercall %li\n", 
regs->eax); + } +} + +/* We always do queued calls before actual hypercall. */ +static void do_async_hcalls(struct lguest *lg) +{ + unsigned int i; + u8 st[LHCALL_RING_SIZE]; + + if (copy_from_user(&st, &lg->lguest_data->hcall_status, sizeof(st))) + return; + + for (i = 0; i < ARRAY_SIZE(st); i++) { + struct lguest_regs regs; + unsigned int n = lg->next_hcall; + + if (st[n] == 0xFF) + break; + + if (++lg->next_hcall == LHCALL_RING_SIZE) + lg->next_hcall = 0; + + if (get_user(regs.eax, &lg->lguest_data->hcalls[n].eax) + || get_user(regs.edx, &lg->lguest_data->hcalls[n].edx) + || get_user(regs.ecx, &lg->lguest_data->hcalls[n].ecx) + || get_user(regs.ebx, &lg->lguest_data->hcalls[n].ebx)) { + kill_guest(lg, "Fetching async hypercalls"); + break; + } + + do_hcall(lg, &regs); + if (put_user(0xFF, &lg->lguest_data->hcall_status[n])) { + kill_guest(lg, "Writing result for async hypercall"); + break; + } + + if (lg->dma_is_pending) + break; + } +} + +static void initialize(struct lguest *lg) +{ + u32 tsc_speed; + + if (lg->regs->eax != LHCALL_LGUEST_INIT) { + kill_guest(lg, "hypercall %li before LGUEST_INIT", + lg->regs->eax); + return; + } + + /* We only tell the guest to use the TSC if it's reliable. */ + if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && !check_tsc_unstable()) + tsc_speed = tsc_khz; + else + tsc_speed = 0; + + lg->lguest_data = (struct lguest_data __user *)lg->regs->edx; + /* We check here so we can simply copy_to_user/from_user */ + if (!lguest_address_ok(lg, lg->regs->edx, sizeof(*lg->lguest_data))) { + kill_guest(lg, "bad guest page %p", lg->lguest_data); + return; + } + if (get_user(lg->noirq_start, &lg->lguest_data->noirq_start) + || get_user(lg->noirq_end, &lg->lguest_data->noirq_end) + /* We reserve the top pgd entry.
*/ + || put_user(4U*1024*1024, &lg->lguest_data->reserve_mem) + || put_user(tsc_speed, &lg->lguest_data->tsc_khz) + || put_user(lg->guestid, &lg->lguest_data->guestid)) + kill_guest(lg, "bad guest page %p", lg->lguest_data); + + /* This is the one case where the above accesses might have + * been the first write to a Guest page. This may have caused + * a copy-on-write fault, but the Guest might be referring to + * the old (read-only) page. */ + guest_pagetable_clear_all(lg); +} + +/* Even if we go out to userspace and come back, we don't want to do + * the hypercall again. */ +static void clear_hcall(struct lguest *lg) +{ + lg->regs->trapnum = 255; +} + +void do_hypercalls(struct lguest *lg) +{ + if (unlikely(!lg->lguest_data)) { + if (lg->regs->trapnum == LGUEST_TRAP_ENTRY) { + initialize(lg); + clear_hcall(lg); + } + return; + } + + do_async_hcalls(lg); + if (!lg->dma_is_pending && lg->regs->trapnum == LGUEST_TRAP_ENTRY) { + do_hcall(lg, lg->regs); + clear_hcall(lg); + } +} diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c new file mode 100644 index 00000000000..d9de5bbc613 --- /dev/null +++ b/drivers/lguest/interrupts_and_traps.c @@ -0,0 +1,268 @@ +#include <linux/uaccess.h> +#include "lg.h" + +static unsigned long idt_address(u32 lo, u32 hi) +{ + return (lo & 0x0000FFFF) | (hi & 0xFFFF0000); +} + +static int idt_type(u32 lo, u32 hi) +{ + return (hi >> 8) & 0xF; +} + +static int idt_present(u32 lo, u32 hi) +{ + return (hi & 0x8000); +} + +static void push_guest_stack(struct lguest *lg, unsigned long *gstack, u32 val) +{ + *gstack -= 4; + lgwrite_u32(lg, *gstack, val); +} + +static void set_guest_interrupt(struct lguest *lg, u32 lo, u32 hi, int has_err) +{ + unsigned long gstack; + u32 eflags, ss, irq_enable; + + /* If they want a ring change, we use new stack and push old ss/esp */ + if ((lg->regs->ss&0x3) != GUEST_PL) { + gstack = guest_pa(lg, lg->esp1); + ss = lg->ss1; + push_guest_stack(lg, &gstack, lg->regs->ss); + 
push_guest_stack(lg, &gstack, lg->regs->esp); + } else { + gstack = guest_pa(lg, lg->regs->esp); + ss = lg->regs->ss; + } + + /* We use IF bit in eflags to indicate whether irqs were disabled + (it's always 0, since irqs are enabled when guest is running). */ + eflags = lg->regs->eflags; + if (get_user(irq_enable, &lg->lguest_data->irq_enabled)) + irq_enable = 0; + eflags |= (irq_enable & X86_EFLAGS_IF); + + push_guest_stack(lg, &gstack, eflags); + push_guest_stack(lg, &gstack, lg->regs->cs); + push_guest_stack(lg, &gstack, lg->regs->eip); + + if (has_err) + push_guest_stack(lg, &gstack, lg->regs->errcode); + + /* Change the real stack so switcher returns to trap handler */ + lg->regs->ss = ss; + lg->regs->esp = gstack + lg->page_offset; + lg->regs->cs = (__KERNEL_CS|GUEST_PL); + lg->regs->eip = idt_address(lo, hi); + + /* Disable interrupts for an interrupt gate. */ + if (idt_type(lo, hi) == 0xE) + if (put_user(0, &lg->lguest_data->irq_enabled)) + kill_guest(lg, "Disabling interrupts"); +} + +void maybe_do_interrupt(struct lguest *lg) +{ + unsigned int irq; + DECLARE_BITMAP(blk, LGUEST_IRQS); + struct desc_struct *idt; + + if (!lg->lguest_data) + return; + + /* Mask out any interrupts they have blocked. */ + if (copy_from_user(&blk, lg->lguest_data->blocked_interrupts, + sizeof(blk))) + return; + + bitmap_andnot(blk, lg->irqs_pending, blk, LGUEST_IRQS); + + irq = find_first_bit(blk, LGUEST_IRQS); + if (irq >= LGUEST_IRQS) + return; + + if (lg->regs->eip >= lg->noirq_start && lg->regs->eip < lg->noirq_end) + return; + + /* If they're halted, we re-enable interrupts. */ + if (lg->halted) { + /* Re-enable interrupts. */ + if (put_user(X86_EFLAGS_IF, &lg->lguest_data->irq_enabled)) + kill_guest(lg, "Re-enabling interrupts"); + lg->halted = 0; + } else { + /* Maybe they have interrupts disabled? 
*/ + u32 irq_enabled; + if (get_user(irq_enabled, &lg->lguest_data->irq_enabled)) + irq_enabled = 0; + if (!irq_enabled) + return; + } + + idt = &lg->idt[FIRST_EXTERNAL_VECTOR+irq]; + if (idt_present(idt->a, idt->b)) { + clear_bit(irq, lg->irqs_pending); + set_guest_interrupt(lg, idt->a, idt->b, 0); + } +} + +static int has_err(unsigned int trap) +{ + return (trap == 8 || (trap >= 10 && trap <= 14) || trap == 17); +} + +int deliver_trap(struct lguest *lg, unsigned int num) +{ + u32 lo = lg->idt[num].a, hi = lg->idt[num].b; + + if (!idt_present(lo, hi)) + return 0; + set_guest_interrupt(lg, lo, hi, has_err(num)); + return 1; +} + +static int direct_trap(const struct lguest *lg, + const struct desc_struct *trap, + unsigned int num) +{ + /* Hardware interrupts don't go to guest (except syscall). */ + if (num >= FIRST_EXTERNAL_VECTOR && num != SYSCALL_VECTOR) + return 0; + + /* We intercept page fault (demand shadow paging & cr2 saving) + protection fault (in/out emulation) and device not + available (TS handling), and hypercall */ + if (num == 14 || num == 13 || num == 7 || num == LGUEST_TRAP_ENTRY) + return 0; + + /* Interrupt gates (0xE) or not present (0x0) can't go direct. */ + return idt_type(trap->a, trap->b) == 0xF; +} + +void pin_stack_pages(struct lguest *lg) +{ + unsigned int i; + + for (i = 0; i < lg->stack_pages; i++) + pin_page(lg, lg->esp1 - i * PAGE_SIZE); +} + +void guest_set_stack(struct lguest *lg, u32 seg, u32 esp, unsigned int pages) +{ + /* You cannot have a stack segment with priv level 0. */ + if ((seg & 0x3) != GUEST_PL) + kill_guest(lg, "bad stack segment %i", seg); + if (pages > 2) + kill_guest(lg, "bad stack pages %u", pages); + lg->ss1 = seg; + lg->esp1 = esp; + lg->stack_pages = pages; + pin_stack_pages(lg); +} + +/* Set up trap in IDT. 
*/ +static void set_trap(struct lguest *lg, struct desc_struct *trap, + unsigned int num, u32 lo, u32 hi) +{ + u8 type = idt_type(lo, hi); + + if (!idt_present(lo, hi)) { + trap->a = trap->b = 0; + return; + } + + if (type != 0xE && type != 0xF) + kill_guest(lg, "bad IDT type %i", type); + + trap->a = ((__KERNEL_CS|GUEST_PL)<<16) | (lo&0x0000FFFF); + trap->b = (hi&0xFFFFEF00); +} + +void load_guest_idt_entry(struct lguest *lg, unsigned int num, u32 lo, u32 hi) +{ + /* Guest never handles: NMI, doublefault, hypercall, spurious irq. */ + if (num == 2 || num == 8 || num == 15 || num == LGUEST_TRAP_ENTRY) + return; + + lg->changed |= CHANGED_IDT; + if (num < ARRAY_SIZE(lg->idt)) + set_trap(lg, &lg->idt[num], num, lo, hi); + else if (num == SYSCALL_VECTOR) + set_trap(lg, &lg->syscall_idt, num, lo, hi); +} + +static void default_idt_entry(struct desc_struct *idt, + int trap, + const unsigned long handler) +{ + u32 flags = 0x8e00; + + /* They can't "int" into any of them except hypercall. */ + if (trap == LGUEST_TRAP_ENTRY) + flags |= (GUEST_PL << 13); + + idt->a = (LGUEST_CS<<16) | (handler&0x0000FFFF); + idt->b = (handler&0xFFFF0000) | flags; +} + +void setup_default_idt_entries(struct lguest_ro_state *state, + const unsigned long *def) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(state->guest_idt); i++) + default_idt_entry(&state->guest_idt[i], i, def[i]); +} + +void copy_traps(const struct lguest *lg, struct desc_struct *idt, + const unsigned long *def) +{ + unsigned int i; + + /* All hardware interrupts are same whatever the guest: only the + * traps might be different. 
*/ + for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) { + if (direct_trap(lg, &lg->idt[i], i)) + idt[i] = lg->idt[i]; + else + default_idt_entry(&idt[i], i, def[i]); + } + i = SYSCALL_VECTOR; + if (direct_trap(lg, &lg->syscall_idt, i)) + idt[i] = lg->syscall_idt; + else + default_idt_entry(&idt[i], i, def[i]); +} + +void guest_set_clockevent(struct lguest *lg, unsigned long delta) +{ + ktime_t expires; + + if (unlikely(delta == 0)) { + /* Clock event device is shutting down. */ + hrtimer_cancel(&lg->hrt); + return; + } + + expires = ktime_add_ns(ktime_get_real(), delta); + hrtimer_start(&lg->hrt, expires, HRTIMER_MODE_ABS); +} + +static enum hrtimer_restart clockdev_fn(struct hrtimer *timer) +{ + struct lguest *lg = container_of(timer, struct lguest, hrt); + + set_bit(0, lg->irqs_pending); + if (lg->halted) + wake_up_process(lg->tsk); + return HRTIMER_NORESTART; +} + +void init_clockdev(struct lguest *lg) +{ + hrtimer_init(&lg->hrt, CLOCK_REALTIME, HRTIMER_MODE_ABS); + lg->hrt.function = clockdev_fn; +} diff --git a/drivers/lguest/io.c b/drivers/lguest/io.c new file mode 100644 index 00000000000..06bdba2337e --- /dev/null +++ b/drivers/lguest/io.c @@ -0,0 +1,399 @@ +/* Simple I/O model for guests, based on shared memory. + * Copyright (C) 2006 Rusty Russell IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include <linux/types.h> +#include <linux/futex.h> +#include <linux/jhash.h> +#include <linux/mm.h> +#include <linux/highmem.h> +#include <linux/uaccess.h> +#include "lg.h" + +static struct list_head dma_hash[61]; + +void lguest_io_init(void) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(dma_hash); i++) + INIT_LIST_HEAD(&dma_hash[i]); +} + +/* FIXME: allow multi-page lengths. */ +static int check_dma_list(struct lguest *lg, const struct lguest_dma *dma) +{ + unsigned int i; + + for (i = 0; i < LGUEST_MAX_DMA_SECTIONS; i++) { + if (!dma->len[i]) + return 1; + if (!lguest_address_ok(lg, dma->addr[i], dma->len[i])) + goto kill; + if (dma->len[i] > PAGE_SIZE) + goto kill; + /* We could do over a page, but is it worth it? */ + if ((dma->addr[i] % PAGE_SIZE) + dma->len[i] > PAGE_SIZE) + goto kill; + } + return 1; + +kill: + kill_guest(lg, "bad DMA entry: %u@%#lx", dma->len[i], dma->addr[i]); + return 0; +} + +static unsigned int hash(const union futex_key *key) +{ + return jhash2((u32*)&key->both.word, + (sizeof(key->both.word)+sizeof(key->both.ptr))/4, + key->both.offset) + % ARRAY_SIZE(dma_hash); +} + +static inline int key_eq(const union futex_key *a, const union futex_key *b) +{ + return (a->both.word == b->both.word + && a->both.ptr == b->both.ptr + && a->both.offset == b->both.offset); +} + +/* Must hold read lock on dmainfo owner's current->mm->mmap_sem */ +static void unlink_dma(struct lguest_dma_info *dmainfo) +{ + BUG_ON(!mutex_is_locked(&lguest_lock)); + dmainfo->interrupt = 0; + list_del(&dmainfo->list); + drop_futex_key_refs(&dmainfo->key); +} + +static int unbind_dma(struct lguest *lg, + const union futex_key *key, + unsigned long dmas) +{ + int i, ret = 0; + + for (i = 0; i < LGUEST_MAX_DMA; i++) { + if (key_eq(key, &lg->dma[i].key) 
&& dmas == lg->dma[i].dmas) { + unlink_dma(&lg->dma[i]); + ret = 1; + break; + } + } + return ret; +} + +int bind_dma(struct lguest *lg, + unsigned long ukey, unsigned long dmas, u16 numdmas, u8 interrupt) +{ + unsigned int i; + int ret = 0; + union futex_key key; + struct rw_semaphore *fshared = &current->mm->mmap_sem; + + if (interrupt >= LGUEST_IRQS) + return 0; + + mutex_lock(&lguest_lock); + down_read(fshared); + if (get_futex_key((u32 __user *)ukey, fshared, &key) != 0) { + kill_guest(lg, "bad dma key %#lx", ukey); + goto unlock; + } + get_futex_key_refs(&key); + + if (interrupt == 0) + ret = unbind_dma(lg, &key, dmas); + else { + for (i = 0; i < LGUEST_MAX_DMA; i++) { + if (lg->dma[i].interrupt) + continue; + + lg->dma[i].dmas = dmas; + lg->dma[i].num_dmas = numdmas; + lg->dma[i].next_dma = 0; + lg->dma[i].key = key; + lg->dma[i].guestid = lg->guestid; + lg->dma[i].interrupt = interrupt; + list_add(&lg->dma[i].list, &dma_hash[hash(&key)]); + ret = 1; + goto unlock; + } + } + drop_futex_key_refs(&key); +unlock: + up_read(fshared); + mutex_unlock(&lguest_lock); + return ret; +} + +/* lgread from another guest */ +static int lgread_other(struct lguest *lg, + void *buf, u32 addr, unsigned bytes) +{ + if (!lguest_address_ok(lg, addr, bytes) + || access_process_vm(lg->tsk, addr, buf, bytes, 0) != bytes) { + memset(buf, 0, bytes); + kill_guest(lg, "bad address in registered DMA struct"); + return 0; + } + return 1; +} + +/* lgwrite to another guest */ +static int lgwrite_other(struct lguest *lg, u32 addr, + const void *buf, unsigned bytes) +{ + if (!lguest_address_ok(lg, addr, bytes) + || (access_process_vm(lg->tsk, addr, (void *)buf, bytes, 1) + != bytes)) { + kill_guest(lg, "bad address writing to registered DMA"); + return 0; + } + return 1; +} + +static u32 copy_data(struct lguest *srclg, + const struct lguest_dma *src, + const struct lguest_dma *dst, + struct page *pages[]) +{ + unsigned int totlen, si, di, srcoff, dstoff; + void *maddr = NULL; + + totlen = 0; + si
= di = 0; + srcoff = dstoff = 0; + while (si < LGUEST_MAX_DMA_SECTIONS && src->len[si] + && di < LGUEST_MAX_DMA_SECTIONS && dst->len[di]) { + u32 len = min(src->len[si] - srcoff, dst->len[di] - dstoff); + + if (!maddr) + maddr = kmap(pages[di]); + + /* FIXME: This is not completely portable, since + archs do different things for copy_to_user_page. */ + if (copy_from_user(maddr + (dst->addr[di] + dstoff)%PAGE_SIZE, + (void *__user)src->addr[si], len) != 0) { + kill_guest(srclg, "bad address in sending DMA"); + totlen = 0; + break; + } + + totlen += len; + srcoff += len; + dstoff += len; + if (srcoff == src->len[si]) { + si++; + srcoff = 0; + } + if (dstoff == dst->len[di]) { + kunmap(pages[di]); + maddr = NULL; + di++; + dstoff = 0; + } + } + + if (maddr) + kunmap(pages[di]); + + return totlen; +} + +/* Src is us, ie. current. */ +static u32 do_dma(struct lguest *srclg, const struct lguest_dma *src, + struct lguest *dstlg, const struct lguest_dma *dst) +{ + int i; + u32 ret; + struct page *pages[LGUEST_MAX_DMA_SECTIONS]; + + if (!check_dma_list(dstlg, dst) || !check_dma_list(srclg, src)) + return 0; + + /* First get the destination pages */ + for (i = 0; i < LGUEST_MAX_DMA_SECTIONS; i++) { + if (dst->len[i] == 0) + break; + if (get_user_pages(dstlg->tsk, dstlg->mm, + dst->addr[i], 1, 1, 1, pages+i, NULL) + != 1) { + kill_guest(dstlg, "Error mapping DMA pages"); + ret = 0; + goto drop_pages; + } + } + + /* Now copy until we run out of src or dst. */ + ret = copy_data(srclg, src, dst, pages); + +drop_pages: + while (--i >= 0) + put_page(pages[i]); + return ret; +} + +static int dma_transfer(struct lguest *srclg, + unsigned long udma, + struct lguest_dma_info *dst) +{ + struct lguest_dma dst_dma, src_dma; + struct lguest *dstlg; + u32 i, dma = 0; + + dstlg = &lguests[dst->guestid]; + /* Get our dma list. */ + lgread(srclg, &src_dma, udma, sizeof(src_dma)); + + /* We can't deadlock against them dmaing to us, because this + * is all under the lguest_lock. 
*/ + down_read(&dstlg->mm->mmap_sem); + + for (i = 0; i < dst->num_dmas; i++) { + dma = (dst->next_dma + i) % dst->num_dmas; + if (!lgread_other(dstlg, &dst_dma, + dst->dmas + dma * sizeof(struct lguest_dma), + sizeof(dst_dma))) { + goto fail; + } + if (!dst_dma.used_len) + break; + } + if (i != dst->num_dmas) { + unsigned long used_lenp; + unsigned int ret; + + ret = do_dma(srclg, &src_dma, dstlg, &dst_dma); + /* Put used length in src. */ + lgwrite_u32(srclg, + udma+offsetof(struct lguest_dma, used_len), ret); + if (ret == 0 && src_dma.len[0] != 0) + goto fail; + + /* Make sure destination sees contents before length. */ + wmb(); + used_lenp = dst->dmas + + dma * sizeof(struct lguest_dma) + + offsetof(struct lguest_dma, used_len); + lgwrite_other(dstlg, used_lenp, &ret, sizeof(ret)); + dst->next_dma++; + } + up_read(&dstlg->mm->mmap_sem); + + /* Do this last so dst doesn't simply sleep on lock. */ + set_bit(dst->interrupt, dstlg->irqs_pending); + wake_up_process(dstlg->tsk); + return i == dst->num_dmas; + +fail: + up_read(&dstlg->mm->mmap_sem); + return 0; +} + +void send_dma(struct lguest *lg, unsigned long ukey, unsigned long udma) +{ + union futex_key key; + int empty = 0; + struct rw_semaphore *fshared = &current->mm->mmap_sem; + +again: + mutex_lock(&lguest_lock); + down_read(fshared); + if (get_futex_key((u32 __user *)ukey, fshared, &key) != 0) { + kill_guest(lg, "bad sending DMA key"); + goto unlock; + } + /* Shared mapping? Look for other guests... */ + if (key.shared.offset & 1) { + struct lguest_dma_info *i; + list_for_each_entry(i, &dma_hash[hash(&key)], list) { + if (i->guestid == lg->guestid) + continue; + if (!key_eq(&key, &i->key)) + continue; + + empty += dma_transfer(lg, udma, i); + break; + } + if (empty == 1) { + /* Give any recipients one chance to restock. */ + up_read(&current->mm->mmap_sem); + mutex_unlock(&lguest_lock); + empty++; + goto again; + } + } else { + /* Private mapping: tell our userspace.
*/ + lg->dma_is_pending = 1; + lg->pending_dma = udma; + lg->pending_key = ukey; + } +unlock: + up_read(fshared); + mutex_unlock(&lguest_lock); +} + +void release_all_dma(struct lguest *lg) +{ + unsigned int i; + + BUG_ON(!mutex_is_locked(&lguest_lock)); + + down_read(&lg->mm->mmap_sem); + for (i = 0; i < LGUEST_MAX_DMA; i++) { + if (lg->dma[i].interrupt) + unlink_dma(&lg->dma[i]); + } + up_read(&lg->mm->mmap_sem); +} + +/* Userspace wants a dma buffer from this guest. */ +unsigned long get_dma_buffer(struct lguest *lg, + unsigned long ukey, unsigned long *interrupt) +{ + unsigned long ret = 0; + union futex_key key; + struct lguest_dma_info *i; + struct rw_semaphore *fshared = &current->mm->mmap_sem; + + mutex_lock(&lguest_lock); + down_read(fshared); + if (get_futex_key((u32 __user *)ukey, fshared, &key) != 0) { + kill_guest(lg, "bad registered DMA buffer"); + goto unlock; + } + list_for_each_entry(i, &dma_hash[hash(&key)], list) { + if (key_eq(&key, &i->key) && i->guestid == lg->guestid) { + unsigned int j; + for (j = 0; j < i->num_dmas; j++) { + struct lguest_dma dma; + + ret = i->dmas + j * sizeof(struct lguest_dma); + lgread(lg, &dma, ret, sizeof(dma)); + if (dma.used_len == 0) + break; + } + *interrupt = i->interrupt; + break; + } + } +unlock: + up_read(fshared); + mutex_unlock(&lguest_lock); + return ret; +} + diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h new file mode 100644 index 00000000000..3e2ddfbc816 --- /dev/null +++ b/drivers/lguest/lg.h @@ -0,0 +1,261 @@ +#ifndef _LGUEST_H +#define _LGUEST_H + +#include <asm/desc.h> + +#define GDT_ENTRY_LGUEST_CS 10 +#define GDT_ENTRY_LGUEST_DS 11 +#define LGUEST_CS (GDT_ENTRY_LGUEST_CS * 8) +#define LGUEST_DS (GDT_ENTRY_LGUEST_DS * 8) + +#ifndef __ASSEMBLY__ +#include <linux/types.h> +#include <linux/init.h> +#include <linux/stringify.h> +#include <linux/binfmts.h> +#include <linux/futex.h> +#include <linux/lguest.h> +#include <linux/lguest_launcher.h> +#include <linux/wait.h> +#include <linux/err.h> +#include 
<asm/semaphore.h> +#include "irq_vectors.h" + +#define GUEST_PL 1 + +struct lguest_regs +{ + /* Manually saved part. */ + unsigned long ebx, ecx, edx; + unsigned long esi, edi, ebp; + unsigned long gs; + unsigned long eax; + unsigned long fs, ds, es; + unsigned long trapnum, errcode; + /* Trap pushed part */ + unsigned long eip; + unsigned long cs; + unsigned long eflags; + unsigned long esp; + unsigned long ss; +}; + +void free_pagetables(void); +int init_pagetables(struct page **switcher_page, unsigned int pages); + +/* Full 4G segment descriptors, suitable for CS and DS. */ +#define FULL_EXEC_SEGMENT ((struct desc_struct){0x0000ffff, 0x00cf9b00}) +#define FULL_SEGMENT ((struct desc_struct){0x0000ffff, 0x00cf9300}) + +struct lguest_dma_info +{ + struct list_head list; + union futex_key key; + unsigned long dmas; + u16 next_dma; + u16 num_dmas; + u16 guestid; + u8 interrupt; /* 0 when not registered */ +}; + +/* We have separate types for the guest's ptes & pgds and the shadow ptes & + * pgds. Since this host might use three-level pagetables and the guest and + * shadow pagetables don't, we can't use the normal pte_t/pgd_t. */ +typedef union { + struct { unsigned flags:12, pfn:20; }; + struct { unsigned long val; } raw; +} spgd_t; +typedef union { + struct { unsigned flags:12, pfn:20; }; + struct { unsigned long val; } raw; +} spte_t; +typedef union { + struct { unsigned flags:12, pfn:20; }; + struct { unsigned long val; } raw; +} gpgd_t; +typedef union { + struct { unsigned flags:12, pfn:20; }; + struct { unsigned long val; } raw; +} gpte_t; +#define mkgpte(_val) ((gpte_t){.raw.val = _val}) +#define mkgpgd(_val) ((gpgd_t){.raw.val = _val}) + +struct pgdir +{ + unsigned long cr3; + spgd_t *pgdir; +}; + +/* This is a guest-specific page (mapped ro) into the guest. */ +struct lguest_ro_state +{ + /* Host information we need to restore when we switch back. 
*/ + u32 host_cr3; + struct Xgt_desc_struct host_idt_desc; + struct Xgt_desc_struct host_gdt_desc; + u32 host_sp; + + /* Fields which are used when guest is running. */ + struct Xgt_desc_struct guest_idt_desc; + struct Xgt_desc_struct guest_gdt_desc; + struct i386_hw_tss guest_tss; + struct desc_struct guest_idt[IDT_ENTRIES]; + struct desc_struct guest_gdt[GDT_ENTRIES]; +}; + +/* We have two pages shared with guests, per cpu. */ +struct lguest_pages +{ + /* This is the stack page mapped rw in guest */ + char spare[PAGE_SIZE - sizeof(struct lguest_regs)]; + struct lguest_regs regs; + + /* This is the host state & guest descriptor page, ro in guest */ + struct lguest_ro_state state; +} __attribute__((aligned(PAGE_SIZE))); + +#define CHANGED_IDT 1 +#define CHANGED_GDT 2 +#define CHANGED_GDT_TLS 4 /* Actually a subset of CHANGED_GDT */ +#define CHANGED_ALL 3 + +/* The private info the thread maintains about the guest. */ +struct lguest +{ + /* At end of a page shared mapped over lguest_pages in guest. */ + unsigned long regs_page; + struct lguest_regs *regs; + struct lguest_data __user *lguest_data; + struct task_struct *tsk; + struct mm_struct *mm; /* == tsk->mm, but that becomes NULL on exit */ + u16 guestid; + u32 pfn_limit; + u32 page_offset; + u32 cr2; + int halted; + int ts; + u32 next_hcall; + u32 esp1; + u8 ss1; + + /* Do we need to stop what we're doing and return to userspace? */ + int break_out; + wait_queue_head_t break_wq; + + /* Bitmap of what has changed: see CHANGED_* above. */ + int changed; + struct lguest_pages *last_pages; + + /* We keep a small number of these. */ + u32 pgdidx; + struct pgdir pgdirs[4]; + + /* Cached wakeup: we hold a reference to this task. 
*/ + struct task_struct *wake; + + unsigned long noirq_start, noirq_end; + int dma_is_pending; + unsigned long pending_dma; /* struct lguest_dma */ + unsigned long pending_key; /* address they're sending to */ + + unsigned int stack_pages; + u32 tsc_khz; + + struct lguest_dma_info dma[LGUEST_MAX_DMA]; + + /* Dead? */ + const char *dead; + + /* The GDT entries copied into lguest_ro_state when running. */ + struct desc_struct gdt[GDT_ENTRIES]; + + /* The IDT entries: some copied into lguest_ro_state when running. */ + struct desc_struct idt[FIRST_EXTERNAL_VECTOR+LGUEST_IRQS]; + struct desc_struct syscall_idt; + + /* Virtual clock device */ + struct hrtimer hrt; + + /* Pending virtual interrupts */ + DECLARE_BITMAP(irqs_pending, LGUEST_IRQS); +}; + +extern struct lguest lguests[]; +extern struct mutex lguest_lock; + +/* core.c: */ +u32 lgread_u32(struct lguest *lg, unsigned long addr); +void lgwrite_u32(struct lguest *lg, unsigned long addr, u32 val); +void lgread(struct lguest *lg, void *buf, unsigned long addr, unsigned len); +void lgwrite(struct lguest *lg, unsigned long, const void *buf, unsigned len); +int find_free_guest(void); +int lguest_address_ok(const struct lguest *lg, + unsigned long addr, unsigned long len); +int run_guest(struct lguest *lg, unsigned long __user *user); + + +/* interrupts_and_traps.c: */ +void maybe_do_interrupt(struct lguest *lg); +int deliver_trap(struct lguest *lg, unsigned int num); +void load_guest_idt_entry(struct lguest *lg, unsigned int i, u32 low, u32 hi); +void guest_set_stack(struct lguest *lg, u32 seg, u32 esp, unsigned int pages); +void pin_stack_pages(struct lguest *lg); +void setup_default_idt_entries(struct lguest_ro_state *state, + const unsigned long *def); +void copy_traps(const struct lguest *lg, struct desc_struct *idt, + const unsigned long *def); +void guest_set_clockevent(struct lguest *lg, unsigned long delta); +void init_clockdev(struct lguest *lg); + +/* segments.c: */ +void setup_default_gdt_entries(struct 
lguest_ro_state *state); +void setup_guest_gdt(struct lguest *lg); +void load_guest_gdt(struct lguest *lg, unsigned long table, u32 num); +void guest_load_tls(struct lguest *lg, unsigned long tls_array); +void copy_gdt(const struct lguest *lg, struct desc_struct *gdt); +void copy_gdt_tls(const struct lguest *lg, struct desc_struct *gdt); + +/* page_tables.c: */ +int init_guest_pagetable(struct lguest *lg, unsigned long pgtable); +void free_guest_pagetable(struct lguest *lg); +void guest_new_pagetable(struct lguest *lg, unsigned long pgtable); +void guest_set_pmd(struct lguest *lg, unsigned long cr3, u32 i); +void guest_pagetable_clear_all(struct lguest *lg); +void guest_pagetable_flush_user(struct lguest *lg); +void guest_set_pte(struct lguest *lg, unsigned long cr3, + unsigned long vaddr, gpte_t val); +void map_switcher_in_guest(struct lguest *lg, struct lguest_pages *pages); +int demand_page(struct lguest *info, unsigned long cr2, int errcode); +void pin_page(struct lguest *lg, unsigned long vaddr); + +/* lguest_user.c: */ +int lguest_device_init(void); +void lguest_device_remove(void); + +/* io.c: */ +void lguest_io_init(void); +int bind_dma(struct lguest *lg, + unsigned long key, unsigned long udma, u16 numdmas, u8 interrupt); +void send_dma(struct lguest *info, unsigned long key, unsigned long udma); +void release_all_dma(struct lguest *lg); +unsigned long get_dma_buffer(struct lguest *lg, unsigned long key, + unsigned long *interrupt); + +/* hypercalls.c: */ +void do_hypercalls(struct lguest *lg); + +#define kill_guest(lg, fmt...) 
\ +do { \ + if (!(lg)->dead) { \ + (lg)->dead = kasprintf(GFP_ATOMIC, fmt); \ + if (!(lg)->dead) \ + (lg)->dead = ERR_PTR(-ENOMEM); \ + } \ +} while(0) + +static inline unsigned long guest_pa(struct lguest *lg, unsigned long vaddr) +{ + return vaddr - lg->page_offset; +} +#endif /* __ASSEMBLY__ */ +#endif /* _LGUEST_H */ diff --git a/drivers/lguest/lguest.c b/drivers/lguest/lguest.c new file mode 100644 index 00000000000..b9a58b78c99 --- /dev/null +++ b/drivers/lguest/lguest.c @@ -0,0 +1,621 @@ +/* + * Lguest specific paravirt-ops implementation + * + * Copyright (C) 2006, Rusty Russell <rusty@rustcorp.com.au> IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ +#include <linux/kernel.h> +#include <linux/start_kernel.h> +#include <linux/string.h> +#include <linux/console.h> +#include <linux/screen_info.h> +#include <linux/irq.h> +#include <linux/interrupt.h> +#include <linux/clocksource.h> +#include <linux/clockchips.h> +#include <linux/lguest.h> +#include <linux/lguest_launcher.h> +#include <linux/lguest_bus.h> +#include <asm/paravirt.h> +#include <asm/param.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/desc.h> +#include <asm/setup.h> +#include <asm/e820.h> +#include <asm/mce.h> +#include <asm/io.h> +//#include <asm/sched-clock.h> + +/* Declarations for definitions in lguest_guest.S */ +extern char lguest_noirq_start[], lguest_noirq_end[]; +extern const char lgstart_cli[], lgend_cli[]; +extern const char lgstart_sti[], lgend_sti[]; +extern const char lgstart_popf[], lgend_popf[]; +extern const char lgstart_pushf[], lgend_pushf[]; +extern const char lgstart_iret[], lgend_iret[]; +extern void lguest_iret(void); + +struct lguest_data lguest_data = { + .hcall_status = { [0 ... 
LHCALL_RING_SIZE-1] = 0xFF }, + .noirq_start = (u32)lguest_noirq_start, + .noirq_end = (u32)lguest_noirq_end, + .blocked_interrupts = { 1 }, /* Block timer interrupts */ +}; +struct lguest_device_desc *lguest_devices; + +static enum paravirt_lazy_mode lazy_mode; +static void lguest_lazy_mode(enum paravirt_lazy_mode mode) +{ + if (mode == PARAVIRT_LAZY_FLUSH) { + if (unlikely(lazy_mode != PARAVIRT_LAZY_NONE)) + hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0); + } else { + lazy_mode = mode; + if (mode == PARAVIRT_LAZY_NONE) + hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0); + } +} + +static void lazy_hcall(unsigned long call, + unsigned long arg1, + unsigned long arg2, + unsigned long arg3) +{ + if (lazy_mode == PARAVIRT_LAZY_NONE) + hcall(call, arg1, arg2, arg3); + else + async_hcall(call, arg1, arg2, arg3); +} + +void async_hcall(unsigned long call, + unsigned long arg1, unsigned long arg2, unsigned long arg3) +{ + /* Note: This code assumes we're uniprocessor. */ + static unsigned int next_call; + unsigned long flags; + + local_irq_save(flags); + if (lguest_data.hcall_status[next_call] != 0xFF) { + /* Table full, so do normal hcall which will flush table. */ + hcall(call, arg1, arg2, arg3); + } else { + lguest_data.hcalls[next_call].eax = call; + lguest_data.hcalls[next_call].edx = arg1; + lguest_data.hcalls[next_call].ebx = arg2; + lguest_data.hcalls[next_call].ecx = arg3; + /* Make sure host sees arguments before "valid" flag. 
*/ + wmb(); + lguest_data.hcall_status[next_call] = 0; + if (++next_call == LHCALL_RING_SIZE) + next_call = 0; + } + local_irq_restore(flags); +} + +void lguest_send_dma(unsigned long key, struct lguest_dma *dma) +{ + dma->used_len = 0; + hcall(LHCALL_SEND_DMA, key, __pa(dma), 0); +} + +int lguest_bind_dma(unsigned long key, struct lguest_dma *dmas, + unsigned int num, u8 irq) +{ + if (!hcall(LHCALL_BIND_DMA, key, __pa(dmas), (num << 8) | irq)) + return -ENOMEM; + return 0; +} + +void lguest_unbind_dma(unsigned long key, struct lguest_dma *dmas) +{ + hcall(LHCALL_BIND_DMA, key, __pa(dmas), 0); +} + +/* For guests, device memory can be used as normal memory, so we cast away the + * __iomem to quieten sparse. */ +void *lguest_map(unsigned long phys_addr, unsigned long pages) +{ + return (__force void *)ioremap(phys_addr, PAGE_SIZE*pages); +} + +void lguest_unmap(void *addr) +{ + iounmap((__force void __iomem *)addr); +} + +static unsigned long save_fl(void) +{ + return lguest_data.irq_enabled; +} + +static void restore_fl(unsigned long flags) +{ + /* FIXME: Check if interrupt pending... */ + lguest_data.irq_enabled = flags; +} + +static void irq_disable(void) +{ + lguest_data.irq_enabled = 0; +} + +static void irq_enable(void) +{ + /* FIXME: Check if interrupt pending... 
*/ + lguest_data.irq_enabled = X86_EFLAGS_IF; +} + +static void lguest_write_idt_entry(struct desc_struct *dt, + int entrynum, u32 low, u32 high) +{ + write_dt_entry(dt, entrynum, low, high); + hcall(LHCALL_LOAD_IDT_ENTRY, entrynum, low, high); +} + +static void lguest_load_idt(const struct Xgt_desc_struct *desc) +{ + unsigned int i; + struct desc_struct *idt = (void *)desc->address; + + for (i = 0; i < (desc->size+1)/8; i++) + hcall(LHCALL_LOAD_IDT_ENTRY, i, idt[i].a, idt[i].b); +} + +static void lguest_load_gdt(const struct Xgt_desc_struct *desc) +{ + BUG_ON((desc->size+1)/8 != GDT_ENTRIES); + hcall(LHCALL_LOAD_GDT, __pa(desc->address), GDT_ENTRIES, 0); +} + +static void lguest_write_gdt_entry(struct desc_struct *dt, + int entrynum, u32 low, u32 high) +{ + write_dt_entry(dt, entrynum, low, high); + hcall(LHCALL_LOAD_GDT, __pa(dt), GDT_ENTRIES, 0); +} + +static void lguest_load_tls(struct thread_struct *t, unsigned int cpu) +{ + lazy_hcall(LHCALL_LOAD_TLS, __pa(&t->tls_array), cpu, 0); +} + +static void lguest_set_ldt(const void *addr, unsigned entries) +{ +} + +static void lguest_load_tr_desc(void) +{ +} + +static void lguest_cpuid(unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + int function = *eax; + + native_cpuid(eax, ebx, ecx, edx); + switch (function) { + case 1: /* Basic feature request. */ + /* We only allow kernel to see SSE3, CMPXCHG16B and SSSE3 */ + *ecx &= 0x00002201; + /* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, FPU. */ + *edx &= 0x07808101; + /* Host wants to know when we flush kernel pages: set PGE. */ + *edx |= 0x00002000; + break; + case 0x80000000: + /* Futureproof this a little: if they ask how much extended + * processor information, limit it to known fields. 
*/ + if (*eax > 0x80000008) + *eax = 0x80000008; + break; + } +} + +static unsigned long current_cr0, current_cr3; +static void lguest_write_cr0(unsigned long val) +{ + lazy_hcall(LHCALL_TS, val & 8, 0, 0); + current_cr0 = val; +} + +static unsigned long lguest_read_cr0(void) +{ + return current_cr0; +} + +static void lguest_clts(void) +{ + lazy_hcall(LHCALL_TS, 0, 0, 0); + current_cr0 &= ~8U; +} + +static unsigned long lguest_read_cr2(void) +{ + return lguest_data.cr2; +} + +static void lguest_write_cr3(unsigned long cr3) +{ + lazy_hcall(LHCALL_NEW_PGTABLE, cr3, 0, 0); + current_cr3 = cr3; +} + +static unsigned long lguest_read_cr3(void) +{ + return current_cr3; +} + +/* Used to enable/disable PGE, but we don't care. */ +static unsigned long lguest_read_cr4(void) +{ + return 0; +} + +static void lguest_write_cr4(unsigned long val) +{ +} + +static void lguest_set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pteval) +{ + *ptep = pteval; + lazy_hcall(LHCALL_SET_PTE, __pa(mm->pgd), addr, pteval.pte_low); +} + +/* We only support two-level pagetables at the moment. */ +static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval) +{ + *pmdp = pmdval; + lazy_hcall(LHCALL_SET_PMD, __pa(pmdp)&PAGE_MASK, + (__pa(pmdp)&(PAGE_SIZE-1))/4, 0); +} + +/* FIXME: Eliminate all callers of this. */ +static void lguest_set_pte(pte_t *ptep, pte_t pteval) +{ + *ptep = pteval; + /* Don't bother with hypercall before initial setup. */ + if (current_cr3) + lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0); +} + +static void lguest_flush_tlb_single(unsigned long addr) +{ + /* Simply set it to zero, and it will fault back in. 
*/ + lazy_hcall(LHCALL_SET_PTE, current_cr3, addr, 0); +} + +static void lguest_flush_tlb_user(void) +{ + lazy_hcall(LHCALL_FLUSH_TLB, 0, 0, 0); +} + +static void lguest_flush_tlb_kernel(void) +{ + lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0); +} + +static void disable_lguest_irq(unsigned int irq) +{ + set_bit(irq, lguest_data.blocked_interrupts); +} + +static void enable_lguest_irq(unsigned int irq) +{ + clear_bit(irq, lguest_data.blocked_interrupts); + /* FIXME: If it's pending? */ +} + +static struct irq_chip lguest_irq_controller = { + .name = "lguest", + .mask = disable_lguest_irq, + .mask_ack = disable_lguest_irq, + .unmask = enable_lguest_irq, +}; + +static void __init lguest_init_IRQ(void) +{ + unsigned int i; + + for (i = 0; i < LGUEST_IRQS; i++) { + int vector = FIRST_EXTERNAL_VECTOR + i; + if (vector != SYSCALL_VECTOR) { + set_intr_gate(vector, interrupt[i]); + set_irq_chip_and_handler(i, &lguest_irq_controller, + handle_level_irq); + } + } + irq_ctx_init(smp_processor_id()); +} + +static unsigned long lguest_get_wallclock(void) +{ + return hcall(LHCALL_GET_WALLCLOCK, 0, 0, 0); +} + +static cycle_t lguest_clock_read(void) +{ + if (lguest_data.tsc_khz) + return native_read_tsc(); + else + return jiffies; +} + +/* This is what we tell the kernel is our clocksource. */ +static struct clocksource lguest_clock = { + .name = "lguest", + .rating = 400, + .read = lguest_clock_read, +}; + +/* We also need a "struct clock_event_device": Linux asks us to set it to go + * off some time in the future. Actually, James Morris figured all this out, I + * just applied the patch. 
*/ +static int lguest_clockevent_set_next_event(unsigned long delta, + struct clock_event_device *evt) +{ + if (delta < LG_CLOCK_MIN_DELTA) { + if (printk_ratelimit()) + printk(KERN_DEBUG "%s: small delta %lu ns\n", + __FUNCTION__, delta); + return -ETIME; + } + hcall(LHCALL_SET_CLOCKEVENT, delta, 0, 0); + return 0; +} + +static void lguest_clockevent_set_mode(enum clock_event_mode mode, + struct clock_event_device *evt) +{ + switch (mode) { + case CLOCK_EVT_MODE_UNUSED: + case CLOCK_EVT_MODE_SHUTDOWN: + /* A 0 argument shuts the clock down. */ + hcall(LHCALL_SET_CLOCKEVENT, 0, 0, 0); + break; + case CLOCK_EVT_MODE_ONESHOT: + /* This is what we expect. */ + break; + case CLOCK_EVT_MODE_PERIODIC: + BUG(); + } +} + +/* This describes our primitive timer chip. */ +static struct clock_event_device lguest_clockevent = { + .name = "lguest", + .features = CLOCK_EVT_FEAT_ONESHOT, + .set_next_event = lguest_clockevent_set_next_event, + .set_mode = lguest_clockevent_set_mode, + .rating = INT_MAX, + .mult = 1, + .shift = 0, + .min_delta_ns = LG_CLOCK_MIN_DELTA, + .max_delta_ns = LG_CLOCK_MAX_DELTA, +}; + +/* This is the Guest timer interrupt handler (hardware interrupt 0). We just + * call the clockevent infrastructure and it does whatever needs doing. */ +static void lguest_time_irq(unsigned int irq, struct irq_desc *desc) +{ + unsigned long flags; + + /* Don't interrupt us while this is running. */ + local_irq_save(flags); + lguest_clockevent.event_handler(&lguest_clockevent); + local_irq_restore(flags); +} + +static void lguest_time_init(void) +{ + set_irq_handler(0, lguest_time_irq); + + /* We use the TSC if the Host tells us we can, otherwise a dumb + * jiffies-based clock. 
*/ + if (lguest_data.tsc_khz) { + lguest_clock.shift = 22; + lguest_clock.mult = clocksource_khz2mult(lguest_data.tsc_khz, + lguest_clock.shift); + lguest_clock.mask = CLOCKSOURCE_MASK(64); + lguest_clock.flags = CLOCK_SOURCE_IS_CONTINUOUS; + } else { + /* To understand this, start at kernel/time/jiffies.c... */ + lguest_clock.shift = 8; + lguest_clock.mult = (((u64)NSEC_PER_SEC<<8)/ACTHZ) << 8; + lguest_clock.mask = CLOCKSOURCE_MASK(32); + } + clocksource_register(&lguest_clock); + + /* We can't set cpumask in the initializer: damn C limitations! */ + lguest_clockevent.cpumask = cpumask_of_cpu(0); + clockevents_register_device(&lguest_clockevent); + + enable_lguest_irq(0); +} + +static void lguest_load_esp0(struct tss_struct *tss, + struct thread_struct *thread) +{ + lazy_hcall(LHCALL_SET_STACK, __KERNEL_DS|0x1, thread->esp0, + THREAD_SIZE/PAGE_SIZE); +} + +static void lguest_set_debugreg(int regno, unsigned long value) +{ + /* FIXME: Implement */ +} + +static void lguest_wbinvd(void) +{ +} + +#ifdef CONFIG_X86_LOCAL_APIC +static void lguest_apic_write(unsigned long reg, unsigned long v) +{ +} + +static unsigned long lguest_apic_read(unsigned long reg) +{ + return 0; +} +#endif + +static void lguest_safe_halt(void) +{ + hcall(LHCALL_HALT, 0, 0, 0); +} + +static void lguest_power_off(void) +{ + hcall(LHCALL_CRASH, __pa("Power down"), 0, 0); +} + +static int lguest_panic(struct notifier_block *nb, unsigned long l, void *p) +{ + hcall(LHCALL_CRASH, __pa(p), 0, 0); + return NOTIFY_DONE; +} + +static struct notifier_block paniced = { + .notifier_call = lguest_panic +}; + +static __init char *lguest_memory_setup(void) +{ + /* We do this here because lockcheck barfs if before start_kernel */ + atomic_notifier_chain_register(&panic_notifier_list, &paniced); + + add_memory_region(E820_MAP->addr, E820_MAP->size, E820_MAP->type); + return "LGUEST"; +} + +static const struct lguest_insns +{ + const char *start, *end; +} lguest_insns[] = { + [PARAVIRT_PATCH(irq_disable)] = { 
lgstart_cli, lgend_cli }, + [PARAVIRT_PATCH(irq_enable)] = { lgstart_sti, lgend_sti }, + [PARAVIRT_PATCH(restore_fl)] = { lgstart_popf, lgend_popf }, + [PARAVIRT_PATCH(save_fl)] = { lgstart_pushf, lgend_pushf }, +}; +static unsigned lguest_patch(u8 type, u16 clobber, void *insns, unsigned len) +{ + unsigned int insn_len; + + /* Don't touch it if we don't have a replacement */ + if (type >= ARRAY_SIZE(lguest_insns) || !lguest_insns[type].start) + return paravirt_patch_default(type, clobber, insns, len); + + insn_len = lguest_insns[type].end - lguest_insns[type].start; + + /* Similarly if we can't fit replacement. */ + if (len < insn_len) + return paravirt_patch_default(type, clobber, insns, len); + + memcpy(insns, lguest_insns[type].start, insn_len); + return insn_len; +} + +__init void lguest_init(void *boot) +{ + /* Copy boot parameters first. */ + memcpy(&boot_params, boot, PARAM_SIZE); + memcpy(boot_command_line, __va(boot_params.hdr.cmd_line_ptr), + COMMAND_LINE_SIZE); + + paravirt_ops.name = "lguest"; + paravirt_ops.paravirt_enabled = 1; + paravirt_ops.kernel_rpl = 1; + + paravirt_ops.save_fl = save_fl; + paravirt_ops.restore_fl = restore_fl; + paravirt_ops.irq_disable = irq_disable; + paravirt_ops.irq_enable = irq_enable; + paravirt_ops.load_gdt = lguest_load_gdt; + paravirt_ops.memory_setup = lguest_memory_setup; + paravirt_ops.cpuid = lguest_cpuid; + paravirt_ops.write_cr3 = lguest_write_cr3; + paravirt_ops.flush_tlb_user = lguest_flush_tlb_user; + paravirt_ops.flush_tlb_single = lguest_flush_tlb_single; + paravirt_ops.flush_tlb_kernel = lguest_flush_tlb_kernel; + paravirt_ops.set_pte = lguest_set_pte; + paravirt_ops.set_pte_at = lguest_set_pte_at; + paravirt_ops.set_pmd = lguest_set_pmd; +#ifdef CONFIG_X86_LOCAL_APIC + paravirt_ops.apic_write = lguest_apic_write; + paravirt_ops.apic_write_atomic = lguest_apic_write; + paravirt_ops.apic_read = lguest_apic_read; +#endif + paravirt_ops.load_idt = lguest_load_idt; + paravirt_ops.iret = lguest_iret; + 
paravirt_ops.load_esp0 = lguest_load_esp0; + paravirt_ops.load_tr_desc = lguest_load_tr_desc; + paravirt_ops.set_ldt = lguest_set_ldt; + paravirt_ops.load_tls = lguest_load_tls; + paravirt_ops.set_debugreg = lguest_set_debugreg; + paravirt_ops.clts = lguest_clts; + paravirt_ops.read_cr0 = lguest_read_cr0; + paravirt_ops.write_cr0 = lguest_write_cr0; + paravirt_ops.init_IRQ = lguest_init_IRQ; + paravirt_ops.read_cr2 = lguest_read_cr2; + paravirt_ops.read_cr3 = lguest_read_cr3; + paravirt_ops.read_cr4 = lguest_read_cr4; + paravirt_ops.write_cr4 = lguest_write_cr4; + paravirt_ops.write_gdt_entry = lguest_write_gdt_entry; + paravirt_ops.write_idt_entry = lguest_write_idt_entry; + paravirt_ops.patch = lguest_patch; + paravirt_ops.safe_halt = lguest_safe_halt; + paravirt_ops.get_wallclock = lguest_get_wallclock; + paravirt_ops.time_init = lguest_time_init; + paravirt_ops.set_lazy_mode = lguest_lazy_mode; + paravirt_ops.wbinvd = lguest_wbinvd; + + hcall(LHCALL_LGUEST_INIT, __pa(&lguest_data), 0, 0); + + /* We use top of mem for initial pagetables. */ + init_pg_tables_end = __pa(pg0); + + asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_DS) : "memory"); + + reserve_top_address(lguest_data.reserve_mem); + + lockdep_init(); + + paravirt_disable_iospace(); + + cpu_detect(&new_cpu_data); + /* head.S usually sets up the first capability word, so do it here. */ + new_cpu_data.x86_capability[0] = cpuid_edx(1); + + /* Math is always hard! 
*/ + new_cpu_data.hard_math = 1; + +#ifdef CONFIG_X86_MCE + mce_disabled = 1; +#endif + +#ifdef CONFIG_ACPI + acpi_disabled = 1; + acpi_ht = 0; +#endif + + add_preferred_console("hvc", 0, NULL); + + pm_power_off = lguest_power_off; + start_kernel(); +} diff --git a/drivers/lguest/lguest_asm.S b/drivers/lguest/lguest_asm.S new file mode 100644 index 00000000000..00046c57b5b --- /dev/null +++ b/drivers/lguest/lguest_asm.S @@ -0,0 +1,56 @@ +#include <linux/linkage.h> +#include <linux/lguest.h> +#include <asm/asm-offsets.h> +#include <asm/thread_info.h> + +/* FIXME: Once asm/processor-flags.h goes in, include that */ +#define X86_EFLAGS_IF 0x00000200 + +/* + * This is where we begin: we have a magic signature which the launcher looks + * for. The plan is that the Linux boot protocol will be extended with a + * "platform type" field which will guide us here from the normal entry point, + * but for the moment this suffices. We pass the virtual address of the boot + * info to lguest_init(). + * + * We put it in .init.text so that it will be discarded after boot. + */ +.section .init.text, "ax", @progbits +.ascii "GenuineLguest" + /* Set up initial stack. */ + movl $(init_thread_union+THREAD_SIZE),%esp + movl %esi, %eax + addl $__PAGE_OFFSET, %eax + jmp lguest_init + +/* The templates for inline patching. */ +#define LGUEST_PATCH(name, insns...) \ + lgstart_##name: insns; lgend_##name:; \ + .globl lgstart_##name; .globl lgend_##name + +LGUEST_PATCH(cli, movl $0, lguest_data+LGUEST_DATA_irq_enabled) +LGUEST_PATCH(sti, movl $X86_EFLAGS_IF, lguest_data+LGUEST_DATA_irq_enabled) +LGUEST_PATCH(popf, movl %eax, lguest_data+LGUEST_DATA_irq_enabled) +LGUEST_PATCH(pushf, movl lguest_data+LGUEST_DATA_irq_enabled, %eax) + +.text +/* These demark the EIP range where host should never deliver interrupts. */ +.global lguest_noirq_start +.global lguest_noirq_end + +/* + * We move eflags word to lguest_data.irq_enabled to restore interrupt state. 
+ * For page faults, gpfs and virtual interrupts, the hypervisor has saved + * eflags manually, otherwise it was delivered directly and so eflags reflects + * the real machine IF state, ie. interrupts on. Since the kernel always dies + * if it takes such a trap with interrupts disabled anyway, turning interrupts + * back on unconditionally here is OK. + */ +ENTRY(lguest_iret) + pushl %eax + movl 12(%esp), %eax +lguest_noirq_start: + movl %eax,%ss:lguest_data+LGUEST_DATA_irq_enabled + popl %eax + iret +lguest_noirq_end: diff --git a/drivers/lguest/lguest_bus.c b/drivers/lguest/lguest_bus.c new file mode 100644 index 00000000000..18d6ab21a43 --- /dev/null +++ b/drivers/lguest/lguest_bus.c @@ -0,0 +1,148 @@ +#include <linux/init.h> +#include <linux/bootmem.h> +#include <linux/lguest_bus.h> +#include <asm/io.h> + +static ssize_t type_show(struct device *_dev, + struct device_attribute *attr, char *buf) +{ + struct lguest_device *dev = container_of(_dev,struct lguest_device,dev); + return sprintf(buf, "%hu", lguest_devices[dev->index].type); +} +static ssize_t features_show(struct device *_dev, + struct device_attribute *attr, char *buf) +{ + struct lguest_device *dev = container_of(_dev,struct lguest_device,dev); + return sprintf(buf, "%hx", lguest_devices[dev->index].features); +} +static ssize_t pfn_show(struct device *_dev, + struct device_attribute *attr, char *buf) +{ + struct lguest_device *dev = container_of(_dev,struct lguest_device,dev); + return sprintf(buf, "%u", lguest_devices[dev->index].pfn); +} +static ssize_t status_show(struct device *_dev, + struct device_attribute *attr, char *buf) +{ + struct lguest_device *dev = container_of(_dev,struct lguest_device,dev); + return sprintf(buf, "%hx", lguest_devices[dev->index].status); +} +static ssize_t status_store(struct device *_dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct lguest_device *dev = container_of(_dev,struct lguest_device,dev); + if (sscanf(buf, "%hi", 
&lguest_devices[dev->index].status) != 1) + return -EINVAL; + return count; +} +static struct device_attribute lguest_dev_attrs[] = { + __ATTR_RO(type), + __ATTR_RO(features), + __ATTR_RO(pfn), + __ATTR(status, 0644, status_show, status_store), + __ATTR_NULL +}; + +static int lguest_dev_match(struct device *_dev, struct device_driver *_drv) +{ + struct lguest_device *dev = container_of(_dev,struct lguest_device,dev); + struct lguest_driver *drv = container_of(_drv,struct lguest_driver,drv); + + return (drv->device_type == lguest_devices[dev->index].type); +} + +struct lguest_bus { + struct bus_type bus; + struct device dev; +}; + +static struct lguest_bus lguest_bus = { + .bus = { + .name = "lguest", + .match = lguest_dev_match, + .dev_attrs = lguest_dev_attrs, + }, + .dev = { + .parent = NULL, + .bus_id = "lguest", + } +}; + +static int lguest_dev_probe(struct device *_dev) +{ + int ret; + struct lguest_device *dev = container_of(_dev,struct lguest_device,dev); + struct lguest_driver *drv = container_of(dev->dev.driver, + struct lguest_driver, drv); + + lguest_devices[dev->index].status |= LGUEST_DEVICE_S_DRIVER; + ret = drv->probe(dev); + if (ret == 0) + lguest_devices[dev->index].status |= LGUEST_DEVICE_S_DRIVER_OK; + return ret; +} + +int register_lguest_driver(struct lguest_driver *drv) +{ + if (!lguest_devices) + return 0; + + drv->drv.bus = &lguest_bus.bus; + drv->drv.name = drv->name; + drv->drv.owner = drv->owner; + drv->drv.probe = lguest_dev_probe; + + return driver_register(&drv->drv); +} +EXPORT_SYMBOL_GPL(register_lguest_driver); + +static void add_lguest_device(unsigned int index) +{ + struct lguest_device *new; + + lguest_devices[index].status |= LGUEST_DEVICE_S_ACKNOWLEDGE; + new = kmalloc(sizeof(struct lguest_device), GFP_KERNEL); + if (!new) { + printk(KERN_EMERG "Cannot allocate lguest device %u\n", index); + lguest_devices[index].status |= LGUEST_DEVICE_S_FAILED; + return; + } + + new->index = index; + new->private = NULL; + memset(&new->dev, 
0, sizeof(new->dev)); + new->dev.parent = &lguest_bus.dev; + new->dev.bus = &lguest_bus.bus; + sprintf(new->dev.bus_id, "%u", index); + if (device_register(&new->dev) != 0) { + printk(KERN_EMERG "Cannot register lguest device %u\n", index); + lguest_devices[index].status |= LGUEST_DEVICE_S_FAILED; + kfree(new); + } +} + +static void scan_devices(void) +{ + unsigned int i; + + for (i = 0; i < LGUEST_MAX_DEVICES; i++) + if (lguest_devices[i].type) + add_lguest_device(i); +} + +static int __init lguest_bus_init(void) +{ + if (strcmp(paravirt_ops.name, "lguest") != 0) + return 0; + + /* Devices are in page above top of "normal" mem. */ + lguest_devices = lguest_map(max_pfn<<PAGE_SHIFT, 1); + + if (bus_register(&lguest_bus.bus) != 0 + || device_register(&lguest_bus.dev) != 0) + panic("lguest bus registration failed"); + + scan_devices(); + return 0; +} +postcore_initcall(lguest_bus_init); diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c new file mode 100644 index 00000000000..e90d7a783da --- /dev/null +++ b/drivers/lguest/lguest_user.c @@ -0,0 +1,236 @@ +/* Userspace control of the guest, via /dev/lguest. */ +#include <linux/uaccess.h> +#include <linux/miscdevice.h> +#include <linux/fs.h> +#include "lg.h" + +static void setup_regs(struct lguest_regs *regs, unsigned long start) +{ + /* Write out stack in format lguest expects, so we can switch to it. */ + regs->ds = regs->es = regs->ss = __KERNEL_DS|GUEST_PL; + regs->cs = __KERNEL_CS|GUEST_PL; + regs->eflags = 0x202; /* Interrupts enabled. */ + regs->eip = start; + /* esi points to our boot information (physical address 0) */ +} + +/* + addr */ +static long user_get_dma(struct lguest *lg, const u32 __user *input) +{ + unsigned long key, udma, irq; + + if (get_user(key, input) != 0) + return -EFAULT; + udma = get_dma_buffer(lg, key, &irq); + if (!udma) + return -ENOENT; + + /* We put irq number in udma->used_len. 
*/ + lgwrite_u32(lg, udma + offsetof(struct lguest_dma, used_len), irq); + return udma; +} + +/* To force the Guest to stop running and return to the Launcher, the + * Waker writes LHREQ_BREAK and the value "1" to /dev/lguest. The + * Launcher then writes LHREQ_BREAK and "0" to release the Waker. */ +static int break_guest_out(struct lguest *lg, const u32 __user *input) +{ + unsigned long on; + + /* Fetch whether they're turning break on or off. */ + if (get_user(on, input) != 0) + return -EFAULT; + + if (on) { + lg->break_out = 1; + /* Pop it out (may be running on different CPU) */ + wake_up_process(lg->tsk); + /* Wait for them to reset it */ + return wait_event_interruptible(lg->break_wq, !lg->break_out); + } else { + lg->break_out = 0; + wake_up(&lg->break_wq); + return 0; + } +} + +/* + irq */ +static int user_send_irq(struct lguest *lg, const u32 __user *input) +{ + u32 irq; + + if (get_user(irq, input) != 0) + return -EFAULT; + if (irq >= LGUEST_IRQS) + return -EINVAL; + set_bit(irq, lg->irqs_pending); + return 0; +} + +static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o) +{ + struct lguest *lg = file->private_data; + + if (!lg) + return -EINVAL; + + /* If you're not the task which owns the guest, go away. */ + if (current != lg->tsk) + return -EPERM; + + if (lg->dead) { + size_t len; + + if (IS_ERR(lg->dead)) + return PTR_ERR(lg->dead); + + len = min(size, strlen(lg->dead)+1); + if (copy_to_user(user, lg->dead, len) != 0) + return -EFAULT; + return len; + } + + if (lg->dma_is_pending) + lg->dma_is_pending = 0; + + return run_guest(lg, (unsigned long __user *)user); +} + +/* Take: pfnlimit, pgdir, start, pageoffset. */ +static int initialize(struct file *file, const u32 __user *input) +{ + struct lguest *lg; + int err, i; + u32 args[4]; + + /* We grab the Big Lguest lock, which protects the global array + * "lguests" and multiple simultaneous initializations. 
*/ + mutex_lock(&lguest_lock); + + if (file->private_data) { + err = -EBUSY; + goto unlock; + } + + if (copy_from_user(args, input, sizeof(args)) != 0) { + err = -EFAULT; + goto unlock; + } + + i = find_free_guest(); + if (i < 0) { + err = -ENOSPC; + goto unlock; + } + lg = &lguests[i]; + lg->guestid = i; + lg->pfn_limit = args[0]; + lg->page_offset = args[3]; + lg->regs_page = get_zeroed_page(GFP_KERNEL); + if (!lg->regs_page) { + err = -ENOMEM; + goto release_guest; + } + lg->regs = (void *)lg->regs_page + PAGE_SIZE - sizeof(*lg->regs); + + err = init_guest_pagetable(lg, args[1]); + if (err) + goto free_regs; + + setup_regs(lg->regs, args[2]); + setup_guest_gdt(lg); + init_clockdev(lg); + lg->tsk = current; + lg->mm = get_task_mm(lg->tsk); + init_waitqueue_head(&lg->break_wq); + lg->last_pages = NULL; + file->private_data = lg; + + mutex_unlock(&lguest_lock); + + return sizeof(args); + +free_regs: + free_page(lg->regs_page); +release_guest: + memset(lg, 0, sizeof(*lg)); +unlock: + mutex_unlock(&lguest_lock); + return err; +} + +static ssize_t write(struct file *file, const char __user *input, + size_t size, loff_t *off) +{ + struct lguest *lg = file->private_data; + u32 req; + + if (get_user(req, input) != 0) + return -EFAULT; + input += sizeof(req); + + if (req != LHREQ_INITIALIZE && !lg) + return -EINVAL; + if (lg && lg->dead) + return -ENOENT; + + /* If you're not the task which owns the Guest, you can only break */ + if (lg && current != lg->tsk && req != LHREQ_BREAK) + return -EPERM; + + switch (req) { + case LHREQ_INITIALIZE: + return initialize(file, (const u32 __user *)input); + case LHREQ_GETDMA: + return user_get_dma(lg, (const u32 __user *)input); + case LHREQ_IRQ: + return user_send_irq(lg, (const u32 __user *)input); + case LHREQ_BREAK: + return break_guest_out(lg, (const u32 __user *)input); + default: + return -EINVAL; + } +} + +static int close(struct inode *inode, struct file *file) +{ + struct lguest *lg = file->private_data; + + if (!lg) + 
return 0; + + mutex_lock(&lguest_lock); + /* Cancels the hrtimer set via LHCALL_SET_CLOCKEVENT. */ + hrtimer_cancel(&lg->hrt); + release_all_dma(lg); + free_guest_pagetable(lg); + mmput(lg->mm); + if (!IS_ERR(lg->dead)) + kfree(lg->dead); + free_page(lg->regs_page); + memset(lg, 0, sizeof(*lg)); + mutex_unlock(&lguest_lock); + return 0; +} + +static struct file_operations lguest_fops = { + .owner = THIS_MODULE, + .release = close, + .write = write, + .read = read, +}; +static struct miscdevice lguest_dev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "lguest", + .fops = &lguest_fops, +}; + +int __init lguest_device_init(void) +{ + return misc_register(&lguest_dev); +} + +void __exit lguest_device_remove(void) +{ + misc_deregister(&lguest_dev); +} diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c new file mode 100644 index 00000000000..1b0ba09b126 --- /dev/null +++ b/drivers/lguest/page_tables.c @@ -0,0 +1,411 @@ +/* Shadow page table operations. + * Copyright (C) Rusty Russell IBM Corporation 2006. + * GPL v2 and any later version */ +#include <linux/mm.h> +#include <linux/types.h> +#include <linux/spinlock.h> +#include <linux/random.h> +#include <linux/percpu.h> +#include <asm/tlbflush.h> +#include "lg.h" + +#define PTES_PER_PAGE_SHIFT 10 +#define PTES_PER_PAGE (1 << PTES_PER_PAGE_SHIFT) +#define SWITCHER_PGD_INDEX (PTES_PER_PAGE - 1) + +static DEFINE_PER_CPU(spte_t *, switcher_pte_pages); +#define switcher_pte_page(cpu) per_cpu(switcher_pte_pages, cpu) + +static unsigned vaddr_to_pgd_index(unsigned long vaddr) +{ + return vaddr >> (PAGE_SHIFT + PTES_PER_PAGE_SHIFT); +} + +/* These access the shadow versions (ie. the ones used by the CPU). 
*/ +static spgd_t *spgd_addr(struct lguest *lg, u32 i, unsigned long vaddr) +{ + unsigned int index = vaddr_to_pgd_index(vaddr); + + if (index >= SWITCHER_PGD_INDEX) { + kill_guest(lg, "attempt to access switcher pages"); + index = 0; + } + return &lg->pgdirs[i].pgdir[index]; +} + +static spte_t *spte_addr(struct lguest *lg, spgd_t spgd, unsigned long vaddr) +{ + spte_t *page = __va(spgd.pfn << PAGE_SHIFT); + BUG_ON(!(spgd.flags & _PAGE_PRESENT)); + return &page[(vaddr >> PAGE_SHIFT) % PTES_PER_PAGE]; +} + +/* These access the guest versions. */ +static unsigned long gpgd_addr(struct lguest *lg, unsigned long vaddr) +{ + unsigned int index = vaddr >> (PAGE_SHIFT + PTES_PER_PAGE_SHIFT); + return lg->pgdirs[lg->pgdidx].cr3 + index * sizeof(gpgd_t); +} + +static unsigned long gpte_addr(struct lguest *lg, + gpgd_t gpgd, unsigned long vaddr) +{ + unsigned long gpage = gpgd.pfn << PAGE_SHIFT; + BUG_ON(!(gpgd.flags & _PAGE_PRESENT)); + return gpage + ((vaddr>>PAGE_SHIFT) % PTES_PER_PAGE) * sizeof(gpte_t); +} + +/* Do a virtual -> physical mapping on a user page. */ +static unsigned long get_pfn(unsigned long virtpfn, int write) +{ + struct page *page; + unsigned long ret = -1UL; + + down_read(&current->mm->mmap_sem); + if (get_user_pages(current, current->mm, virtpfn << PAGE_SHIFT, + 1, write, 1, &page, NULL) == 1) + ret = page_to_pfn(page); + up_read(&current->mm->mmap_sem); + return ret; +} + +static spte_t gpte_to_spte(struct lguest *lg, gpte_t gpte, int write) +{ + spte_t spte; + unsigned long pfn; + + /* We ignore the global flag. */ + spte.flags = (gpte.flags & ~_PAGE_GLOBAL); + pfn = get_pfn(gpte.pfn, write); + if (pfn == -1UL) { + kill_guest(lg, "failed to get page %u", gpte.pfn); + /* Must not put_page() bogus page on cleanup. 
*/ + spte.flags = 0; + } + spte.pfn = pfn; + return spte; +} + +static void release_pte(spte_t pte) +{ + if (pte.flags & _PAGE_PRESENT) + put_page(pfn_to_page(pte.pfn)); +} + +static void check_gpte(struct lguest *lg, gpte_t gpte) +{ + if ((gpte.flags & (_PAGE_PWT|_PAGE_PSE)) || gpte.pfn >= lg->pfn_limit) + kill_guest(lg, "bad page table entry"); +} + +static void check_gpgd(struct lguest *lg, gpgd_t gpgd) +{ + if ((gpgd.flags & ~_PAGE_TABLE) || gpgd.pfn >= lg->pfn_limit) + kill_guest(lg, "bad page directory entry"); +} + +/* FIXME: We hold reference to pages, which prevents them from being + swapped. It'd be nice to have a callback when Linux wants to swap out. */ + +/* We fault pages in, which allows us to update accessed/dirty bits. + * Return true if we got page. */ +int demand_page(struct lguest *lg, unsigned long vaddr, int errcode) +{ + gpgd_t gpgd; + spgd_t *spgd; + unsigned long gpte_ptr; + gpte_t gpte; + spte_t *spte; + + gpgd = mkgpgd(lgread_u32(lg, gpgd_addr(lg, vaddr))); + if (!(gpgd.flags & _PAGE_PRESENT)) + return 0; + + spgd = spgd_addr(lg, lg->pgdidx, vaddr); + if (!(spgd->flags & _PAGE_PRESENT)) { + /* Get a page of PTEs for them. */ + unsigned long ptepage = get_zeroed_page(GFP_KERNEL); + /* FIXME: Steal from self in this case? */ + if (!ptepage) { + kill_guest(lg, "out of memory allocating pte page"); + return 0; + } + check_gpgd(lg, gpgd); + spgd->raw.val = (__pa(ptepage) | gpgd.flags); + } + + gpte_ptr = gpte_addr(lg, gpgd, vaddr); + gpte = mkgpte(lgread_u32(lg, gpte_ptr)); + + /* No page? */ + if (!(gpte.flags & _PAGE_PRESENT)) + return 0; + + /* Write to read-only page? */ + if ((errcode & 2) && !(gpte.flags & _PAGE_RW)) + return 0; + + /* User access to a non-user page? */ + if ((errcode & 4) && !(gpte.flags & _PAGE_USER)) + return 0; + + check_gpte(lg, gpte); + gpte.flags |= _PAGE_ACCESSED; + if (errcode & 2) + gpte.flags |= _PAGE_DIRTY; + + /* We're done with the old pte. 
*/ + spte = spte_addr(lg, *spgd, vaddr); + release_pte(*spte); + + /* We don't make it writable if this isn't a write: later + * write will fault so we can set dirty bit in guest. */ + if (gpte.flags & _PAGE_DIRTY) + *spte = gpte_to_spte(lg, gpte, 1); + else { + gpte_t ro_gpte = gpte; + ro_gpte.flags &= ~_PAGE_RW; + *spte = gpte_to_spte(lg, ro_gpte, 0); + } + + /* Now we update dirty/accessed on guest. */ + lgwrite_u32(lg, gpte_ptr, gpte.raw.val); + return 1; +} + +/* This is much faster than the full demand_page logic. */ +static int page_writable(struct lguest *lg, unsigned long vaddr) +{ + spgd_t *spgd; + unsigned long flags; + + spgd = spgd_addr(lg, lg->pgdidx, vaddr); + if (!(spgd->flags & _PAGE_PRESENT)) + return 0; + + flags = spte_addr(lg, *spgd, vaddr)->flags; + return (flags & (_PAGE_PRESENT|_PAGE_RW)) == (_PAGE_PRESENT|_PAGE_RW); +} + +void pin_page(struct lguest *lg, unsigned long vaddr) +{ + if (!page_writable(lg, vaddr) && !demand_page(lg, vaddr, 2)) + kill_guest(lg, "bad stack page %#lx", vaddr); +} + +static void release_pgd(struct lguest *lg, spgd_t *spgd) +{ + if (spgd->flags & _PAGE_PRESENT) { + unsigned int i; + spte_t *ptepage = __va(spgd->pfn << PAGE_SHIFT); + for (i = 0; i < PTES_PER_PAGE; i++) + release_pte(ptepage[i]); + free_page((long)ptepage); + spgd->raw.val = 0; + } +} + +static void flush_user_mappings(struct lguest *lg, int idx) +{ + unsigned int i; + for (i = 0; i < vaddr_to_pgd_index(lg->page_offset); i++) + release_pgd(lg, lg->pgdirs[idx].pgdir + i); +} + +void guest_pagetable_flush_user(struct lguest *lg) +{ + flush_user_mappings(lg, lg->pgdidx); +} + +static unsigned int find_pgdir(struct lguest *lg, unsigned long pgtable) +{ + unsigned int i; + for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++) + if (lg->pgdirs[i].cr3 == pgtable) + break; + return i; +} + +static unsigned int new_pgdir(struct lguest *lg, + unsigned long cr3, + int *blank_pgdir) +{ + unsigned int next; + + next = random32() % ARRAY_SIZE(lg->pgdirs); + if 
(!lg->pgdirs[next].pgdir) { + lg->pgdirs[next].pgdir = (spgd_t *)get_zeroed_page(GFP_KERNEL); + if (!lg->pgdirs[next].pgdir) + next = lg->pgdidx; + else + /* There are no mappings: you'll need to re-pin */ + *blank_pgdir = 1; + } + lg->pgdirs[next].cr3 = cr3; + /* Release all the non-kernel mappings. */ + flush_user_mappings(lg, next); + + return next; +} + +void guest_new_pagetable(struct lguest *lg, unsigned long pgtable) +{ + int newpgdir, repin = 0; + + newpgdir = find_pgdir(lg, pgtable); + if (newpgdir == ARRAY_SIZE(lg->pgdirs)) + newpgdir = new_pgdir(lg, pgtable, &repin); + lg->pgdidx = newpgdir; + if (repin) + pin_stack_pages(lg); +} + +static void release_all_pagetables(struct lguest *lg) +{ + unsigned int i, j; + + for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++) + if (lg->pgdirs[i].pgdir) + for (j = 0; j < SWITCHER_PGD_INDEX; j++) + release_pgd(lg, lg->pgdirs[i].pgdir + j); +} + +void guest_pagetable_clear_all(struct lguest *lg) +{ + release_all_pagetables(lg); + pin_stack_pages(lg); +} + +static void do_set_pte(struct lguest *lg, int idx, + unsigned long vaddr, gpte_t gpte) +{ + spgd_t *spgd = spgd_addr(lg, idx, vaddr); + if (spgd->flags & _PAGE_PRESENT) { + spte_t *spte = spte_addr(lg, *spgd, vaddr); + release_pte(*spte); + if (gpte.flags & (_PAGE_DIRTY | _PAGE_ACCESSED)) { + check_gpte(lg, gpte); + *spte = gpte_to_spte(lg, gpte, gpte.flags&_PAGE_DIRTY); + } else + spte->raw.val = 0; + } +} + +void guest_set_pte(struct lguest *lg, + unsigned long cr3, unsigned long vaddr, gpte_t gpte) +{ + /* Kernel mappings must be changed on all top levels. 
*/ + if (vaddr >= lg->page_offset) { + unsigned int i; + for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++) + if (lg->pgdirs[i].pgdir) + do_set_pte(lg, i, vaddr, gpte); + } else { + int pgdir = find_pgdir(lg, cr3); + if (pgdir != ARRAY_SIZE(lg->pgdirs)) + do_set_pte(lg, pgdir, vaddr, gpte); + } +} + +void guest_set_pmd(struct lguest *lg, unsigned long cr3, u32 idx) +{ + int pgdir; + + if (idx >= SWITCHER_PGD_INDEX) + return; + + pgdir = find_pgdir(lg, cr3); + if (pgdir < ARRAY_SIZE(lg->pgdirs)) + release_pgd(lg, lg->pgdirs[pgdir].pgdir + idx); +} + +int init_guest_pagetable(struct lguest *lg, unsigned long pgtable) +{ + /* We assume this in flush_user_mappings, so check now */ + if (vaddr_to_pgd_index(lg->page_offset) >= SWITCHER_PGD_INDEX) + return -EINVAL; + lg->pgdidx = 0; + lg->pgdirs[lg->pgdidx].cr3 = pgtable; + lg->pgdirs[lg->pgdidx].pgdir = (spgd_t*)get_zeroed_page(GFP_KERNEL); + if (!lg->pgdirs[lg->pgdidx].pgdir) + return -ENOMEM; + return 0; +} + +void free_guest_pagetable(struct lguest *lg) +{ + unsigned int i; + + release_all_pagetables(lg); + for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++) + free_page((long)lg->pgdirs[i].pgdir); +} + +/* Caller must be preempt-safe */ +void map_switcher_in_guest(struct lguest *lg, struct lguest_pages *pages) +{ + spte_t *switcher_pte_page = __get_cpu_var(switcher_pte_pages); + spgd_t switcher_pgd; + spte_t regs_pte; + + /* Since the switcher is less than 4MB, we simply mug the top pte page. */ + switcher_pgd.pfn = __pa(switcher_pte_page) >> PAGE_SHIFT; + switcher_pgd.flags = _PAGE_KERNEL; + lg->pgdirs[lg->pgdidx].pgdir[SWITCHER_PGD_INDEX] = switcher_pgd; + + /* Map our regs page over stack page. 
*/ + regs_pte.pfn = __pa(lg->regs_page) >> PAGE_SHIFT; + regs_pte.flags = _PAGE_KERNEL; + switcher_pte_page[(unsigned long)pages/PAGE_SIZE%PTES_PER_PAGE] + = regs_pte; +} + +static void free_switcher_pte_pages(void) +{ + unsigned int i; + + for_each_possible_cpu(i) + free_page((long)switcher_pte_page(i)); +} + +static __init void populate_switcher_pte_page(unsigned int cpu, + struct page *switcher_page[], + unsigned int pages) +{ + unsigned int i; + spte_t *pte = switcher_pte_page(cpu); + + for (i = 0; i < pages; i++) { + pte[i].pfn = page_to_pfn(switcher_page[i]); + pte[i].flags = _PAGE_PRESENT|_PAGE_ACCESSED; + } + + /* We only map this CPU's pages, so guest can't see others. */ + i = pages + cpu*2; + + /* First page (regs) is rw, second (state) is ro. */ + pte[i].pfn = page_to_pfn(switcher_page[i]); + pte[i].flags = _PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_RW; + pte[i+1].pfn = page_to_pfn(switcher_page[i+1]); + pte[i+1].flags = _PAGE_PRESENT|_PAGE_ACCESSED; +} + +__init int init_pagetables(struct page **switcher_page, unsigned int pages) +{ + unsigned int i; + + for_each_possible_cpu(i) { + switcher_pte_page(i) = (spte_t *)get_zeroed_page(GFP_KERNEL); + if (!switcher_pte_page(i)) { + free_switcher_pte_pages(); + return -ENOMEM; + } + populate_switcher_pte_page(i, switcher_page, pages); + } + return 0; +} + +void free_pagetables(void) +{ + free_switcher_pte_pages(); +} diff --git a/drivers/lguest/segments.c b/drivers/lguest/segments.c new file mode 100644 index 00000000000..1b2cfe89dcd --- /dev/null +++ b/drivers/lguest/segments.c @@ -0,0 +1,125 @@ +#include "lg.h" + +static int desc_ok(const struct desc_struct *gdt) +{ + /* MBZ=0, P=1, DT=1 */ + return ((gdt->b & 0x00209000) == 0x00009000); +} + +static int segment_present(const struct desc_struct *gdt) +{ + return gdt->b & 0x8000; +} + +static int ignored_gdt(unsigned int num) +{ + return (num == GDT_ENTRY_TSS + || num == GDT_ENTRY_LGUEST_CS + || num == GDT_ENTRY_LGUEST_DS + || num == GDT_ENTRY_DOUBLEFAULT_TSS); +} + 
+/* We don't allow removal of CS, DS or SS; it doesn't make sense. */ +static void check_segment_use(struct lguest *lg, unsigned int desc) +{ + if (lg->regs->gs / 8 == desc) + lg->regs->gs = 0; + if (lg->regs->fs / 8 == desc) + lg->regs->fs = 0; + if (lg->regs->es / 8 == desc) + lg->regs->es = 0; + if (lg->regs->ds / 8 == desc + || lg->regs->cs / 8 == desc + || lg->regs->ss / 8 == desc) + kill_guest(lg, "Removed live GDT entry %u", desc); +} + +static void fixup_gdt_table(struct lguest *lg, unsigned start, unsigned end) +{ + unsigned int i; + + for (i = start; i < end; i++) { + /* We never copy these ones to real gdt */ + if (ignored_gdt(i)) + continue; + + /* We could fault in switch_to_guest if they are using + * a removed segment. */ + if (!segment_present(&lg->gdt[i])) { + check_segment_use(lg, i); + continue; + } + + if (!desc_ok(&lg->gdt[i])) + kill_guest(lg, "Bad GDT descriptor %i", i); + + /* DPL 0 presumably means "for use by guest". */ + if ((lg->gdt[i].b & 0x00006000) == 0) + lg->gdt[i].b |= (GUEST_PL << 13); + + /* Set accessed bit, since gdt isn't writable. */ + lg->gdt[i].b |= 0x00000100; + } +} + +void setup_default_gdt_entries(struct lguest_ro_state *state) +{ + struct desc_struct *gdt = state->guest_gdt; + unsigned long tss = (unsigned long)&state->guest_tss; + + /* Hypervisor segments. */ + gdt[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT; + gdt[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT; + + /* This is the one which we *cannot* copy from guest, since tss + is depended on this lguest_ro_state, ie. this cpu. 
*/ + gdt[GDT_ENTRY_TSS].a = 0x00000067 | (tss << 16); + gdt[GDT_ENTRY_TSS].b = 0x00008900 | (tss & 0xFF000000) + | ((tss >> 16) & 0x000000FF); +} + +void setup_guest_gdt(struct lguest *lg) +{ + lg->gdt[GDT_ENTRY_KERNEL_CS] = FULL_EXEC_SEGMENT; + lg->gdt[GDT_ENTRY_KERNEL_DS] = FULL_SEGMENT; + lg->gdt[GDT_ENTRY_KERNEL_CS].b |= (GUEST_PL << 13); + lg->gdt[GDT_ENTRY_KERNEL_DS].b |= (GUEST_PL << 13); +} + +/* This is a fast version for the common case where only the three TLS entries + * have changed. */ +void copy_gdt_tls(const struct lguest *lg, struct desc_struct *gdt) +{ + unsigned int i; + + for (i = GDT_ENTRY_TLS_MIN; i <= GDT_ENTRY_TLS_MAX; i++) + gdt[i] = lg->gdt[i]; +} + +void copy_gdt(const struct lguest *lg, struct desc_struct *gdt) +{ + unsigned int i; + + for (i = 0; i < GDT_ENTRIES; i++) + if (!ignored_gdt(i)) + gdt[i] = lg->gdt[i]; +} + +void load_guest_gdt(struct lguest *lg, unsigned long table, u32 num) +{ + if (num > ARRAY_SIZE(lg->gdt)) + kill_guest(lg, "too many gdt entries %i", num); + + lgread(lg, lg->gdt, table, num * sizeof(lg->gdt[0])); + fixup_gdt_table(lg, 0, ARRAY_SIZE(lg->gdt)); + lg->changed |= CHANGED_GDT; +} + +void guest_load_tls(struct lguest *lg, unsigned long gtls) +{ + struct desc_struct *tls = &lg->gdt[GDT_ENTRY_TLS_MIN]; + + lgread(lg, tls, gtls, sizeof(*tls)*GDT_ENTRY_TLS_ENTRIES); + fixup_gdt_table(lg, GDT_ENTRY_TLS_MIN, GDT_ENTRY_TLS_MAX+1); + lg->changed |= CHANGED_GDT_TLS; +} diff --git a/drivers/lguest/switcher.S b/drivers/lguest/switcher.S new file mode 100644 index 00000000000..eadd4cc299d --- /dev/null +++ b/drivers/lguest/switcher.S @@ -0,0 +1,159 @@ +/* This code sits at 0xFFC00000 to do the low-level guest<->host switch. + + There are two pages above us for this CPU (struct lguest_pages). + The second page (struct lguest_ro_state) becomes read-only after the + context switch. The first page (the stack for traps) remains writable, + but while we're in here, the guest cannot be running. 
+*/ +#include <linux/linkage.h> +#include <asm/asm-offsets.h> +#include "lg.h" + +.text +ENTRY(start_switcher_text) + +/* %eax points to lguest pages for this CPU. %ebx contains cr3 value. + All normal registers can be clobbered! */ +ENTRY(switch_to_guest) + /* Save host segments on host stack. */ + pushl %es + pushl %ds + pushl %gs + pushl %fs + /* With CONFIG_FRAME_POINTER, gcc doesn't let us clobber this! */ + pushl %ebp + /* Save host stack. */ + movl %esp, LGUEST_PAGES_host_sp(%eax) + /* Switch to guest stack: if we get NMI we expect to be there. */ + movl %eax, %edx + addl $LGUEST_PAGES_regs, %edx + movl %edx, %esp + /* Switch to guest's GDT, IDT. */ + lgdt LGUEST_PAGES_guest_gdt_desc(%eax) + lidt LGUEST_PAGES_guest_idt_desc(%eax) + /* Switch to guest's TSS while GDT still writable. */ + movl $(GDT_ENTRY_TSS*8), %edx + ltr %dx + /* Set host's TSS GDT entry to available (clear byte 5 bit 2). */ + movl (LGUEST_PAGES_host_gdt_desc+2)(%eax), %edx + andb $0xFD, (GDT_ENTRY_TSS*8 + 5)(%edx) + /* Switch to guest page tables: lguest_pages->state now read-only. */ + movl %ebx, %cr3 + /* Restore guest regs */ + popl %ebx + popl %ecx + popl %edx + popl %esi + popl %edi + popl %ebp + popl %gs + popl %eax + popl %fs + popl %ds + popl %es + /* Skip error code and trap number */ + addl $8, %esp + iret + +#define SWITCH_TO_HOST \ + /* Save guest state */ \ + pushl %es; \ + pushl %ds; \ + pushl %fs; \ + pushl %eax; \ + pushl %gs; \ + pushl %ebp; \ + pushl %edi; \ + pushl %esi; \ + pushl %edx; \ + pushl %ecx; \ + pushl %ebx; \ + /* Load lguest ds segment for convenience. */ \ + movl $(LGUEST_DS), %eax; \ + movl %eax, %ds; \ + /* Figure out where we are, based on stack (at top of regs). */ \ + movl %esp, %eax; \ + subl $LGUEST_PAGES_regs, %eax; \ + /* Put trap number in %ebx before we switch cr3 and lose it. 
*/ \ + movl LGUEST_PAGES_regs_trapnum(%eax), %ebx; \ + /* Switch to host page tables (host GDT, IDT and stack are in host \ + mem, so need this first) */ \ + movl LGUEST_PAGES_host_cr3(%eax), %edx; \ + movl %edx, %cr3; \ + /* Set guest's TSS to available (clear byte 5 bit 2). */ \ + andb $0xFD, (LGUEST_PAGES_guest_gdt+GDT_ENTRY_TSS*8+5)(%eax); \ + /* Switch to host's GDT & IDT. */ \ + lgdt LGUEST_PAGES_host_gdt_desc(%eax); \ + lidt LGUEST_PAGES_host_idt_desc(%eax); \ + /* Switch to host's stack. */ \ + movl LGUEST_PAGES_host_sp(%eax), %esp; \ + /* Switch to host's TSS */ \ + movl $(GDT_ENTRY_TSS*8), %edx; \ + ltr %dx; \ + popl %ebp; \ + popl %fs; \ + popl %gs; \ + popl %ds; \ + popl %es + +/* Return to run_guest_once. */ +return_to_host: + SWITCH_TO_HOST + iret + +deliver_to_host: + SWITCH_TO_HOST + /* Decode IDT and jump to hosts' irq handler. When that does iret, it + * will return to run_guest_once. This is a feature. */ + movl (LGUEST_PAGES_host_idt_desc+2)(%eax), %edx + leal (%edx,%ebx,8), %eax + movzwl (%eax),%edx + movl 4(%eax), %eax + xorw %ax, %ax + orl %eax, %edx + jmp *%edx + +/* Real hardware interrupts are delivered straight to the host. Others + cause us to return to run_guest_once so it can decide what to do. Note + that some of these are overridden by the guest to deliver directly, and + never enter here (see load_guest_idt_entry). */ +.macro IRQ_STUB N TARGET + .data; .long 1f; .text; 1: + /* Make an error number for most traps, which don't have one. */ + .if (\N <> 8) && (\N < 10 || \N > 14) && (\N <> 17) + pushl $0 + .endif + pushl $\N + jmp \TARGET + ALIGN +.endm + +.macro IRQ_STUBS FIRST LAST TARGET + irq=\FIRST + .rept \LAST-\FIRST+1 + IRQ_STUB irq \TARGET + irq=irq+1 + .endr +.endm + +/* We intercept every interrupt, because we may need to switch back to + * host. Unfortunately we can't tell them apart except by entry + * point, so we need 256 entry points. 
+ */ +.data +.global default_idt_entries +default_idt_entries: +.text + IRQ_STUBS 0 1 return_to_host /* First two traps */ + IRQ_STUB 2 handle_nmi /* NMI */ + IRQ_STUBS 3 31 return_to_host /* Rest of traps */ + IRQ_STUBS 32 127 deliver_to_host /* Real interrupts */ + IRQ_STUB 128 return_to_host /* System call (overridden) */ + IRQ_STUBS 129 255 deliver_to_host /* Other real interrupts */ + +/* We ignore NMI and return. */ +handle_nmi: + addl $8, %esp + iret + +ENTRY(end_switcher_text) diff --git a/drivers/macintosh/macio_asic.c b/drivers/macintosh/macio_asic.c index c96b7fe882a..ec9e5f32f0a 100644 --- a/drivers/macintosh/macio_asic.c +++ b/drivers/macintosh/macio_asic.c @@ -365,10 +365,9 @@ static struct macio_dev * macio_add_one_device(struct macio_chip *chip, if (np == NULL) return NULL; - dev = kmalloc(sizeof(*dev), GFP_KERNEL); + dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) return NULL; - memset(dev, 0, sizeof(*dev)); dev->bus = &chip->lbus; dev->media_bay = in_bay; diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c index f8e1a135bf9..d409f675948 100644 --- a/drivers/macintosh/smu.c +++ b/drivers/macintosh/smu.c @@ -1053,10 +1053,9 @@ static int smu_open(struct inode *inode, struct file *file) struct smu_private *pp; unsigned long flags; - pp = kmalloc(sizeof(struct smu_private), GFP_KERNEL); + pp = kzalloc(sizeof(struct smu_private), GFP_KERNEL); if (pp == 0) return -ENOMEM; - memset(pp, 0, sizeof(struct smu_private)); spin_lock_init(&pp->lock); pp->mode = smu_file_commands; init_waitqueue_head(&pp->wait); diff --git a/drivers/macintosh/therm_pm72.c b/drivers/macintosh/therm_pm72.c index 3d90fc00209..e43554e754a 100644 --- a/drivers/macintosh/therm_pm72.c +++ b/drivers/macintosh/therm_pm72.c @@ -318,10 +318,9 @@ static struct i2c_client *attach_i2c_chip(int id, const char *name) if (adap == NULL) return NULL; - clt = kmalloc(sizeof(struct i2c_client), GFP_KERNEL); + clt = kzalloc(sizeof(struct i2c_client), GFP_KERNEL); if (clt == NULL) return 
NULL; - memset(clt, 0, sizeof(struct i2c_client)); clt->addr = (id >> 1) & 0x7f; clt->adapter = adap; diff --git a/drivers/macintosh/therm_windtunnel.c b/drivers/macintosh/therm_windtunnel.c index 3d0354e96a9..5452da1bb1a 100644 --- a/drivers/macintosh/therm_windtunnel.c +++ b/drivers/macintosh/therm_windtunnel.c @@ -431,9 +431,8 @@ do_probe( struct i2c_adapter *adapter, int addr, int kind ) | I2C_FUNC_SMBUS_WRITE_BYTE) ) return 0; - if( !(cl=kmalloc(sizeof(*cl), GFP_KERNEL)) ) + if( !(cl=kzalloc(sizeof(*cl), GFP_KERNEL)) ) return -ENOMEM; - memset( cl, 0, sizeof(struct i2c_client) ); cl->addr = addr; cl->adapter = adapter; diff --git a/drivers/macintosh/windfarm_lm75_sensor.c b/drivers/macintosh/windfarm_lm75_sensor.c index a0fabf3c200..7e10c3ab4d5 100644 --- a/drivers/macintosh/windfarm_lm75_sensor.c +++ b/drivers/macintosh/windfarm_lm75_sensor.c @@ -117,10 +117,9 @@ static struct wf_lm75_sensor *wf_lm75_create(struct i2c_adapter *adapter, DBG("wf_lm75: creating %s device at address 0x%02x\n", ds1775 ? "ds1775" : "lm75", addr); - lm = kmalloc(sizeof(struct wf_lm75_sensor), GFP_KERNEL); + lm = kzalloc(sizeof(struct wf_lm75_sensor), GFP_KERNEL); if (lm == NULL) return NULL; - memset(lm, 0, sizeof(struct wf_lm75_sensor)); /* Usual rant about sensor names not beeing very consistent in * the device-tree, oh well ... 
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 1a876f9965e..144071e70a9 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -951,13 +951,12 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors, len = sizeof(*ms) + (sizeof(ms->mirror[0]) * nr_mirrors); - ms = kmalloc(len, GFP_KERNEL); + ms = kzalloc(len, GFP_KERNEL); if (!ms) { ti->error = "Cannot allocate mirror context"; return NULL; } - memset(ms, 0, len); spin_lock_init(&ms->lock); ms->ti = ti; diff --git a/drivers/media/dvb/cinergyT2/cinergyT2.c b/drivers/media/dvb/cinergyT2/cinergyT2.c index 5a1449f485c..28929b618e2 100644 --- a/drivers/media/dvb/cinergyT2/cinergyT2.c +++ b/drivers/media/dvb/cinergyT2/cinergyT2.c @@ -905,12 +905,11 @@ static int cinergyt2_probe (struct usb_interface *intf, struct cinergyt2 *cinergyt2; int err; - if (!(cinergyt2 = kmalloc (sizeof(struct cinergyt2), GFP_KERNEL))) { + if (!(cinergyt2 = kzalloc (sizeof(struct cinergyt2), GFP_KERNEL))) { dprintk(1, "out of memory?!?\n"); return -ENOMEM; } - memset (cinergyt2, 0, sizeof (struct cinergyt2)); usb_set_intfdata (intf, (void *) cinergyt2); mutex_init(&cinergyt2->sem); diff --git a/drivers/media/video/cpia2/cpia2_core.c b/drivers/media/video/cpia2/cpia2_core.c index 55aab8d3888..a76bd786cf1 100644 --- a/drivers/media/video/cpia2/cpia2_core.c +++ b/drivers/media/video/cpia2/cpia2_core.c @@ -2224,15 +2224,13 @@ struct camera_data *cpia2_init_camera_struct(void) { struct camera_data *cam; - cam = kmalloc(sizeof(*cam), GFP_KERNEL); + cam = kzalloc(sizeof(*cam), GFP_KERNEL); if (!cam) { ERR("couldn't kmalloc cpia2 struct\n"); return NULL; } - /* Default everything to 0 */ - memset(cam, 0, sizeof(struct camera_data)); cam->present = 1; mutex_init(&cam->busy_lock); diff --git a/drivers/media/video/msp3400-driver.c b/drivers/media/video/msp3400-driver.c index 507b1d4260e..11cfcf18ec3 100644 --- a/drivers/media/video/msp3400-driver.c +++ b/drivers/media/video/msp3400-driver.c @@ -812,10 +812,9 @@ 
static int msp_attach(struct i2c_adapter *adapter, int address, int kind) int msp_product, msp_prod_hi, msp_prod_lo; int msp_rom; - client = kmalloc(sizeof(*client), GFP_KERNEL); + client = kzalloc(sizeof(*client), GFP_KERNEL); if (client == NULL) return -ENOMEM; - memset(client, 0, sizeof(*client)); client->addr = address; client->adapter = adapter; client->driver = &i2c_driver; diff --git a/drivers/media/video/planb.c b/drivers/media/video/planb.c index 1455a8f4e93..4ab1af74a97 100644 --- a/drivers/media/video/planb.c +++ b/drivers/media/video/planb.c @@ -353,9 +353,8 @@ static int planb_prepare_open(struct planb *pb) * PLANB_DUMMY)*sizeof(struct dbdma_cmd) +(PLANB_MAXLINES*((PLANB_MAXPIXELS+7)& ~7))/8 +MAX_GBUFFERS*sizeof(unsigned int); - if ((pb->priv_space = kmalloc (size, GFP_KERNEL)) == 0) + if ((pb->priv_space = kzalloc (size, GFP_KERNEL)) == 0) return -ENOMEM; - memset ((void *) pb->priv_space, 0, size); pb->overlay_last1 = pb->ch1_cmd = (volatile struct dbdma_cmd *) DBDMA_ALIGN (pb->priv_space); pb->overlay_last2 = pb->ch2_cmd = pb->ch1_cmd + pb->tab_size; diff --git a/drivers/media/video/usbvideo/vicam.c b/drivers/media/video/usbvideo/vicam.c index 2d9c0dd3b73..ff555129c82 100644 --- a/drivers/media/video/usbvideo/vicam.c +++ b/drivers/media/video/usbvideo/vicam.c @@ -1130,13 +1130,12 @@ vicam_probe( struct usb_interface *intf, const struct usb_device_id *id) } if ((cam = - kmalloc(sizeof (struct vicam_camera), GFP_KERNEL)) == NULL) { + kzalloc(sizeof (struct vicam_camera), GFP_KERNEL)) == NULL) { printk(KERN_WARNING "could not allocate kernel memory for vicam_camera struct\n"); return -ENOMEM; } - memset(cam, 0, sizeof (struct vicam_camera)); cam->shutter_speed = 15; diff --git a/drivers/mfd/mcp-core.c b/drivers/mfd/mcp-core.c index 75f401d52fd..b4ed57e0272 100644 --- a/drivers/mfd/mcp-core.c +++ b/drivers/mfd/mcp-core.c @@ -200,9 +200,8 @@ struct mcp *mcp_host_alloc(struct device *parent, size_t size) { struct mcp *mcp; - mcp = kmalloc(sizeof(struct 
mcp) + size, GFP_KERNEL); + mcp = kzalloc(sizeof(struct mcp) + size, GFP_KERNEL); if (mcp) { - memset(mcp, 0, sizeof(struct mcp) + size); spin_lock_init(&mcp->lock); mcp->attached_device.parent = parent; mcp->attached_device.bus = &mcp_bus_type; diff --git a/drivers/mfd/ucb1x00-core.c b/drivers/mfd/ucb1x00-core.c index 149810a084f..e03f1bcd4f9 100644 --- a/drivers/mfd/ucb1x00-core.c +++ b/drivers/mfd/ucb1x00-core.c @@ -484,12 +484,11 @@ static int ucb1x00_probe(struct mcp *mcp) goto err_disable; } - ucb = kmalloc(sizeof(struct ucb1x00), GFP_KERNEL); + ucb = kzalloc(sizeof(struct ucb1x00), GFP_KERNEL); ret = -ENOMEM; if (!ucb) goto err_disable; - memset(ucb, 0, sizeof(struct ucb1x00)); ucb->cdev.class = &ucb1x00_class; ucb->cdev.dev = &mcp->attached_device; diff --git a/drivers/misc/asus-laptop.c b/drivers/misc/asus-laptop.c index 7798f590e5a..f7530605997 100644 --- a/drivers/misc/asus-laptop.c +++ b/drivers/misc/asus-laptop.c @@ -979,10 +979,9 @@ static int asus_hotk_add(struct acpi_device *device) printk(ASUS_NOTICE "Asus Laptop Support version %s\n", ASUS_LAPTOP_VERSION); - hotk = kmalloc(sizeof(struct asus_hotk), GFP_KERNEL); + hotk = kzalloc(sizeof(struct asus_hotk), GFP_KERNEL); if (!hotk) return -ENOMEM; - memset(hotk, 0, sizeof(struct asus_hotk)); hotk->handle = device->handle; strcpy(acpi_device_name(device), ASUS_HOTK_DEVICE_NAME); diff --git a/drivers/misc/ibmasm/command.c b/drivers/misc/ibmasm/command.c index b5df347c81b..6497872df52 100644 --- a/drivers/misc/ibmasm/command.c +++ b/drivers/misc/ibmasm/command.c @@ -41,18 +41,16 @@ struct command *ibmasm_new_command(struct service_processor *sp, size_t buffer_s if (buffer_size > IBMASM_CMD_MAX_BUFFER_SIZE) return NULL; - cmd = kmalloc(sizeof(struct command), GFP_KERNEL); + cmd = kzalloc(sizeof(struct command), GFP_KERNEL); if (cmd == NULL) return NULL; - memset(cmd, 0, sizeof(*cmd)); - cmd->buffer = kmalloc(buffer_size, GFP_KERNEL); + cmd->buffer = kzalloc(buffer_size, GFP_KERNEL); if (cmd->buffer == 
NULL) { kfree(cmd); return NULL; } - memset(cmd->buffer, 0, buffer_size); cmd->buffer_size = buffer_size; kobject_init(&cmd->kobj); diff --git a/drivers/misc/ibmasm/ibmasmfs.c b/drivers/misc/ibmasm/ibmasmfs.c index eb7b073734b..22a7e8ba211 100644 --- a/drivers/misc/ibmasm/ibmasmfs.c +++ b/drivers/misc/ibmasm/ibmasmfs.c @@ -563,11 +563,10 @@ static ssize_t remote_settings_file_write(struct file *file, const char __user * if (*offset != 0) return 0; - buff = kmalloc (count + 1, GFP_KERNEL); + buff = kzalloc (count + 1, GFP_KERNEL); if (!buff) return -ENOMEM; - memset(buff, 0x0, count + 1); if (copy_from_user(buff, ubuff, count)) { kfree(buff); diff --git a/drivers/misc/ibmasm/module.c b/drivers/misc/ibmasm/module.c index fb03a853fac..4f9d4a9da98 100644 --- a/drivers/misc/ibmasm/module.c +++ b/drivers/misc/ibmasm/module.c @@ -77,13 +77,12 @@ static int __devinit ibmasm_init_one(struct pci_dev *pdev, const struct pci_devi /* vnc client won't work without bus-mastering */ pci_set_master(pdev); - sp = kmalloc(sizeof(struct service_processor), GFP_KERNEL); + sp = kzalloc(sizeof(struct service_processor), GFP_KERNEL); if (sp == NULL) { dev_err(&pdev->dev, "Failed to allocate memory\n"); result = -ENOMEM; goto error_kmalloc; } - memset(sp, 0, sizeof(struct service_processor)); spin_lock_init(&sp->lock); INIT_LIST_HEAD(&sp->command_queue); diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index cbd4b6e3e17..93fe2e5dd61 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -414,13 +414,12 @@ static struct mmc_blk_data *mmc_blk_alloc(struct mmc_card *card) return ERR_PTR(-ENOSPC); __set_bit(devidx, dev_use); - md = kmalloc(sizeof(struct mmc_blk_data), GFP_KERNEL); + md = kzalloc(sizeof(struct mmc_blk_data), GFP_KERNEL); if (!md) { ret = -ENOMEM; goto out; } - memset(md, 0, sizeof(struct mmc_blk_data)); /* * Set the read-only status based on the supported commands diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 
5fb659f8b20..3073f679584 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -838,6 +838,50 @@ config ULTRA32 <file:Documentation/networking/net-modules.txt>. The module will be called smc-ultra32. +config BFIN_MAC + tristate "Blackfin 536/537 on-chip mac support" + depends on NET_ETHERNET && (BF537 || BF536) && (!BF537_PORT_H) + select CRC32 + select BFIN_MAC_USE_L1 if DMA_UNCACHED_NONE + help + This is the driver for blackfin on-chip mac device. Say Y if you want it + compiled into the kernel. This driver is also available as a module + ( = code which can be inserted in and removed from the running kernel + whenever you want). The module will be called bfin_mac. + +config BFIN_MAC_USE_L1 + bool "Use L1 memory for rx/tx packets" + depends on BFIN_MAC && BF537 + default y + help + To get maximum network performance, you should use L1 memory as rx/tx buffers. + Say N here if you want to reserve L1 memory for other uses. + +config BFIN_TX_DESC_NUM + int "Number of transmit buffer packets" + depends on BFIN_MAC + range 6 10 if BFIN_MAC_USE_L1 + range 10 100 + default "10" + help + Set the number of buffer packets used in driver. + +config BFIN_RX_DESC_NUM + int "Number of receive buffer packets" + depends on BFIN_MAC + range 20 100 if BFIN_MAC_USE_L1 + range 20 800 + default "20" + help + Set the number of buffer packets used in driver. 
+ +config BFIN_MAC_RMII + bool "RMII PHY Interface (EXPERIMENTAL)" + depends on BFIN_MAC && EXPERIMENTAL + default n + help + Use Reduced PHY MII Interface + config SMC9194 tristate "SMC 9194 support" depends on NET_VENDOR_SMC && (ISA || MAC && BROKEN) diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 0e286ab8855..336af0635df 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -177,6 +177,7 @@ obj-$(CONFIG_ZORRO8390) += zorro8390.o obj-$(CONFIG_HPLANCE) += hplance.o 7990.o obj-$(CONFIG_MVME147_NET) += mvme147.o 7990.o obj-$(CONFIG_EQUALIZER) += eql.o +obj-$(CONFIG_LGUEST_GUEST) += lguest_net.o obj-$(CONFIG_MIPS_JAZZ_SONIC) += jazzsonic.o obj-$(CONFIG_MIPS_AU1X00_ENET) += au1000_eth.o obj-$(CONFIG_MIPS_SIM_NET) += mipsnet.o @@ -200,6 +201,7 @@ obj-$(CONFIG_S2IO) += s2io.o obj-$(CONFIG_MYRI10GE) += myri10ge/ obj-$(CONFIG_SMC91X) += smc91x.o obj-$(CONFIG_SMC911X) += smc911x.o +obj-$(CONFIG_BFIN_MAC) += bfin_mac.o obj-$(CONFIG_DM9000) += dm9000.o obj-$(CONFIG_FEC_8XX) += fec_8xx/ obj-$(CONFIG_PASEMI_MAC) += pasemi_mac.o diff --git a/drivers/net/arm/ether3.c b/drivers/net/arm/ether3.c index da713500654..a7cac695a9b 100644 --- a/drivers/net/arm/ether3.c +++ b/drivers/net/arm/ether3.c @@ -464,7 +464,7 @@ static void ether3_setmulticastlist(struct net_device *dev) if (dev->flags & IFF_PROMISC) { /* promiscuous mode */ priv(dev)->regs.config1 |= CFG1_RECVPROMISC; - } else if (dev->flags & IFF_ALLMULTI) { + } else if (dev->flags & IFF_ALLMULTI || dev->mc_count) { priv(dev)->regs.config1 |= CFG1_RECVSPECBRMULTI; } else priv(dev)->regs.config1 |= CFG1_RECVSPECBROAD; diff --git a/drivers/net/b44.c b/drivers/net/b44.c index 96fb0ec905a..37f1b6ff5c1 100644 --- a/drivers/net/b44.c +++ b/drivers/net/b44.c @@ -1519,14 +1519,13 @@ static void b44_setup_pseudo_magicp(struct b44 *bp) u8 *pwol_pattern; u8 pwol_mask[B44_PMASK_SIZE]; - pwol_pattern = kmalloc(B44_PATTERN_SIZE, GFP_KERNEL); + pwol_pattern = kzalloc(B44_PATTERN_SIZE, GFP_KERNEL); if (!pwol_pattern) { 
printk(KERN_ERR PFX "Memory not available for WOL\n"); return; } /* Ipv4 magic packet pattern - pattern 0.*/ - memset(pwol_pattern, 0, B44_PATTERN_SIZE); memset(pwol_mask, 0, B44_PMASK_SIZE); plen0 = b44_magic_pattern(bp->dev->dev_addr, pwol_pattern, pwol_mask, B44_ETHIPV4UDP_HLEN); diff --git a/drivers/net/bfin_mac.c b/drivers/net/bfin_mac.c new file mode 100644 index 00000000000..9a08d656f1c --- /dev/null +++ b/drivers/net/bfin_mac.c @@ -0,0 +1,1009 @@ +/* + * File: drivers/net/bfin_mac.c + * Based on: + * Maintainer: + * Bryan Wu <bryan.wu@analog.com> + * + * Original author: + * Luke Yang <luke.yang@analog.com> + * + * Created: + * Description: + * + * Modified: + * Copyright 2004-2006 Analog Devices Inc. + * + * Bugs: Enter bugs at http://blackfin.uclinux.org/ + * + * This program is free software ; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation ; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY ; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program ; see the file COPYING. + * If not, write to the Free Software Foundation, + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/delay.h> +#include <linux/timer.h> +#include <linux/errno.h> +#include <linux/irq.h> +#include <linux/io.h> +#include <linux/ioport.h> +#include <linux/crc32.h> +#include <linux/device.h> +#include <linux/spinlock.h> +#include <linux/ethtool.h> +#include <linux/mii.h> + +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/skbuff.h> + +#include <linux/platform_device.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/skbuff.h> + +#include <asm/dma.h> +#include <linux/dma-mapping.h> + +#include <asm/blackfin.h> +#include <asm/cacheflush.h> +#include <asm/portmux.h> + +#include "bfin_mac.h" + +#define DRV_NAME "bfin_mac" +#define DRV_VERSION "1.1" +#define DRV_AUTHOR "Bryan Wu, Luke Yang" +#define DRV_DESC "Blackfin BF53[67] on-chip Ethernet MAC driver" + +MODULE_AUTHOR(DRV_AUTHOR); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION(DRV_DESC); + +#if defined(CONFIG_BFIN_MAC_USE_L1) +# define bfin_mac_alloc(dma_handle, size) l1_data_sram_zalloc(size) +# define bfin_mac_free(dma_handle, ptr) l1_data_sram_free(ptr) +#else +# define bfin_mac_alloc(dma_handle, size) \ + dma_alloc_coherent(NULL, size, dma_handle, GFP_KERNEL) +# define bfin_mac_free(dma_handle, ptr) \ + dma_free_coherent(NULL, sizeof(*ptr), ptr, dma_handle) +#endif + +#define PKT_BUF_SZ 1580 + +#define MAX_TIMEOUT_CNT 500 + +/* pointers to maintain transmit list */ +static struct net_dma_desc_tx *tx_list_head; +static struct net_dma_desc_tx *tx_list_tail; +static struct net_dma_desc_rx *rx_list_head; +static struct net_dma_desc_rx *rx_list_tail; +static struct net_dma_desc_rx *current_rx_ptr; +static struct net_dma_desc_tx *current_tx_ptr; +static struct net_dma_desc_tx *tx_desc; +static struct net_dma_desc_rx *rx_desc; + +static void desc_list_free(void) +{ + struct net_dma_desc_rx *r; + struct 
net_dma_desc_tx *t; + int i; +#if !defined(CONFIG_BFIN_MAC_USE_L1) + dma_addr_t dma_handle = 0; +#endif + + if (tx_desc) { + t = tx_list_head; + for (i = 0; i < CONFIG_BFIN_TX_DESC_NUM; i++) { + if (t) { + if (t->skb) { + dev_kfree_skb(t->skb); + t->skb = NULL; + } + t = t->next; + } + } + bfin_mac_free(dma_handle, tx_desc); + } + + if (rx_desc) { + r = rx_list_head; + for (i = 0; i < CONFIG_BFIN_RX_DESC_NUM; i++) { + if (r) { + if (r->skb) { + dev_kfree_skb(r->skb); + r->skb = NULL; + } + r = r->next; + } + } + bfin_mac_free(dma_handle, rx_desc); + } +} + +static int desc_list_init(void) +{ + int i; + struct sk_buff *new_skb; +#if !defined(CONFIG_BFIN_MAC_USE_L1) + /* + * This dma_handle is useless in Blackfin dma_alloc_coherent(). + * The real dma handler is the return value of dma_alloc_coherent(). + */ + dma_addr_t dma_handle; +#endif + + tx_desc = bfin_mac_alloc(&dma_handle, + sizeof(struct net_dma_desc_tx) * + CONFIG_BFIN_TX_DESC_NUM); + if (tx_desc == NULL) + goto init_error; + + rx_desc = bfin_mac_alloc(&dma_handle, + sizeof(struct net_dma_desc_rx) * + CONFIG_BFIN_RX_DESC_NUM); + if (rx_desc == NULL) + goto init_error; + + /* init tx_list */ + tx_list_head = tx_list_tail = tx_desc; + + for (i = 0; i < CONFIG_BFIN_TX_DESC_NUM; i++) { + struct net_dma_desc_tx *t = tx_desc + i; + struct dma_descriptor *a = &(t->desc_a); + struct dma_descriptor *b = &(t->desc_b); + + /* + * disable DMA + * read from memory WNR = 0 + * wordsize is 32 bits + * 6 half words is desc size + * large desc flow + */ + a->config = WDSIZE_32 | NDSIZE_6 | DMAFLOW_LARGE; + a->start_addr = (unsigned long)t->packet; + a->x_count = 0; + a->next_dma_desc = b; + + /* + * enabled DMA + * write to memory WNR = 1 + * wordsize is 32 bits + * disable interrupt + * 6 half words is desc size + * large desc flow + */ + b->config = DMAEN | WNR | WDSIZE_32 | NDSIZE_6 | DMAFLOW_LARGE; + b->start_addr = (unsigned long)(&(t->status)); + b->x_count = 0; + + t->skb = NULL; + tx_list_tail->desc_b.next_dma_desc 
= a; + tx_list_tail->next = t; + tx_list_tail = t; + } + tx_list_tail->next = tx_list_head; /* tx_list is a circle */ + tx_list_tail->desc_b.next_dma_desc = &(tx_list_head->desc_a); + current_tx_ptr = tx_list_head; + + /* init rx_list */ + rx_list_head = rx_list_tail = rx_desc; + + for (i = 0; i < CONFIG_BFIN_RX_DESC_NUM; i++) { + struct net_dma_desc_rx *r = rx_desc + i; + struct dma_descriptor *a = &(r->desc_a); + struct dma_descriptor *b = &(r->desc_b); + + /* allocate a new skb for next time receive */ + new_skb = dev_alloc_skb(PKT_BUF_SZ + 2); + if (!new_skb) { + printk(KERN_NOTICE DRV_NAME + ": init: low on mem - packet dropped\n"); + goto init_error; + } + skb_reserve(new_skb, 2); + r->skb = new_skb; + + /* + * enabled DMA + * write to memory WNR = 1 + * wordsize is 32 bits + * disable interrupt + * 6 half words is desc size + * large desc flow + */ + a->config = DMAEN | WNR | WDSIZE_32 | NDSIZE_6 | DMAFLOW_LARGE; + /* since RXDWA is enabled */ + a->start_addr = (unsigned long)new_skb->data - 2; + a->x_count = 0; + a->next_dma_desc = b; + + /* + * enabled DMA + * write to memory WNR = 1 + * wordsize is 32 bits + * enable interrupt + * 6 half words is desc size + * large desc flow + */ + b->config = DMAEN | WNR | WDSIZE_32 | DI_EN | + NDSIZE_6 | DMAFLOW_LARGE; + b->start_addr = (unsigned long)(&(r->status)); + b->x_count = 0; + + rx_list_tail->desc_b.next_dma_desc = a; + rx_list_tail->next = r; + rx_list_tail = r; + } + rx_list_tail->next = rx_list_head; /* rx_list is a circle */ + rx_list_tail->desc_b.next_dma_desc = &(rx_list_head->desc_a); + current_rx_ptr = rx_list_head; + + return 0; + +init_error: + desc_list_free(); + printk(KERN_ERR DRV_NAME ": kmalloc failed\n"); + return -ENOMEM; +} + + +/*---PHY CONTROL AND CONFIGURATION-----------------------------------------*/ + +/* Set FER regs to MUX in Ethernet pins */ +static int setup_pin_mux(int action) +{ +#if defined(CONFIG_BFIN_MAC_RMII) + u16 pin_req[] = P_RMII0; +#else + u16 pin_req[] = P_MII0; +#endif 
+ + if (action) { + if (peripheral_request_list(pin_req, DRV_NAME)) { + printk(KERN_ERR DRV_NAME + ": Requesting Peripherals failed\n"); + return -EFAULT; + } + } else + peripheral_free_list(pin_req); + + return 0; +} + +/* Wait until the previous MDC/MDIO transaction has completed */ +static void poll_mdc_done(void) +{ + int timeout_cnt = MAX_TIMEOUT_CNT; + + /* poll the STABUSY bit */ + while ((bfin_read_EMAC_STAADD()) & STABUSY) { + mdelay(10); + if (timeout_cnt-- < 0) { + printk(KERN_ERR DRV_NAME + ": wait MDC/MDIO transaction to complete timeout\n"); + break; + } + } +} + +/* Read an off-chip register in a PHY through the MDC/MDIO port */ +static u16 read_phy_reg(u16 PHYAddr, u16 RegAddr) +{ + poll_mdc_done(); + /* read mode */ + bfin_write_EMAC_STAADD(SET_PHYAD(PHYAddr) | + SET_REGAD(RegAddr) | + STABUSY); + poll_mdc_done(); + + return (u16) bfin_read_EMAC_STADAT(); +} + +/* Write an off-chip register in a PHY through the MDC/MDIO port */ +static void raw_write_phy_reg(u16 PHYAddr, u16 RegAddr, u32 Data) +{ + bfin_write_EMAC_STADAT(Data); + + /* write mode */ + bfin_write_EMAC_STAADD(SET_PHYAD(PHYAddr) | + SET_REGAD(RegAddr) | + STAOP | + STABUSY); + + poll_mdc_done(); +} + +static void write_phy_reg(u16 PHYAddr, u16 RegAddr, u32 Data) +{ + poll_mdc_done(); + raw_write_phy_reg(PHYAddr, RegAddr, Data); +} + +/* set up the phy */ +static void bf537mac_setphy(struct net_device *dev) +{ + u16 phydat; + struct bf537mac_local *lp = netdev_priv(dev); + + /* Program PHY registers */ + pr_debug("start setting up phy\n"); + + /* issue a reset */ + raw_write_phy_reg(lp->PhyAddr, PHYREG_MODECTL, 0x8000); + + /* wait half a second */ + msleep(500); + + phydat = read_phy_reg(lp->PhyAddr, PHYREG_MODECTL); + + /* advertise flow control supported */ + phydat = read_phy_reg(lp->PhyAddr, PHYREG_ANAR); + phydat |= (1 << 10); + write_phy_reg(lp->PhyAddr, PHYREG_ANAR, phydat); + + phydat = 0; + if (lp->Negotiate) + phydat |= 0x1000; /* enable auto negotiation */ + else { + if 
(lp->FullDuplex) + phydat |= (1 << 8); /* full duplex */ + else + phydat &= (~(1 << 8)); /* half duplex */ + + if (!lp->Port10) + phydat |= (1 << 13); /* 100 Mbps */ + else + phydat &= (~(1 << 13)); /* 10 Mbps */ + } + + if (lp->Loopback) + phydat |= (1 << 14); /* enable TX->RX loopback */ + + write_phy_reg(lp->PhyAddr, PHYREG_MODECTL, phydat); + msleep(500); + + phydat = read_phy_reg(lp->PhyAddr, PHYREG_MODECTL); + /* check for SMSC PHY */ + if ((read_phy_reg(lp->PhyAddr, PHYREG_PHYID1) == 0x7) && + ((read_phy_reg(lp->PhyAddr, PHYREG_PHYID2) & 0xfff0) == 0xC0A0)) { + /* + * we have SMSC PHY so reqest interrupt + * on link down condition + */ + + /* enable interrupts */ + write_phy_reg(lp->PhyAddr, 30, 0x0ff); + } +} + +/**************************************************************************/ +void setup_system_regs(struct net_device *dev) +{ + int phyaddr; + unsigned short sysctl, phydat; + u32 opmode; + struct bf537mac_local *lp = netdev_priv(dev); + int count = 0; + + phyaddr = lp->PhyAddr; + + /* Enable PHY output */ + if (!(bfin_read_VR_CTL() & PHYCLKOE)) + bfin_write_VR_CTL(bfin_read_VR_CTL() | PHYCLKOE); + + /* MDC = 2.5 MHz */ + sysctl = SET_MDCDIV(24); + /* Odd word alignment for Receive Frame DMA word */ + /* Configure checksum support and rcve frame word alignment */ +#if defined(BFIN_MAC_CSUM_OFFLOAD) + sysctl |= RXDWA | RXCKS; +#else + sysctl |= RXDWA; +#endif + bfin_write_EMAC_SYSCTL(sysctl); + /* auto negotiation on */ + /* full duplex */ + /* 100 Mbps */ + phydat = PHY_ANEG_EN | PHY_DUPLEX | PHY_SPD_SET; + write_phy_reg(phyaddr, PHYREG_MODECTL, phydat); + + /* test if full duplex supported */ + do { + msleep(100); + phydat = read_phy_reg(phyaddr, PHYREG_MODESTAT); + if (count > 30) { + printk(KERN_NOTICE DRV_NAME ": Link is down\n"); + printk(KERN_NOTICE DRV_NAME + "please check your network connection\n"); + break; + } + count++; + } while (!(phydat & 0x0004)); + + phydat = read_phy_reg(phyaddr, PHYREG_ANLPAR); + + if ((phydat & 0x0100) || 
(phydat & 0x0040)) { + opmode = FDMODE; + } else { + opmode = 0; + printk(KERN_INFO DRV_NAME + ": Network is set to half duplex\n"); + } + +#if defined(CONFIG_BFIN_MAC_RMII) + opmode |= RMII; /* For Now only 100MBit are supported */ +#endif + + bfin_write_EMAC_OPMODE(opmode); + + bfin_write_EMAC_MMC_CTL(RSTC | CROLL); + + /* Initialize the TX DMA channel registers */ + bfin_write_DMA2_X_COUNT(0); + bfin_write_DMA2_X_MODIFY(4); + bfin_write_DMA2_Y_COUNT(0); + bfin_write_DMA2_Y_MODIFY(0); + + /* Initialize the RX DMA channel registers */ + bfin_write_DMA1_X_COUNT(0); + bfin_write_DMA1_X_MODIFY(4); + bfin_write_DMA1_Y_COUNT(0); + bfin_write_DMA1_Y_MODIFY(0); +} + +void setup_mac_addr(u8 * mac_addr) +{ + u32 addr_low = le32_to_cpu(*(__le32 *) & mac_addr[0]); + u16 addr_hi = le16_to_cpu(*(__le16 *) & mac_addr[4]); + + /* this depends on a little-endian machine */ + bfin_write_EMAC_ADDRLO(addr_low); + bfin_write_EMAC_ADDRHI(addr_hi); +} + +static void adjust_tx_list(void) +{ + int timeout_cnt = MAX_TIMEOUT_CNT; + + if (tx_list_head->status.status_word != 0 + && current_tx_ptr != tx_list_head) { + goto adjust_head; /* released something, just return; */ + } + + /* + * if nothing released, check wait condition + * current's next can not be the head, + * otherwise the dma will not stop as we want + */ + if (current_tx_ptr->next->next == tx_list_head) { + while (tx_list_head->status.status_word == 0) { + mdelay(10); + if (tx_list_head->status.status_word != 0 + || !(bfin_read_DMA2_IRQ_STATUS() & 0x08)) { + goto adjust_head; + } + if (timeout_cnt-- < 0) { + printk(KERN_ERR DRV_NAME + ": wait for adjust tx list head timeout\n"); + break; + } + } + if (tx_list_head->status.status_word != 0) { + goto adjust_head; + } + } + + return; + +adjust_head: + do { + tx_list_head->desc_a.config &= ~DMAEN; + tx_list_head->status.status_word = 0; + if (tx_list_head->skb) { + dev_kfree_skb(tx_list_head->skb); + tx_list_head->skb = NULL; + } else { + printk(KERN_ERR DRV_NAME + ": no sk_buff 
in a transmitted frame!\n"); + } + tx_list_head = tx_list_head->next; + } while (tx_list_head->status.status_word != 0 + && current_tx_ptr != tx_list_head); + return; + +} + +static int bf537mac_hard_start_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct bf537mac_local *lp = netdev_priv(dev); + unsigned int data; + + current_tx_ptr->skb = skb; + + /* + * Is skb->data always 16-bit aligned? + * Do we need to memcpy((char *)(tail->packet + 2), skb->data, len)? + */ + if ((((unsigned int)(skb->data)) & 0x02) == 2) { + /* move skb->data to current_tx_ptr payload */ + data = (unsigned int)(skb->data) - 2; + *((unsigned short *)data) = (unsigned short)(skb->len); + current_tx_ptr->desc_a.start_addr = (unsigned long)data; + /* this is important! */ + blackfin_dcache_flush_range(data, (data + (skb->len)) + 2); + + } else { + *((unsigned short *)(current_tx_ptr->packet)) = + (unsigned short)(skb->len); + memcpy((char *)(current_tx_ptr->packet + 2), skb->data, + (skb->len)); + current_tx_ptr->desc_a.start_addr = + (unsigned long)current_tx_ptr->packet; + if (current_tx_ptr->status.status_word != 0) + current_tx_ptr->status.status_word = 0; + blackfin_dcache_flush_range((unsigned int)current_tx_ptr-> + packet, + (unsigned int)(current_tx_ptr-> + packet + skb->len) + + 2); + } + + /* enable this packet's dma */ + current_tx_ptr->desc_a.config |= DMAEN; + + /* tx dma is running, just return */ + if (bfin_read_DMA2_IRQ_STATUS() & 0x08) + goto out; + + /* tx dma is not running */ + bfin_write_DMA2_NEXT_DESC_PTR(&(current_tx_ptr->desc_a)); + /* dma enabled, read from memory, size is 6 */ + bfin_write_DMA2_CONFIG(current_tx_ptr->desc_a.config); + /* Turn on the EMAC tx */ + bfin_write_EMAC_OPMODE(bfin_read_EMAC_OPMODE() | TE); + +out: + adjust_tx_list(); + current_tx_ptr = current_tx_ptr->next; + dev->trans_start = jiffies; + lp->stats.tx_packets++; + lp->stats.tx_bytes += (skb->len); + return 0; +} + +static void bf537mac_rx(struct net_device *dev) +{ + struct 
sk_buff *skb, *new_skb; + struct bf537mac_local *lp = netdev_priv(dev); + unsigned short len; + + /* allocate a new skb for next time receive */ + skb = current_rx_ptr->skb; + new_skb = dev_alloc_skb(PKT_BUF_SZ + 2); + if (!new_skb) { + printk(KERN_NOTICE DRV_NAME + ": rx: low on mem - packet dropped\n"); + lp->stats.rx_dropped++; + goto out; + } + /* reserve 2 bytes for RXDWA padding */ + skb_reserve(new_skb, 2); + current_rx_ptr->skb = new_skb; + current_rx_ptr->desc_a.start_addr = (unsigned long)new_skb->data - 2; + + len = (unsigned short)((current_rx_ptr->status.status_word) & RX_FRLEN); + skb_put(skb, len); + blackfin_dcache_invalidate_range((unsigned long)skb->head, + (unsigned long)skb->tail); + + dev->last_rx = jiffies; + skb->dev = dev; + skb->protocol = eth_type_trans(skb, dev); +#if defined(BFIN_MAC_CSUM_OFFLOAD) + skb->csum = current_rx_ptr->status.ip_payload_csum; + skb->ip_summed = CHECKSUM_PARTIAL; +#endif + + netif_rx(skb); + lp->stats.rx_packets++; + lp->stats.rx_bytes += len; + current_rx_ptr->status.status_word = 0x00000000; + current_rx_ptr = current_rx_ptr->next; + +out: + return; +} + +/* interrupt routine to handle rx and error signal */ +static irqreturn_t bf537mac_interrupt(int irq, void *dev_id) +{ + struct net_device *dev = dev_id; + int number = 0; + +get_one_packet: + if (current_rx_ptr->status.status_word == 0) { + /* no more new packet received */ + if (number == 0) { + if (current_rx_ptr->next->status.status_word != 0) { + current_rx_ptr = current_rx_ptr->next; + goto real_rx; + } + } + bfin_write_DMA1_IRQ_STATUS(bfin_read_DMA1_IRQ_STATUS() | + DMA_DONE | DMA_ERR); + return IRQ_HANDLED; + } + +real_rx: + bf537mac_rx(dev); + number++; + goto get_one_packet; +} + +#ifdef CONFIG_NET_POLL_CONTROLLER +static void bf537mac_poll(struct net_device *dev) +{ + disable_irq(IRQ_MAC_RX); + bf537mac_interrupt(IRQ_MAC_RX, dev); + enable_irq(IRQ_MAC_RX); +} +#endif /* CONFIG_NET_POLL_CONTROLLER */ + +static void bf537mac_reset(void) +{ + unsigned 
int opmode; + + opmode = bfin_read_EMAC_OPMODE(); + opmode &= (~RE); + opmode &= (~TE); + /* Turn off the EMAC */ + bfin_write_EMAC_OPMODE(opmode); +} + +/* + * Enable Interrupts, Receive, and Transmit + */ +static int bf537mac_enable(struct net_device *dev) +{ + u32 opmode; + + pr_debug("%s: %s\n", dev->name, __FUNCTION__); + + /* Set RX DMA */ + bfin_write_DMA1_NEXT_DESC_PTR(&(rx_list_head->desc_a)); + bfin_write_DMA1_CONFIG(rx_list_head->desc_a.config); + + /* Wait MII done */ + poll_mdc_done(); + + /* We enable only RX here */ + /* ASTP : Enable Automatic Pad Stripping + PR : Promiscuous Mode for test + PSF : Receive frames with total length less than 64 bytes. + FDMODE : Full Duplex Mode + LB : Internal Loopback for test + RE : Receiver Enable */ + opmode = bfin_read_EMAC_OPMODE(); + if (opmode & FDMODE) + opmode |= PSF; + else + opmode |= DRO | DC | PSF; + opmode |= RE; + +#if defined(CONFIG_BFIN_MAC_RMII) + opmode |= RMII; /* For Now only 100MBit are supported */ +#ifdef CONFIG_BF_REV_0_2 + opmode |= TE; +#endif +#endif + /* Turn on the EMAC rx */ + bfin_write_EMAC_OPMODE(opmode); + + return 0; +} + +/* Our watchdog timed out. Called by the networking layer */ +static void bf537mac_timeout(struct net_device *dev) +{ + pr_debug("%s: %s\n", dev->name, __FUNCTION__); + + bf537mac_reset(); + + /* reset tx queue */ + tx_list_tail = tx_list_head->next; + + bf537mac_enable(dev); + + /* We can accept TX packets again */ + dev->trans_start = jiffies; + netif_wake_queue(dev); +} + +/* + * Get the current statistics. + * This may be called with the card open or closed. 
+ */ +static struct net_device_stats *bf537mac_query_statistics(struct net_device + *dev) +{ + struct bf537mac_local *lp = netdev_priv(dev); + + pr_debug("%s: %s\n", dev->name, __FUNCTION__); + + return &lp->stats; +} + +/* + * This routine will, depending on the values passed to it, + * either make it accept multicast packets, go into + * promiscuous mode (for TCPDUMP and cousins) or accept + * a select set of multicast packets + */ +static void bf537mac_set_multicast_list(struct net_device *dev) +{ + u32 sysctl; + + if (dev->flags & IFF_PROMISC) { + printk(KERN_INFO "%s: set to promisc mode\n", dev->name); + sysctl = bfin_read_EMAC_OPMODE(); + sysctl |= RAF; + bfin_write_EMAC_OPMODE(sysctl); + } else if (dev->flags & IFF_ALLMULTI || dev->mc_count) { + /* accept all multicast */ + sysctl = bfin_read_EMAC_OPMODE(); + sysctl |= PAM; + bfin_write_EMAC_OPMODE(sysctl); + } else { + /* clear promisc or multicast mode */ + sysctl = bfin_read_EMAC_OPMODE(); + sysctl &= ~(RAF | PAM); + bfin_write_EMAC_OPMODE(sysctl); + } +} + +/* + * this puts the device in an inactive state + */ +static void bf537mac_shutdown(struct net_device *dev) +{ + /* Turn off the EMAC */ + bfin_write_EMAC_OPMODE(0x00000000); + /* Turn off the EMAC RX DMA */ + bfin_write_DMA1_CONFIG(0x0000); + bfin_write_DMA2_CONFIG(0x0000); +} + +/* + * Open and Initialize the interface + * + * Set up everything, reset the card, etc.. + */ +static int bf537mac_open(struct net_device *dev) +{ + pr_debug("%s: %s\n", dev->name, __FUNCTION__); + + /* + * Check that the address is valid. If its not, refuse + * to bring the device up. 
The user must specify an + * address using ifconfig eth0 hw ether xx:xx:xx:xx:xx:xx + */ + if (!is_valid_ether_addr(dev->dev_addr)) { + printk(KERN_WARNING DRV_NAME ": no valid ethernet hw addr\n"); + return -EINVAL; + } + + /* initial rx and tx list */ + desc_list_init(); + + bf537mac_setphy(dev); + setup_system_regs(dev); + bf537mac_reset(); + bf537mac_enable(dev); + + pr_debug("hardware init finished\n"); + netif_start_queue(dev); + netif_carrier_on(dev); + + return 0; +} + +/* + * + * this makes the board clean up everything that it can + * and not talk to the outside world. Caused by + * an 'ifconfig ethX down' + */ +static int bf537mac_close(struct net_device *dev) +{ + pr_debug("%s: %s\n", dev->name, __FUNCTION__); + + netif_stop_queue(dev); + netif_carrier_off(dev); + + /* clear everything */ + bf537mac_shutdown(dev); + + /* free the rx/tx buffers */ + desc_list_free(); + + return 0; +} + +static int __init bf537mac_probe(struct net_device *dev) +{ + struct bf537mac_local *lp = netdev_priv(dev); + int retval; + + /* Grab the MAC address in the MAC */ + *(__le32 *) (&(dev->dev_addr[0])) = cpu_to_le32(bfin_read_EMAC_ADDRLO()); + *(__le16 *) (&(dev->dev_addr[4])) = cpu_to_le16((u16) bfin_read_EMAC_ADDRHI()); + + /* probe mac */ + /*todo: how to proble? which is revision_register */ + bfin_write_EMAC_ADDRLO(0x12345678); + if (bfin_read_EMAC_ADDRLO() != 0x12345678) { + pr_debug("can't detect bf537 mac!\n"); + retval = -ENODEV; + goto err_out; + } + + /* set the GPIO pins to Ethernet mode */ + retval = setup_pin_mux(1); + + if (retval) + return retval; + + /*Is it valid? (Did bootloader initialize it?) 
*/ + if (!is_valid_ether_addr(dev->dev_addr)) { + /* Grab the MAC from the board somehow - this is done in the + arch/blackfin/mach-bf537/boards/eth_mac.c */ + get_bf537_ether_addr(dev->dev_addr); + } + + /* If still not valid, get a random one */ + if (!is_valid_ether_addr(dev->dev_addr)) { + random_ether_addr(dev->dev_addr); + } + + setup_mac_addr(dev->dev_addr); + + /* Fill in the fields of the device structure with ethernet values. */ + ether_setup(dev); + + dev->open = bf537mac_open; + dev->stop = bf537mac_close; + dev->hard_start_xmit = bf537mac_hard_start_xmit; + dev->tx_timeout = bf537mac_timeout; + dev->get_stats = bf537mac_query_statistics; + dev->set_multicast_list = bf537mac_set_multicast_list; +#ifdef CONFIG_NET_POLL_CONTROLLER + dev->poll_controller = bf537mac_poll; +#endif + + /* fill in some of the fields */ + lp->version = 1; + lp->PhyAddr = 0x01; + lp->CLKIN = 25; + lp->FullDuplex = 0; + lp->Negotiate = 1; + lp->FlowControl = 0; + spin_lock_init(&lp->lock); + + /* now, enable interrupts */ + /* register irq handler */ + if (request_irq + (IRQ_MAC_RX, bf537mac_interrupt, IRQF_DISABLED | IRQF_SHARED, + "BFIN537_MAC_RX", dev)) { + printk(KERN_WARNING DRV_NAME + ": Unable to attach BlackFin MAC RX interrupt\n"); + return -EBUSY; + } + + /* Enable PHY output early */ + if (!(bfin_read_VR_CTL() & PHYCLKOE)) + bfin_write_VR_CTL(bfin_read_VR_CTL() | PHYCLKOE); + + retval = register_netdev(dev); + if (retval == 0) { + /* now, print out the card info, in a short format.. 
*/ + printk(KERN_INFO "%s: Version %s, %s\n", + DRV_NAME, DRV_VERSION, DRV_DESC); + } + +err_out: + return retval; +} + +static int bfin_mac_probe(struct platform_device *pdev) +{ + struct net_device *ndev; + + ndev = alloc_etherdev(sizeof(struct bf537mac_local)); + if (!ndev) { + printk(KERN_WARNING DRV_NAME ": could not allocate device\n"); + return -ENOMEM; + } + + SET_MODULE_OWNER(ndev); + SET_NETDEV_DEV(ndev, &pdev->dev); + + platform_set_drvdata(pdev, ndev); + + if (bf537mac_probe(ndev) != 0) { + platform_set_drvdata(pdev, NULL); + free_netdev(ndev); + printk(KERN_WARNING DRV_NAME ": not found\n"); + return -ENODEV; + } + + return 0; +} + +static int bfin_mac_remove(struct platform_device *pdev) +{ + struct net_device *ndev = platform_get_drvdata(pdev); + + platform_set_drvdata(pdev, NULL); + + unregister_netdev(ndev); + + free_irq(IRQ_MAC_RX, ndev); + + free_netdev(ndev); + + setup_pin_mux(0); + + return 0; +} + +static int bfin_mac_suspend(struct platform_device *pdev, pm_message_t state) +{ + return 0; +} + +static int bfin_mac_resume(struct platform_device *pdev) +{ + return 0; +} + +static struct platform_driver bfin_mac_driver = { + .probe = bfin_mac_probe, + .remove = bfin_mac_remove, + .resume = bfin_mac_resume, + .suspend = bfin_mac_suspend, + .driver = { + .name = DRV_NAME, + }, +}; + +static int __init bfin_mac_init(void) +{ + return platform_driver_register(&bfin_mac_driver); +} + +module_init(bfin_mac_init); + +static void __exit bfin_mac_cleanup(void) +{ + platform_driver_unregister(&bfin_mac_driver); +} + +module_exit(bfin_mac_cleanup); diff --git a/drivers/net/bfin_mac.h b/drivers/net/bfin_mac.h new file mode 100644 index 00000000000..af87189b85f --- /dev/null +++ b/drivers/net/bfin_mac.h @@ -0,0 +1,132 @@ +/* + * File: drivers/net/bfin_mac.c + * Based on: + * Maintainer: + * Bryan Wu <bryan.wu@analog.com> + * + * Original author: + * Luke Yang <luke.yang@analog.com> + * + * Created: + * Description: + * + * Modified: + * Copyright 2004-2006 
Analog Devices Inc. + * + * Bugs: Enter bugs at http://blackfin.uclinux.org/ + * + * This program is free software ; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation ; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY ; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program ; see the file COPYING. + * If not, write to the Free Software Foundation, + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +/* + * PHY REGISTER NAMES + */ +#define PHYREG_MODECTL 0x0000 +#define PHYREG_MODESTAT 0x0001 +#define PHYREG_PHYID1 0x0002 +#define PHYREG_PHYID2 0x0003 +#define PHYREG_ANAR 0x0004 +#define PHYREG_ANLPAR 0x0005 +#define PHYREG_ANER 0x0006 +#define PHYREG_NSR 0x0010 +#define PHYREG_LBREMR 0x0011 +#define PHYREG_REC 0x0012 +#define PHYREG_10CFG 0x0013 +#define PHYREG_PHY1_1 0x0014 +#define PHYREG_PHY1_2 0x0015 +#define PHYREG_PHY2 0x0016 +#define PHYREG_TW_1 0x0017 +#define PHYREG_TW_2 0x0018 +#define PHYREG_TEST 0x0019 + +#define PHY_RESET 0x8000 +#define PHY_ANEG_EN 0x1000 +#define PHY_DUPLEX 0x0100 +#define PHY_SPD_SET 0x2000 + +#define BFIN_MAC_CSUM_OFFLOAD + +struct dma_descriptor { + struct dma_descriptor *next_dma_desc; + unsigned long start_addr; + unsigned short config; + unsigned short x_count; +}; + +struct status_area_rx { +#if defined(BFIN_MAC_CSUM_OFFLOAD) + unsigned short ip_hdr_csum; /* ip header checksum */ + /* ip payload(udp or tcp or others) checksum */ + unsigned short ip_payload_csum; +#endif + unsigned long status_word; /* the frame status word */ +}; + +struct status_area_tx { + unsigned long status_word; /* the frame status word */ 
+}; + +/* use two descriptors for a packet */ +struct net_dma_desc_rx { + struct net_dma_desc_rx *next; + struct sk_buff *skb; + struct dma_descriptor desc_a; + struct dma_descriptor desc_b; + struct status_area_rx status; +}; + +/* use two descriptors for a packet */ +struct net_dma_desc_tx { + struct net_dma_desc_tx *next; + struct sk_buff *skb; + struct dma_descriptor desc_a; + struct dma_descriptor desc_b; + unsigned char packet[1560]; + struct status_area_tx status; +}; + +struct bf537mac_local { + /* + * these are things that the kernel wants me to keep, so users + * can find out semi-useless statistics of how well the card is + * performing + */ + struct net_device_stats stats; + + int version; + + int FlowEnabled; /* record if data flow is active */ + int EtherIntIVG; /* IVG for the ethernet interrupt */ + int RXIVG; /* IVG for the RX completion */ + int TXIVG; /* IVG for the TX completion */ + int PhyAddr; /* PHY address */ + int OpMode; /* set these bits n the OPMODE regs */ + int Port10; /* set port speed to 10 Mbit/s */ + int GenChksums; /* IP checksums to be calculated */ + int NoRcveLnth; /* dont insert recv length at start of buffer */ + int StripPads; /* remove trailing pad bytes */ + int FullDuplex; /* set full duplex mode */ + int Negotiate; /* enable auto negotiation */ + int Loopback; /* loopback at the PHY */ + int Cache; /* Buffers may be cached */ + int FlowControl; /* flow control active */ + int CLKIN; /* clock in value in MHZ */ + unsigned short IntMask; /* interrupt mask */ + unsigned char Mac[6]; /* MAC address of the board */ + spinlock_t lock; +}; + +extern void get_bf537_ether_addr(char *addr); diff --git a/drivers/net/bsd_comp.c b/drivers/net/bsd_comp.c index 7845eaf6f29..202d4a4ef75 100644 --- a/drivers/net/bsd_comp.c +++ b/drivers/net/bsd_comp.c @@ -395,14 +395,13 @@ static void *bsd_alloc (unsigned char *options, int opt_len, int decomp) * Allocate the main control structure for this instance. 
*/ maxmaxcode = MAXCODE(bits); - db = kmalloc(sizeof (struct bsd_db), + db = kzalloc(sizeof (struct bsd_db), GFP_KERNEL); if (!db) { return NULL; } - memset (db, 0, sizeof(struct bsd_db)); /* * Allocate space for the dictionary. This may be more than one page in * length. diff --git a/drivers/net/ehea/ehea.h b/drivers/net/ehea/ehea.h index 6628fa622e2..489c8b260dd 100644 --- a/drivers/net/ehea/ehea.h +++ b/drivers/net/ehea/ehea.h @@ -39,7 +39,7 @@ #include <asm/io.h> #define DRV_NAME "ehea" -#define DRV_VERSION "EHEA_0070" +#define DRV_VERSION "EHEA_0071" /* eHEA capability flags */ #define DLPAR_PORT_ADD_REM 1 diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c index 1d1571cf322..4c70a9301c1 100644 --- a/drivers/net/ehea/ehea_main.c +++ b/drivers/net/ehea/ehea_main.c @@ -466,6 +466,8 @@ static struct ehea_cqe *ehea_proc_rwqes(struct net_device *dev, cqe->vlan_tag); else netif_receive_skb(skb); + + dev->last_rx = jiffies; } else { pr->p_stats.poll_receive_errors++; port_reset = ehea_treat_poll_error(pr, rq, cqe, @@ -1433,7 +1435,8 @@ static int ehea_broadcast_reg_helper(struct ehea_port *port, u32 hcallid) port->logical_port_id, reg_type, port->mac_addr, 0, hcallid); if (hret != H_SUCCESS) { - ehea_error("reg_dereg_bcmc failed (tagged)"); + ehea_error("%sregistering bc address failed (tagged)", + hcallid == H_REG_BCMC ? "" : "de"); ret = -EIO; goto out_herr; } @@ -1444,7 +1447,8 @@ static int ehea_broadcast_reg_helper(struct ehea_port *port, u32 hcallid) port->logical_port_id, reg_type, port->mac_addr, 0, hcallid); if (hret != H_SUCCESS) { - ehea_error("reg_dereg_bcmc failed (vlan)"); + ehea_error("%sregistering bc address failed (vlan)", + hcallid == H_REG_BCMC ? 
"" : "de"); ret = -EIO; } out_herr: @@ -2170,7 +2174,6 @@ static int ehea_up(struct net_device *dev) { int ret, i; struct ehea_port *port = netdev_priv(dev); - u64 mac_addr = 0; if (port->state == EHEA_PORT_UP) return 0; @@ -2189,18 +2192,10 @@ static int ehea_up(struct net_device *dev) goto out_clean_pr; } - ret = ehea_broadcast_reg_helper(port, H_REG_BCMC); - if (ret) { - ret = -EIO; - ehea_error("out_clean_pr"); - goto out_clean_pr; - } - mac_addr = (*(u64*)dev->dev_addr) >> 16; - ret = ehea_reg_interrupts(dev); if (ret) { - ehea_error("out_dereg_bc"); - goto out_dereg_bc; + ehea_error("reg_interrupts failed. ret:%d", ret); + goto out_clean_pr; } for(i = 0; i < port->num_def_qps + port->num_add_tx_qps; i++) { @@ -2226,9 +2221,6 @@ static int ehea_up(struct net_device *dev) out_free_irqs: ehea_free_interrupts(dev); -out_dereg_bc: - ehea_broadcast_reg_helper(port, H_DEREG_BCMC); - out_clean_pr: ehea_clean_all_portres(port); out: @@ -2273,7 +2265,6 @@ static int ehea_down(struct net_device *dev) &port->port_res[i].d_netdev->state)) msleep(1); - ehea_broadcast_reg_helper(port, H_DEREG_BCMC); port->state = EHEA_PORT_DOWN; ret = ehea_clean_all_portres(port); @@ -2655,12 +2646,18 @@ struct ehea_port *ehea_setup_single_port(struct ehea_adapter *adapter, INIT_WORK(&port->reset_task, ehea_reset_port); + ret = ehea_broadcast_reg_helper(port, H_REG_BCMC); + if (ret) { + ret = -EIO; + goto out_unreg_port; + } + ehea_set_ethtool_ops(dev); ret = register_netdev(dev); if (ret) { ehea_error("register_netdev failed. 
ret=%d", ret); - goto out_unreg_port; + goto out_dereg_bc; } ret = ehea_get_jumboframe_status(port, &jumbo); @@ -2675,6 +2672,9 @@ struct ehea_port *ehea_setup_single_port(struct ehea_adapter *adapter, return port; +out_dereg_bc: + ehea_broadcast_reg_helper(port, H_DEREG_BCMC); + out_unreg_port: ehea_unregister_port(port); @@ -2694,6 +2694,7 @@ static void ehea_shutdown_single_port(struct ehea_port *port) { unregister_netdev(port->netdev); ehea_unregister_port(port); + ehea_broadcast_reg_helper(port, H_DEREG_BCMC); kfree(port->mc_list); free_netdev(port->netdev); port->adapter->active_ports--; diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c index 136827f8dc2..6d1d50a1978 100644 --- a/drivers/net/forcedeth.c +++ b/drivers/net/forcedeth.c @@ -5137,12 +5137,10 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i goto out_unmap; np->tx_ring.ex = &np->rx_ring.ex[np->rx_ring_size]; } - np->rx_skb = kmalloc(sizeof(struct nv_skb_map) * np->rx_ring_size, GFP_KERNEL); - np->tx_skb = kmalloc(sizeof(struct nv_skb_map) * np->tx_ring_size, GFP_KERNEL); + np->rx_skb = kcalloc(np->rx_ring_size, sizeof(struct nv_skb_map), GFP_KERNEL); + np->tx_skb = kcalloc(np->tx_ring_size, sizeof(struct nv_skb_map), GFP_KERNEL); if (!np->rx_skb || !np->tx_skb) goto out_freering; - memset(np->rx_skb, 0, sizeof(struct nv_skb_map) * np->rx_ring_size); - memset(np->tx_skb, 0, sizeof(struct nv_skb_map) * np->tx_ring_size); dev->open = nv_open; dev->stop = nv_close; diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c index d7a1a58de76..f92690555dd 100644 --- a/drivers/net/gianfar.c +++ b/drivers/net/gianfar.c @@ -420,8 +420,18 @@ static phy_interface_t gfar_get_interface(struct net_device *dev) if (ecntrl & ECNTRL_REDUCED_MODE) { if (ecntrl & ECNTRL_REDUCED_MII_MODE) return PHY_INTERFACE_MODE_RMII; - else + else { + phy_interface_t interface = priv->einfo->interface; + + /* + * This isn't autodetected right now, so it must + * be set by the device 
tree or platform code. + */ + if (interface == PHY_INTERFACE_MODE_RGMII_ID) + return PHY_INTERFACE_MODE_RGMII_ID; + return PHY_INTERFACE_MODE_RGMII; + } } if (priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_GIGABIT) diff --git a/drivers/net/hamradio/dmascc.c b/drivers/net/hamradio/dmascc.c index 3be8c504759..205f0967249 100644 --- a/drivers/net/hamradio/dmascc.c +++ b/drivers/net/hamradio/dmascc.c @@ -453,8 +453,8 @@ static int __init setup_adapter(int card_base, int type, int n) int scc_base = card_base + hw[type].scc_offset; char *chipnames[] = CHIPNAMES; - /* Allocate memory */ - info = kmalloc(sizeof(struct scc_info), GFP_KERNEL | GFP_DMA); + /* Initialize what is necessary for write_scc and write_scc_data */ + info = kzalloc(sizeof(struct scc_info), GFP_KERNEL | GFP_DMA); if (!info) { printk(KERN_ERR "dmascc: " "could not allocate memory for %s at %#3x\n", @@ -462,8 +462,6 @@ static int __init setup_adapter(int card_base, int type, int n) goto out; } - /* Initialize what is necessary for write_scc and write_scc_data */ - memset(info, 0, sizeof(struct scc_info)); info->dev[0] = alloc_netdev(0, "", dev_setup); if (!info->dev[0]) { diff --git a/drivers/net/irda/irport.c b/drivers/net/irda/irport.c index 3078c419cb0..20732458f5a 100644 --- a/drivers/net/irda/irport.c +++ b/drivers/net/irda/irport.c @@ -164,14 +164,13 @@ irport_open(int i, unsigned int iobase, unsigned int irq) /* Allocate memory if needed */ if (self->tx_buff.truesize > 0) { - self->tx_buff.head = kmalloc(self->tx_buff.truesize, + self->tx_buff.head = kzalloc(self->tx_buff.truesize, GFP_KERNEL); if (self->tx_buff.head == NULL) { IRDA_ERROR("%s(), can't allocate memory for " "transmit buffer!\n", __FUNCTION__); goto err_out4; } - memset(self->tx_buff.head, 0, self->tx_buff.truesize); } self->tx_buff.data = self->tx_buff.head; diff --git a/drivers/net/irda/irtty-sir.c b/drivers/net/irda/irtty-sir.c index ad1857364d5..6f5f697ec9f 100644 --- a/drivers/net/irda/irtty-sir.c +++ 
b/drivers/net/irda/irtty-sir.c @@ -505,10 +505,9 @@ static int irtty_open(struct tty_struct *tty) } /* allocate private device info block */ - priv = kmalloc(sizeof(*priv), GFP_KERNEL); + priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) goto out_put; - memset(priv, 0, sizeof(*priv)); priv->magic = IRTTY_MAGIC; priv->tty = tty; diff --git a/drivers/net/iseries_veth.c b/drivers/net/iseries_veth.c index 347d50cd77d..0433c41f902 100644 --- a/drivers/net/iseries_veth.c +++ b/drivers/net/iseries_veth.c @@ -822,10 +822,9 @@ static int veth_init_connection(u8 rlp) || ! HvLpConfig_doLpsCommunicateOnVirtualLan(this_lp, rlp) ) return 0; - cnx = kmalloc(sizeof(*cnx), GFP_KERNEL); + cnx = kzalloc(sizeof(*cnx), GFP_KERNEL); if (! cnx) return -ENOMEM; - memset(cnx, 0, sizeof(*cnx)); cnx->remote_lp = rlp; spin_lock_init(&cnx->lock); @@ -852,14 +851,13 @@ static int veth_init_connection(u8 rlp) if (rc != 0) return rc; - msgs = kmalloc(VETH_NUMBUFFERS * sizeof(struct veth_msg), GFP_KERNEL); + msgs = kcalloc(VETH_NUMBUFFERS, sizeof(struct veth_msg), GFP_KERNEL); if (! msgs) { veth_error("Can't allocate buffers for LPAR %d.\n", rlp); return -ENOMEM; } cnx->msgs = msgs; - memset(msgs, 0, VETH_NUMBUFFERS * sizeof(struct veth_msg)); for (i = 0; i < VETH_NUMBUFFERS; i++) { msgs[i].token = i; diff --git a/drivers/net/lance.c b/drivers/net/lance.c index a2f37e52b92..a4e5fab1262 100644 --- a/drivers/net/lance.c +++ b/drivers/net/lance.c @@ -533,11 +533,10 @@ static int __init lance_probe1(struct net_device *dev, int ioaddr, int irq, int dev->base_addr = ioaddr; /* Make certain the data structures used by the LANCE are aligned and DMAble. 
*/ - lp = kmalloc(sizeof(*lp), GFP_DMA | GFP_KERNEL); + lp = kzalloc(sizeof(*lp), GFP_DMA | GFP_KERNEL); if(lp==NULL) return -ENODEV; if (lance_debug > 6) printk(" (#0x%05lx)", (unsigned long)lp); - memset(lp, 0, sizeof(*lp)); dev->priv = lp; lp->name = chipname; lp->rx_buffs = (unsigned long)kmalloc(PKT_BUF_SZ*RX_RING_SIZE, diff --git a/drivers/net/lguest_net.c b/drivers/net/lguest_net.c new file mode 100644 index 00000000000..112778652f7 --- /dev/null +++ b/drivers/net/lguest_net.c @@ -0,0 +1,354 @@ +/* A simple network driver for lguest. + * + * Copyright 2006 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +//#define DEBUG +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/module.h> +#include <linux/mm_types.h> +#include <linux/io.h> +#include <linux/lguest_bus.h> + +#define SHARED_SIZE PAGE_SIZE +#define MAX_LANS 4 +#define NUM_SKBS 8 + +struct lguestnet_info +{ + /* The shared page(s). */ + struct lguest_net *peer; + unsigned long peer_phys; + unsigned long mapsize; + + /* The lguest_device I come from */ + struct lguest_device *lgdev; + + /* My peerid. */ + unsigned int me; + + /* Receive queue. 
*/ + struct sk_buff *skb[NUM_SKBS]; + struct lguest_dma dma[NUM_SKBS]; +}; + +/* How many bytes left in this page. */ +static unsigned int rest_of_page(void *data) +{ + return PAGE_SIZE - ((unsigned long)data % PAGE_SIZE); +} + +/* Simple convention: offset 4 * peernum. */ +static unsigned long peer_key(struct lguestnet_info *info, unsigned peernum) +{ + return info->peer_phys + 4 * peernum; +} + +static void skb_to_dma(const struct sk_buff *skb, unsigned int headlen, + struct lguest_dma *dma) +{ + unsigned int i, seg; + + for (i = seg = 0; i < headlen; seg++, i += rest_of_page(skb->data+i)) { + dma->addr[seg] = virt_to_phys(skb->data + i); + dma->len[seg] = min((unsigned)(headlen - i), + rest_of_page(skb->data + i)); + } + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++, seg++) { + const skb_frag_t *f = &skb_shinfo(skb)->frags[i]; + /* Should not happen with MTU less than 64k - 2 * PAGE_SIZE. */ + if (seg == LGUEST_MAX_DMA_SECTIONS) { + printk("Woah dude! Megapacket!\n"); + break; + } + dma->addr[seg] = page_to_phys(f->page) + f->page_offset; + dma->len[seg] = f->size; + } + if (seg < LGUEST_MAX_DMA_SECTIONS) + dma->len[seg] = 0; +} + +/* We overload multicast bit to show promiscuous mode. */ +#define PROMISC_BIT 0x01 + +static void lguestnet_set_multicast(struct net_device *dev) +{ + struct lguestnet_info *info = netdev_priv(dev); + + if ((dev->flags & (IFF_PROMISC|IFF_ALLMULTI)) || dev->mc_count) + info->peer[info->me].mac[0] |= PROMISC_BIT; + else + info->peer[info->me].mac[0] &= ~PROMISC_BIT; +} + +static int promisc(struct lguestnet_info *info, unsigned int peer) +{ + return info->peer[peer].mac[0] & PROMISC_BIT; +} + +static int mac_eq(const unsigned char mac[ETH_ALEN], + struct lguestnet_info *info, unsigned int peer) +{ + /* Ignore multicast bit, which peer turns on to mean promisc. 
*/ + if ((info->peer[peer].mac[0] & (~PROMISC_BIT)) != mac[0]) + return 0; + return memcmp(mac+1, info->peer[peer].mac+1, ETH_ALEN-1) == 0; +} + +static void transfer_packet(struct net_device *dev, + struct sk_buff *skb, + unsigned int peernum) +{ + struct lguestnet_info *info = netdev_priv(dev); + struct lguest_dma dma; + + skb_to_dma(skb, skb_headlen(skb), &dma); + pr_debug("xfer length %04x (%u)\n", htons(skb->len), skb->len); + + lguest_send_dma(peer_key(info, peernum), &dma); + if (dma.used_len != skb->len) { + dev->stats.tx_carrier_errors++; + pr_debug("Bad xfer to peer %i: %i of %i (dma %p/%i)\n", + peernum, dma.used_len, skb->len, + (void *)dma.addr[0], dma.len[0]); + } else { + dev->stats.tx_bytes += skb->len; + dev->stats.tx_packets++; + } +} + +static int unused_peer(const struct lguest_net peer[], unsigned int num) +{ + return peer[num].mac[0] == 0; +} + +static int lguestnet_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + unsigned int i; + int broadcast; + struct lguestnet_info *info = netdev_priv(dev); + const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest; + + pr_debug("%s: xmit %02x:%02x:%02x:%02x:%02x:%02x\n", + dev->name, dest[0],dest[1],dest[2],dest[3],dest[4],dest[5]); + + broadcast = is_multicast_ether_addr(dest); + for (i = 0; i < info->mapsize/sizeof(struct lguest_net); i++) { + if (i == info->me || unused_peer(info->peer, i)) + continue; + + if (!broadcast && !promisc(info, i) && !mac_eq(dest, info, i)) + continue; + + pr_debug("lguestnet %s: sending from %i to %i\n", + dev->name, info->me, i); + transfer_packet(dev, skb, i); + } + dev_kfree_skb(skb); + return 0; +} + +/* Find a new skb to put in this slot in shared mem. */ +static int fill_slot(struct net_device *dev, unsigned int slot) +{ + struct lguestnet_info *info = netdev_priv(dev); + /* Try to create and register a new one. 
*/ + info->skb[slot] = netdev_alloc_skb(dev, ETH_HLEN + ETH_DATA_LEN); + if (!info->skb[slot]) { + printk("%s: could not fill slot %i\n", dev->name, slot); + return -ENOMEM; + } + + skb_to_dma(info->skb[slot], ETH_HLEN + ETH_DATA_LEN, &info->dma[slot]); + wmb(); + /* Now we tell hypervisor it can use the slot. */ + info->dma[slot].used_len = 0; + return 0; +} + +static irqreturn_t lguestnet_rcv(int irq, void *dev_id) +{ + struct net_device *dev = dev_id; + struct lguestnet_info *info = netdev_priv(dev); + unsigned int i, done = 0; + + for (i = 0; i < ARRAY_SIZE(info->dma); i++) { + unsigned int length; + struct sk_buff *skb; + + length = info->dma[i].used_len; + if (length == 0) + continue; + + done++; + skb = info->skb[i]; + fill_slot(dev, i); + + if (length < ETH_HLEN || length > ETH_HLEN + ETH_DATA_LEN) { + pr_debug(KERN_WARNING "%s: unbelievable skb len: %i\n", + dev->name, length); + dev_kfree_skb(skb); + continue; + } + + skb_put(skb, length); + skb->protocol = eth_type_trans(skb, dev); + /* This is a reliable transport. */ + if (dev->features & NETIF_F_NO_CSUM) + skb->ip_summed = CHECKSUM_UNNECESSARY; + pr_debug("Receiving skb proto 0x%04x len %i type %i\n", + ntohs(skb->protocol), skb->len, skb->pkt_type); + + dev->stats.rx_bytes += skb->len; + dev->stats.rx_packets++; + netif_rx(skb); + } + return done ? 
IRQ_HANDLED : IRQ_NONE; +} + +static int lguestnet_open(struct net_device *dev) +{ + int i; + struct lguestnet_info *info = netdev_priv(dev); + + /* Set up our MAC address */ + memcpy(info->peer[info->me].mac, dev->dev_addr, ETH_ALEN); + + /* Turn on promisc mode if needed */ + lguestnet_set_multicast(dev); + + for (i = 0; i < ARRAY_SIZE(info->dma); i++) { + if (fill_slot(dev, i) != 0) + goto cleanup; + } + if (lguest_bind_dma(peer_key(info,info->me), info->dma, + NUM_SKBS, lgdev_irq(info->lgdev)) != 0) + goto cleanup; + return 0; + +cleanup: + while (--i >= 0) + dev_kfree_skb(info->skb[i]); + return -ENOMEM; +} + +static int lguestnet_close(struct net_device *dev) +{ + unsigned int i; + struct lguestnet_info *info = netdev_priv(dev); + + /* Clear all trace: others might deliver packets, we'll ignore it. */ + memset(&info->peer[info->me], 0, sizeof(info->peer[info->me])); + + /* Deregister sg lists. */ + lguest_unbind_dma(peer_key(info, info->me), info->dma); + for (i = 0; i < ARRAY_SIZE(info->dma); i++) + dev_kfree_skb(info->skb[i]); + return 0; +} + +static int lguestnet_probe(struct lguest_device *lgdev) +{ + int err, irqf = IRQF_SHARED; + struct net_device *dev; + struct lguestnet_info *info; + struct lguest_device_desc *desc = &lguest_devices[lgdev->index]; + + pr_debug("lguest_net: probing for device %i\n", lgdev->index); + + dev = alloc_etherdev(sizeof(struct lguestnet_info)); + if (!dev) + return -ENOMEM; + + SET_MODULE_OWNER(dev); + + /* Ethernet defaults with some changes */ + ether_setup(dev); + dev->set_mac_address = NULL; + + dev->dev_addr[0] = 0x02; /* set local assignment bit (IEEE802) */ + dev->dev_addr[1] = 0x00; + memcpy(&dev->dev_addr[2], &lguest_data.guestid, 2); + dev->dev_addr[4] = 0x00; + dev->dev_addr[5] = 0x00; + + dev->open = lguestnet_open; + dev->stop = lguestnet_close; + dev->hard_start_xmit = lguestnet_start_xmit; + + /* Turning on/off promisc will call dev->set_multicast_list. 
+ * We don't actually support multicast yet */ + dev->set_multicast_list = lguestnet_set_multicast; + SET_NETDEV_DEV(dev, &lgdev->dev); + if (desc->features & LGUEST_NET_F_NOCSUM) + dev->features = NETIF_F_SG|NETIF_F_NO_CSUM; + + info = netdev_priv(dev); + info->mapsize = PAGE_SIZE * desc->num_pages; + info->peer_phys = ((unsigned long)desc->pfn << PAGE_SHIFT); + info->lgdev = lgdev; + info->peer = lguest_map(info->peer_phys, desc->num_pages); + if (!info->peer) { + err = -ENOMEM; + goto free; + } + + /* This stores our peerid (upper bits reserved for future). */ + info->me = (desc->features & (info->mapsize-1)); + + err = register_netdev(dev); + if (err) { + pr_debug("lguestnet: registering device failed\n"); + goto unmap; + } + + if (lguest_devices[lgdev->index].features & LGUEST_DEVICE_F_RANDOMNESS) + irqf |= IRQF_SAMPLE_RANDOM; + if (request_irq(lgdev_irq(lgdev), lguestnet_rcv, irqf, "lguestnet", + dev) != 0) { + pr_debug("lguestnet: cannot get irq %i\n", lgdev_irq(lgdev)); + goto unregister; + } + + pr_debug("lguestnet: registered device %s\n", dev->name); + lgdev->private = dev; + return 0; + +unregister: + unregister_netdev(dev); +unmap: + lguest_unmap(info->peer); +free: + free_netdev(dev); + return err; +} + +static struct lguest_driver lguestnet_drv = { + .name = "lguestnet", + .owner = THIS_MODULE, + .device_type = LGUEST_DEVICE_T_NET, + .probe = lguestnet_probe, +}; + +static __init int lguestnet_init(void) +{ + return register_lguest_driver(&lguestnet_drv); +} +module_init(lguestnet_init); + +MODULE_DESCRIPTION("Lguest network driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/mlx4/catas.c b/drivers/net/mlx4/catas.c index 1bb088aeaf7..6b32ec94b3a 100644 --- a/drivers/net/mlx4/catas.c +++ b/drivers/net/mlx4/catas.c @@ -30,41 +30,133 @@ * SOFTWARE. 
*/ +#include <linux/workqueue.h> + #include "mlx4.h" -void mlx4_handle_catas_err(struct mlx4_dev *dev) +enum { + MLX4_CATAS_POLL_INTERVAL = 5 * HZ, +}; + +static DEFINE_SPINLOCK(catas_lock); + +static LIST_HEAD(catas_list); +static struct workqueue_struct *catas_wq; +static struct work_struct catas_work; + +static int internal_err_reset = 1; +module_param(internal_err_reset, int, 0644); +MODULE_PARM_DESC(internal_err_reset, + "Reset device on internal errors if non-zero (default 1)"); + +static void dump_err_buf(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); int i; - mlx4_err(dev, "Catastrophic error detected:\n"); + mlx4_err(dev, "Internal error detected:\n"); for (i = 0; i < priv->fw.catas_size; ++i) mlx4_err(dev, " buf[%02x]: %08x\n", i, swab32(readl(priv->catas_err.map + i))); +} - mlx4_dispatch_event(dev, MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR, 0, 0); +static void poll_catas(unsigned long dev_ptr) +{ + struct mlx4_dev *dev = (struct mlx4_dev *) dev_ptr; + struct mlx4_priv *priv = mlx4_priv(dev); + + if (readl(priv->catas_err.map)) { + dump_err_buf(dev); + + mlx4_dispatch_event(dev, MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR, 0, 0); + + if (internal_err_reset) { + spin_lock(&catas_lock); + list_add(&priv->catas_err.list, &catas_list); + spin_unlock(&catas_lock); + + queue_work(catas_wq, &catas_work); + } + } else + mod_timer(&priv->catas_err.timer, + round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL)); } -void mlx4_map_catas_buf(struct mlx4_dev *dev) +static void catas_reset(struct work_struct *work) +{ + struct mlx4_priv *priv, *tmppriv; + struct mlx4_dev *dev; + + LIST_HEAD(tlist); + int ret; + + spin_lock_irq(&catas_lock); + list_splice_init(&catas_list, &tlist); + spin_unlock_irq(&catas_lock); + + list_for_each_entry_safe(priv, tmppriv, &tlist, catas_err.list) { + ret = mlx4_restart_one(priv->dev.pdev); + dev = &priv->dev; + if (ret) + mlx4_err(dev, "Reset failed (%d)\n", ret); + else + mlx4_dbg(dev, "Reset succeeded\n"); + } +} + +void 
mlx4_start_catas_poll(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); unsigned long addr; + INIT_LIST_HEAD(&priv->catas_err.list); + init_timer(&priv->catas_err.timer); + priv->catas_err.map = NULL; + addr = pci_resource_start(dev->pdev, priv->fw.catas_bar) + priv->fw.catas_offset; priv->catas_err.map = ioremap(addr, priv->fw.catas_size * 4); - if (!priv->catas_err.map) - mlx4_warn(dev, "Failed to map catastrophic error buffer at 0x%lx\n", + if (!priv->catas_err.map) { + mlx4_warn(dev, "Failed to map internal error buffer at 0x%lx\n", addr); + return; + } + priv->catas_err.timer.data = (unsigned long) dev; + priv->catas_err.timer.function = poll_catas; + priv->catas_err.timer.expires = + round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL); + add_timer(&priv->catas_err.timer); } -void mlx4_unmap_catas_buf(struct mlx4_dev *dev) +void mlx4_stop_catas_poll(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); + del_timer_sync(&priv->catas_err.timer); + if (priv->catas_err.map) iounmap(priv->catas_err.map); + + spin_lock_irq(&catas_lock); + list_del(&priv->catas_err.list); + spin_unlock_irq(&catas_lock); +} + +int __init mlx4_catas_init(void) +{ + INIT_WORK(&catas_work, catas_reset); + + catas_wq = create_singlethread_workqueue("mlx4_err"); + if (!catas_wq) + return -ENOMEM; + + return 0; +} + +void mlx4_catas_cleanup(void) +{ + destroy_workqueue(catas_wq); } diff --git a/drivers/net/mlx4/eq.c b/drivers/net/mlx4/eq.c index 27a82cecd69..2095c843fa1 100644 --- a/drivers/net/mlx4/eq.c +++ b/drivers/net/mlx4/eq.c @@ -89,14 +89,12 @@ struct mlx4_eq_context { (1ull << MLX4_EVENT_TYPE_PATH_MIG_FAILED) | \ (1ull << MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR) | \ (1ull << MLX4_EVENT_TYPE_WQ_ACCESS_ERROR) | \ - (1ull << MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR) | \ (1ull << MLX4_EVENT_TYPE_PORT_CHANGE) | \ (1ull << MLX4_EVENT_TYPE_ECC_DETECT) | \ (1ull << MLX4_EVENT_TYPE_SRQ_CATAS_ERROR) | \ (1ull << MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE) | \ (1ull << 
MLX4_EVENT_TYPE_SRQ_LIMIT) | \ (1ull << MLX4_EVENT_TYPE_CMD)) -#define MLX4_CATAS_EVENT_MASK (1ull << MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR) struct mlx4_eqe { u8 reserved1; @@ -264,7 +262,7 @@ static irqreturn_t mlx4_interrupt(int irq, void *dev_ptr) writel(priv->eq_table.clr_mask, priv->eq_table.clr_int); - for (i = 0; i < MLX4_EQ_CATAS; ++i) + for (i = 0; i < MLX4_NUM_EQ; ++i) work |= mlx4_eq_int(dev, &priv->eq_table.eq[i]); return IRQ_RETVAL(work); @@ -281,14 +279,6 @@ static irqreturn_t mlx4_msi_x_interrupt(int irq, void *eq_ptr) return IRQ_HANDLED; } -static irqreturn_t mlx4_catas_interrupt(int irq, void *dev_ptr) -{ - mlx4_handle_catas_err(dev_ptr); - - /* MSI-X vectors always belong to us */ - return IRQ_HANDLED; -} - static int mlx4_MAP_EQ(struct mlx4_dev *dev, u64 event_mask, int unmap, int eq_num) { @@ -490,11 +480,9 @@ static void mlx4_free_irqs(struct mlx4_dev *dev) if (eq_table->have_irq) free_irq(dev->pdev->irq, dev); - for (i = 0; i < MLX4_EQ_CATAS; ++i) + for (i = 0; i < MLX4_NUM_EQ; ++i) if (eq_table->eq[i].have_irq) free_irq(eq_table->eq[i].irq, eq_table->eq + i); - if (eq_table->eq[MLX4_EQ_CATAS].have_irq) - free_irq(eq_table->eq[MLX4_EQ_CATAS].irq, dev); } static int __devinit mlx4_map_clr_int(struct mlx4_dev *dev) @@ -598,32 +586,19 @@ int __devinit mlx4_init_eq_table(struct mlx4_dev *dev) if (dev->flags & MLX4_FLAG_MSI_X) { static const char *eq_name[] = { [MLX4_EQ_COMP] = DRV_NAME " (comp)", - [MLX4_EQ_ASYNC] = DRV_NAME " (async)", - [MLX4_EQ_CATAS] = DRV_NAME " (catas)" + [MLX4_EQ_ASYNC] = DRV_NAME " (async)" }; - err = mlx4_create_eq(dev, 1, MLX4_EQ_CATAS, - &priv->eq_table.eq[MLX4_EQ_CATAS]); - if (err) - goto err_out_async; - - for (i = 0; i < MLX4_EQ_CATAS; ++i) { + for (i = 0; i < MLX4_NUM_EQ; ++i) { err = request_irq(priv->eq_table.eq[i].irq, mlx4_msi_x_interrupt, 0, eq_name[i], priv->eq_table.eq + i); if (err) - goto err_out_catas; + goto err_out_async; priv->eq_table.eq[i].have_irq = 1; } - err = 
request_irq(priv->eq_table.eq[MLX4_EQ_CATAS].irq, - mlx4_catas_interrupt, 0, - eq_name[MLX4_EQ_CATAS], dev); - if (err) - goto err_out_catas; - - priv->eq_table.eq[MLX4_EQ_CATAS].have_irq = 1; } else { err = request_irq(dev->pdev->irq, mlx4_interrupt, IRQF_SHARED, DRV_NAME, dev); @@ -639,22 +614,11 @@ int __devinit mlx4_init_eq_table(struct mlx4_dev *dev) mlx4_warn(dev, "MAP_EQ for async EQ %d failed (%d)\n", priv->eq_table.eq[MLX4_EQ_ASYNC].eqn, err); - for (i = 0; i < MLX4_EQ_CATAS; ++i) + for (i = 0; i < MLX4_NUM_EQ; ++i) eq_set_ci(&priv->eq_table.eq[i], 1); - if (dev->flags & MLX4_FLAG_MSI_X) { - err = mlx4_MAP_EQ(dev, MLX4_CATAS_EVENT_MASK, 0, - priv->eq_table.eq[MLX4_EQ_CATAS].eqn); - if (err) - mlx4_warn(dev, "MAP_EQ for catas EQ %d failed (%d)\n", - priv->eq_table.eq[MLX4_EQ_CATAS].eqn, err); - } - return 0; -err_out_catas: - mlx4_free_eq(dev, &priv->eq_table.eq[MLX4_EQ_CATAS]); - err_out_async: mlx4_free_eq(dev, &priv->eq_table.eq[MLX4_EQ_ASYNC]); @@ -675,19 +639,13 @@ void mlx4_cleanup_eq_table(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); int i; - if (dev->flags & MLX4_FLAG_MSI_X) - mlx4_MAP_EQ(dev, MLX4_CATAS_EVENT_MASK, 1, - priv->eq_table.eq[MLX4_EQ_CATAS].eqn); - mlx4_MAP_EQ(dev, MLX4_ASYNC_EVENT_MASK, 1, priv->eq_table.eq[MLX4_EQ_ASYNC].eqn); mlx4_free_irqs(dev); - for (i = 0; i < MLX4_EQ_CATAS; ++i) + for (i = 0; i < MLX4_NUM_EQ; ++i) mlx4_free_eq(dev, &priv->eq_table.eq[i]); - if (dev->flags & MLX4_FLAG_MSI_X) - mlx4_free_eq(dev, &priv->eq_table.eq[MLX4_EQ_CATAS]); mlx4_unmap_clr_int(dev); diff --git a/drivers/net/mlx4/intf.c b/drivers/net/mlx4/intf.c index 9ae951bf6aa..be5d9e90ccf 100644 --- a/drivers/net/mlx4/intf.c +++ b/drivers/net/mlx4/intf.c @@ -142,6 +142,7 @@ int mlx4_register_device(struct mlx4_dev *dev) mlx4_add_device(intf, priv); mutex_unlock(&intf_mutex); + mlx4_start_catas_poll(dev); return 0; } @@ -151,6 +152,7 @@ void mlx4_unregister_device(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); struct 
mlx4_interface *intf; + mlx4_stop_catas_poll(dev); mutex_lock(&intf_mutex); list_for_each_entry(intf, &intf_list, list) diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c index a4f2e0475a7..4dc9dc19b71 100644 --- a/drivers/net/mlx4/main.c +++ b/drivers/net/mlx4/main.c @@ -78,7 +78,7 @@ static const char mlx4_version[] __devinitdata = static struct mlx4_profile default_profile = { .num_qp = 1 << 16, .num_srq = 1 << 16, - .rdmarc_per_qp = 4, + .rdmarc_per_qp = 1 << 4, .num_cq = 1 << 16, .num_mcg = 1 << 13, .num_mpt = 1 << 17, @@ -583,13 +583,11 @@ static int __devinit mlx4_setup_hca(struct mlx4_dev *dev) goto err_pd_table_free; } - mlx4_map_catas_buf(dev); - err = mlx4_init_eq_table(dev); if (err) { mlx4_err(dev, "Failed to initialize " "event queue table, aborting.\n"); - goto err_catas_buf; + goto err_mr_table_free; } err = mlx4_cmd_use_events(dev); @@ -659,8 +657,7 @@ err_cmd_poll: err_eq_table_free: mlx4_cleanup_eq_table(dev); -err_catas_buf: - mlx4_unmap_catas_buf(dev); +err_mr_table_free: mlx4_cleanup_mr_table(dev); err_pd_table_free: @@ -836,9 +833,6 @@ err_cleanup: mlx4_cleanup_cq_table(dev); mlx4_cmd_use_polling(dev); mlx4_cleanup_eq_table(dev); - - mlx4_unmap_catas_buf(dev); - mlx4_cleanup_mr_table(dev); mlx4_cleanup_pd_table(dev); mlx4_cleanup_uar_table(dev); @@ -885,9 +879,6 @@ static void __devexit mlx4_remove_one(struct pci_dev *pdev) mlx4_cleanup_cq_table(dev); mlx4_cmd_use_polling(dev); mlx4_cleanup_eq_table(dev); - - mlx4_unmap_catas_buf(dev); - mlx4_cleanup_mr_table(dev); mlx4_cleanup_pd_table(dev); @@ -908,6 +899,12 @@ static void __devexit mlx4_remove_one(struct pci_dev *pdev) } } +int mlx4_restart_one(struct pci_dev *pdev) +{ + mlx4_remove_one(pdev); + return mlx4_init_one(pdev, NULL); +} + static struct pci_device_id mlx4_pci_table[] = { { PCI_VDEVICE(MELLANOX, 0x6340) }, /* MT25408 "Hermon" SDR */ { PCI_VDEVICE(MELLANOX, 0x634a) }, /* MT25408 "Hermon" DDR */ @@ -930,6 +927,10 @@ static int __init mlx4_init(void) { int ret; + ret = 
mlx4_catas_init(); + if (ret) + return ret; + ret = pci_register_driver(&mlx4_driver); return ret < 0 ? ret : 0; } @@ -937,6 +938,7 @@ static int __init mlx4_init(void) static void __exit mlx4_cleanup(void) { pci_unregister_driver(&mlx4_driver); + mlx4_catas_cleanup(); } module_init(mlx4_init); diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h index d9c91a71fc8..be304a7c2c9 100644 --- a/drivers/net/mlx4/mlx4.h +++ b/drivers/net/mlx4/mlx4.h @@ -39,6 +39,7 @@ #include <linux/mutex.h> #include <linux/radix-tree.h> +#include <linux/timer.h> #include <linux/mlx4/device.h> #include <linux/mlx4/doorbell.h> @@ -67,7 +68,6 @@ enum { enum { MLX4_EQ_ASYNC, MLX4_EQ_COMP, - MLX4_EQ_CATAS, MLX4_NUM_EQ }; @@ -248,7 +248,8 @@ struct mlx4_mcg_table { struct mlx4_catas_err { u32 __iomem *map; - int size; + struct timer_list timer; + struct list_head list; }; struct mlx4_priv { @@ -311,9 +312,11 @@ void mlx4_cleanup_qp_table(struct mlx4_dev *dev); void mlx4_cleanup_srq_table(struct mlx4_dev *dev); void mlx4_cleanup_mcg_table(struct mlx4_dev *dev); -void mlx4_map_catas_buf(struct mlx4_dev *dev); -void mlx4_unmap_catas_buf(struct mlx4_dev *dev); - +void mlx4_start_catas_poll(struct mlx4_dev *dev); +void mlx4_stop_catas_poll(struct mlx4_dev *dev); +int mlx4_catas_init(void); +void mlx4_catas_cleanup(void); +int mlx4_restart_one(struct pci_dev *pdev); int mlx4_register_device(struct mlx4_dev *dev); void mlx4_unregister_device(struct mlx4_dev *dev); void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_event type, diff --git a/drivers/net/ni5010.c b/drivers/net/ni5010.c index 3d5b4232f65..22a3b3dc7d8 100644 --- a/drivers/net/ni5010.c +++ b/drivers/net/ni5010.c @@ -670,14 +670,10 @@ static void ni5010_set_multicast_list(struct net_device *dev) PRINTK2((KERN_DEBUG "%s: entering set_multicast_list\n", dev->name)); - if (dev->flags&IFF_PROMISC || dev->flags&IFF_ALLMULTI) { + if (dev->flags&IFF_PROMISC || dev->flags&IFF_ALLMULTI || dev->mc_list) { dev->flags |= IFF_PROMISC; 
outb(RMD_PROMISC, EDLC_RMODE); /* Enable promiscuous mode */ PRINTK((KERN_DEBUG "%s: Entering promiscuous mode\n", dev->name)); - } else if (dev->mc_list) { - /* Sorry, multicast not supported */ - PRINTK((KERN_DEBUG "%s: No multicast, entering broadcast mode\n", dev->name)); - outb(RMD_BROADCAST, EDLC_RMODE); } else { PRINTK((KERN_DEBUG "%s: Entering broadcast mode\n", dev->name)); outb(RMD_BROADCAST, EDLC_RMODE); /* Disable promiscuous mode, use normal mode */ diff --git a/drivers/net/ns83820.c b/drivers/net/ns83820.c index 104aab3c957..ea80e6cb3de 100644 --- a/drivers/net/ns83820.c +++ b/drivers/net/ns83820.c @@ -1582,7 +1582,7 @@ static void ns83820_set_multicast(struct net_device *ndev) else and_mask &= ~(RFCR_AAU | RFCR_AAM); - if (ndev->flags & IFF_ALLMULTI) + if (ndev->flags & IFF_ALLMULTI || ndev->mc_count) or_mask |= RFCR_AAM; else and_mask &= ~RFCR_AAM; diff --git a/drivers/net/pcmcia/com20020_cs.c b/drivers/net/pcmcia/com20020_cs.c index 0d1c7a41c9c..ea9414c4d90 100644 --- a/drivers/net/pcmcia/com20020_cs.c +++ b/drivers/net/pcmcia/com20020_cs.c @@ -147,7 +147,7 @@ static int com20020_probe(struct pcmcia_device *p_dev) DEBUG(0, "com20020_attach()\n"); /* Create new network device */ - info = kmalloc(sizeof(struct com20020_dev_t), GFP_KERNEL); + info = kzalloc(sizeof(struct com20020_dev_t), GFP_KERNEL); if (!info) goto fail_alloc_info; @@ -155,7 +155,6 @@ static int com20020_probe(struct pcmcia_device *p_dev) if (!dev) goto fail_alloc_dev; - memset(info, 0, sizeof(struct com20020_dev_t)); lp = dev->priv; lp->timeout = timeout; lp->backplane = backplane; diff --git a/drivers/net/pcmcia/ibmtr_cs.c b/drivers/net/pcmcia/ibmtr_cs.c index 4ecb8ca5a99..4eafa4f42cf 100644 --- a/drivers/net/pcmcia/ibmtr_cs.c +++ b/drivers/net/pcmcia/ibmtr_cs.c @@ -146,9 +146,8 @@ static int __devinit ibmtr_attach(struct pcmcia_device *link) DEBUG(0, "ibmtr_attach()\n"); /* Create new token-ring device */ - info = kmalloc(sizeof(*info), GFP_KERNEL); + info = kzalloc(sizeof(*info), 
GFP_KERNEL); if (!info) return -ENOMEM; - memset(info,0,sizeof(*info)); dev = alloc_trdev(sizeof(struct tok_info)); if (!dev) { kfree(info); diff --git a/drivers/net/phy/vitesse.c b/drivers/net/phy/vitesse.c index 596222b260d..6a538564791 100644 --- a/drivers/net/phy/vitesse.c +++ b/drivers/net/phy/vitesse.c @@ -21,6 +21,10 @@ /* Vitesse Extended Control Register 1 */ #define MII_VSC8244_EXT_CON1 0x17 #define MII_VSC8244_EXTCON1_INIT 0x0000 +#define MII_VSC8244_EXTCON1_TX_SKEW_MASK 0x0c00 +#define MII_VSC8244_EXTCON1_RX_SKEW_MASK 0x0300 +#define MII_VSC8244_EXTCON1_TX_SKEW 0x0800 +#define MII_VSC8244_EXTCON1_RX_SKEW 0x0200 /* Vitesse Interrupt Mask Register */ #define MII_VSC8244_IMASK 0x19 @@ -39,7 +43,7 @@ /* Vitesse Auxiliary Control/Status Register */ #define MII_VSC8244_AUX_CONSTAT 0x1c -#define MII_VSC8244_AUXCONSTAT_INIT 0x0004 +#define MII_VSC8244_AUXCONSTAT_INIT 0x0000 #define MII_VSC8244_AUXCONSTAT_DUPLEX 0x0020 #define MII_VSC8244_AUXCONSTAT_SPEED 0x0018 #define MII_VSC8244_AUXCONSTAT_GBIT 0x0010 @@ -51,6 +55,7 @@ MODULE_LICENSE("GPL"); static int vsc824x_config_init(struct phy_device *phydev) { + int extcon; int err; err = phy_write(phydev, MII_VSC8244_AUX_CONSTAT, @@ -58,14 +63,34 @@ static int vsc824x_config_init(struct phy_device *phydev) if (err < 0) return err; - err = phy_write(phydev, MII_VSC8244_EXT_CON1, - MII_VSC8244_EXTCON1_INIT); + extcon = phy_read(phydev, MII_VSC8244_EXT_CON1); + + if (extcon < 0) + return err; + + extcon &= ~(MII_VSC8244_EXTCON1_TX_SKEW_MASK | + MII_VSC8244_EXTCON1_RX_SKEW_MASK); + + if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID) + extcon |= (MII_VSC8244_EXTCON1_TX_SKEW | + MII_VSC8244_EXTCON1_RX_SKEW); + + err = phy_write(phydev, MII_VSC8244_EXT_CON1, extcon); + return err; } static int vsc824x_ack_interrupt(struct phy_device *phydev) { - int err = phy_read(phydev, MII_VSC8244_ISTAT); + int err = 0; + + /* + * Don't bother to ACK the interrupts if interrupts + * are disabled. 
The 824x cannot clear the interrupts + * if they are disabled. + */ + if (phydev->interrupts == PHY_INTERRUPT_ENABLED) + err = phy_read(phydev, MII_VSC8244_ISTAT); return (err < 0) ? err : 0; } @@ -77,8 +102,19 @@ static int vsc824x_config_intr(struct phy_device *phydev) if (phydev->interrupts == PHY_INTERRUPT_ENABLED) err = phy_write(phydev, MII_VSC8244_IMASK, MII_VSC8244_IMASK_MASK); - else + else { + /* + * The Vitesse PHY cannot clear the interrupt + * once it has disabled them, so we clear them first + */ + err = phy_read(phydev, MII_VSC8244_ISTAT); + + if (err) + return err; + err = phy_write(phydev, MII_VSC8244_IMASK, 0); + } + return err; } diff --git a/drivers/net/ppp_async.c b/drivers/net/ppp_async.c index caabbc408c3..27f5b904f48 100644 --- a/drivers/net/ppp_async.c +++ b/drivers/net/ppp_async.c @@ -159,12 +159,11 @@ ppp_asynctty_open(struct tty_struct *tty) int err; err = -ENOMEM; - ap = kmalloc(sizeof(*ap), GFP_KERNEL); + ap = kzalloc(sizeof(*ap), GFP_KERNEL); if (ap == 0) goto out; /* initialize the asyncppp structure */ - memset(ap, 0, sizeof(*ap)); ap->tty = tty; ap->mru = PPP_MRU; spin_lock_init(&ap->xmit_lock); diff --git a/drivers/net/ppp_deflate.c b/drivers/net/ppp_deflate.c index 72c8d6628f5..eb98b661efb 100644 --- a/drivers/net/ppp_deflate.c +++ b/drivers/net/ppp_deflate.c @@ -121,12 +121,11 @@ static void *z_comp_alloc(unsigned char *options, int opt_len) if (w_size < DEFLATE_MIN_SIZE || w_size > DEFLATE_MAX_SIZE) return NULL; - state = kmalloc(sizeof(*state), + state = kzalloc(sizeof(*state), GFP_KERNEL); if (state == NULL) return NULL; - memset (state, 0, sizeof (struct ppp_deflate_state)); state->strm.next_in = NULL; state->w_size = w_size; state->strm.workspace = vmalloc(zlib_deflate_workspacesize()); @@ -341,11 +340,10 @@ static void *z_decomp_alloc(unsigned char *options, int opt_len) if (w_size < DEFLATE_MIN_SIZE || w_size > DEFLATE_MAX_SIZE) return NULL; - state = kmalloc(sizeof(*state), GFP_KERNEL); + state = kzalloc(sizeof(*state), 
GFP_KERNEL); if (state == NULL) return NULL; - memset (state, 0, sizeof (struct ppp_deflate_state)); state->w_size = w_size; state->strm.next_out = NULL; state->strm.workspace = kmalloc(zlib_inflate_workspacesize(), diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c index 3ef0092dc09..ef3325b6923 100644 --- a/drivers/net/ppp_generic.c +++ b/drivers/net/ppp_generic.c @@ -2684,8 +2684,7 @@ static void __exit ppp_cleanup(void) if (atomic_read(&ppp_unit_count) || atomic_read(&channel_count)) printk(KERN_ERR "PPP: removing module but units remain!\n"); cardmap_destroy(&all_ppp_units); - if (unregister_chrdev(PPP_MAJOR, "ppp") != 0) - printk(KERN_ERR "PPP: failed to unregister PPP device\n"); + unregister_chrdev(PPP_MAJOR, "ppp"); device_destroy(ppp_class, MKDEV(PPP_MAJOR, 0)); class_destroy(ppp_class); } diff --git a/drivers/net/ppp_mppe.c b/drivers/net/ppp_mppe.c index d5bdd257465..f79cf87a2bf 100644 --- a/drivers/net/ppp_mppe.c +++ b/drivers/net/ppp_mppe.c @@ -200,11 +200,10 @@ static void *mppe_alloc(unsigned char *options, int optlen) || options[0] != CI_MPPE || options[1] != CILEN_MPPE) goto out; - state = kmalloc(sizeof(*state), GFP_KERNEL); + state = kzalloc(sizeof(*state), GFP_KERNEL); if (state == NULL) goto out; - memset(state, 0, sizeof(*state)); state->arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(state->arc4)) { diff --git a/drivers/net/ppp_synctty.c b/drivers/net/ppp_synctty.c index 5918fab3834..ce64032a465 100644 --- a/drivers/net/ppp_synctty.c +++ b/drivers/net/ppp_synctty.c @@ -207,13 +207,12 @@ ppp_sync_open(struct tty_struct *tty) struct syncppp *ap; int err; - ap = kmalloc(sizeof(*ap), GFP_KERNEL); + ap = kzalloc(sizeof(*ap), GFP_KERNEL); err = -ENOMEM; if (ap == 0) goto out; /* initialize the syncppp structure */ - memset(ap, 0, sizeof(*ap)); ap->tty = tty; ap->mru = PPP_MRU; spin_lock_init(&ap->xmit_lock); diff --git a/drivers/net/saa9730.c b/drivers/net/saa9730.c index 451486b32f2..7dae4d40497 100644 
--- a/drivers/net/saa9730.c +++ b/drivers/net/saa9730.c @@ -940,15 +940,14 @@ static void lan_saa9730_set_multicast(struct net_device *dev) CAM_CONTROL_GROUP_ACC | CAM_CONTROL_BROAD_ACC, &lp->lan_saa9730_regs->CamCtl); } else { - if (dev->flags & IFF_ALLMULTI) { + if (dev->flags & IFF_ALLMULTI || dev->mc_count) { /* accept all multicast packets */ - writel(CAM_CONTROL_COMP_EN | CAM_CONTROL_GROUP_ACC | - CAM_CONTROL_BROAD_ACC, - &lp->lan_saa9730_regs->CamCtl); - } else { /* * Will handle the multicast stuff later. -carstenl */ + writel(CAM_CONTROL_COMP_EN | CAM_CONTROL_GROUP_ACC | + CAM_CONTROL_BROAD_ACC, + &lp->lan_saa9730_regs->CamCtl); } } diff --git a/drivers/net/shaper.c b/drivers/net/shaper.c index e886e8d7cfd..4c3d98ff4cd 100644 --- a/drivers/net/shaper.c +++ b/drivers/net/shaper.c @@ -600,10 +600,9 @@ static int __init shaper_init(void) return -ENODEV; alloc_size = sizeof(*dev) * shapers; - devs = kmalloc(alloc_size, GFP_KERNEL); + devs = kzalloc(alloc_size, GFP_KERNEL); if (!devs) return -ENOMEM; - memset(devs, 0, alloc_size); for (i = 0; i < shapers; i++) { diff --git a/drivers/net/tc35815.c b/drivers/net/tc35815.c index 75655add3f3..7f94ca93098 100644 --- a/drivers/net/tc35815.c +++ b/drivers/net/tc35815.c @@ -626,7 +626,7 @@ static int __devinit tc35815_read_plat_dev_addr(struct net_device *dev) return -ENODEV; } #else -static int __devinit tc35815_read_plat_dev_addr(struct device *dev) +static int __devinit tc35815_read_plat_dev_addr(struct net_device *dev) { return -ENODEV; } diff --git a/drivers/net/wan/c101.c b/drivers/net/wan/c101.c index 6b63b350cd5..8ead774d14c 100644 --- a/drivers/net/wan/c101.c +++ b/drivers/net/wan/c101.c @@ -315,12 +315,11 @@ static int __init c101_run(unsigned long irq, unsigned long winbase) return -ENODEV; } - card = kmalloc(sizeof(card_t), GFP_KERNEL); + card = kzalloc(sizeof(card_t), GFP_KERNEL); if (card == NULL) { printk(KERN_ERR "c101: unable to allocate memory\n"); return -ENOBUFS; } - memset(card, 0, sizeof(card_t)); 
card->dev = alloc_hdlcdev(card); if (!card->dev) { diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c index 9ef49ce148b..26058b4f8f3 100644 --- a/drivers/net/wan/cosa.c +++ b/drivers/net/wan/cosa.c @@ -572,13 +572,11 @@ static int cosa_probe(int base, int irq, int dma) sprintf(cosa->name, "cosa%d", cosa->num); /* Initialize the per-channel data */ - cosa->chan = kmalloc(sizeof(struct channel_data)*cosa->nchannels, - GFP_KERNEL); + cosa->chan = kcalloc(cosa->nchannels, sizeof(struct channel_data), GFP_KERNEL); if (!cosa->chan) { err = -ENOMEM; goto err_out3; } - memset(cosa->chan, 0, sizeof(struct channel_data)*cosa->nchannels); for (i=0; i<cosa->nchannels; i++) { cosa->chan[i].cosa = cosa; cosa->chan[i].num = i; diff --git a/drivers/net/wan/cycx_main.c b/drivers/net/wan/cycx_main.c index 6e5f1c89851..a0e8611ad8e 100644 --- a/drivers/net/wan/cycx_main.c +++ b/drivers/net/wan/cycx_main.c @@ -113,12 +113,10 @@ static int __init cycx_init(void) /* Verify number of cards and allocate adapter data space */ cycx_ncards = min_t(int, cycx_ncards, CYCX_MAX_CARDS); cycx_ncards = max_t(int, cycx_ncards, 1); - cycx_card_array = kmalloc(sizeof(struct cycx_device) * cycx_ncards, - GFP_KERNEL); + cycx_card_array = kcalloc(cycx_ncards, sizeof(struct cycx_device), GFP_KERNEL); if (!cycx_card_array) goto out; - memset(cycx_card_array, 0, sizeof(struct cycx_device) * cycx_ncards); /* Register adapters with WAN router */ for (cnt = 0; cnt < cycx_ncards; ++cnt) { diff --git a/drivers/net/wan/cycx_x25.c b/drivers/net/wan/cycx_x25.c index 016b3ff3ea5..a8af28b273d 100644 --- a/drivers/net/wan/cycx_x25.c +++ b/drivers/net/wan/cycx_x25.c @@ -376,11 +376,10 @@ static int cycx_wan_new_if(struct wan_device *wandev, struct net_device *dev, } /* allocate and initialize private data */ - chan = kmalloc(sizeof(struct cycx_x25_channel), GFP_KERNEL); + chan = kzalloc(sizeof(struct cycx_x25_channel), GFP_KERNEL); if (!chan) return -ENOMEM; - memset(chan, 0, sizeof(*chan)); strcpy(chan->name, 
conf->name); chan->card = card; chan->link = conf->port; diff --git a/drivers/net/wan/dscc4.c b/drivers/net/wan/dscc4.c index dca02447145..50d2f9108dc 100644 --- a/drivers/net/wan/dscc4.c +++ b/drivers/net/wan/dscc4.c @@ -890,12 +890,11 @@ static int dscc4_found1(struct pci_dev *pdev, void __iomem *ioaddr) struct dscc4_dev_priv *root; int i, ret = -ENOMEM; - root = kmalloc(dev_per_card*sizeof(*root), GFP_KERNEL); + root = kcalloc(dev_per_card, sizeof(*root), GFP_KERNEL); if (!root) { printk(KERN_ERR "%s: can't allocate data\n", DRV_NAME); goto err_out; } - memset(root, 0, dev_per_card*sizeof(*root)); for (i = 0; i < dev_per_card; i++) { root[i].dev = alloc_hdlcdev(root + i); @@ -903,12 +902,11 @@ static int dscc4_found1(struct pci_dev *pdev, void __iomem *ioaddr) goto err_free_dev; } - ppriv = kmalloc(sizeof(*ppriv), GFP_KERNEL); + ppriv = kzalloc(sizeof(*ppriv), GFP_KERNEL); if (!ppriv) { printk(KERN_ERR "%s: can't allocate private data\n", DRV_NAME); goto err_free_dev; } - memset(ppriv, 0, sizeof(struct dscc4_pci_priv)); ppriv->root = root; spin_lock_init(&ppriv->lock); diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c index 58a53b6d9b4..12dae8e2484 100644 --- a/drivers/net/wan/farsync.c +++ b/drivers/net/wan/farsync.c @@ -2476,13 +2476,12 @@ fst_add_one(struct pci_dev *pdev, const struct pci_device_id *ent) } /* Allocate driver private data */ - card = kmalloc(sizeof (struct fst_card_info), GFP_KERNEL); + card = kzalloc(sizeof (struct fst_card_info), GFP_KERNEL); if (card == NULL) { printk_err("FarSync card found but insufficient memory for" " driver storage\n"); return -ENOMEM; } - memset(card, 0, sizeof (struct fst_card_info)); /* Try to enable the device */ if ((err = pci_enable_device(pdev)) != 0) { diff --git a/drivers/net/wan/hostess_sv11.c b/drivers/net/wan/hostess_sv11.c index 9ba3e4ee6ec..bf5f8d9b5c8 100644 --- a/drivers/net/wan/hostess_sv11.c +++ b/drivers/net/wan/hostess_sv11.c @@ -231,11 +231,10 @@ static struct sv11_device 
*sv11_init(int iobase, int irq) return NULL; } - sv = kmalloc(sizeof(struct sv11_device), GFP_KERNEL); + sv = kzalloc(sizeof(struct sv11_device), GFP_KERNEL); if(!sv) goto fail3; - memset(sv, 0, sizeof(*sv)); sv->if_ptr=&sv->netdev; sv->netdev.dev = alloc_netdev(0, "hdlc%d", sv11_setup); diff --git a/drivers/net/wan/n2.c b/drivers/net/wan/n2.c index 5c322dfb79f..cbdf0b748bd 100644 --- a/drivers/net/wan/n2.c +++ b/drivers/net/wan/n2.c @@ -351,12 +351,11 @@ static int __init n2_run(unsigned long io, unsigned long irq, return -ENODEV; } - card = kmalloc(sizeof(card_t), GFP_KERNEL); + card = kzalloc(sizeof(card_t), GFP_KERNEL); if (card == NULL) { printk(KERN_ERR "n2: unable to allocate memory\n"); return -ENOBUFS; } - memset(card, 0, sizeof(card_t)); card->ports[0].dev = alloc_hdlcdev(&card->ports[0]); card->ports[1].dev = alloc_hdlcdev(&card->ports[1]); diff --git a/drivers/net/wan/pc300_drv.c b/drivers/net/wan/pc300_drv.c index 5d8c78ee2cd..99fee2f1d01 100644 --- a/drivers/net/wan/pc300_drv.c +++ b/drivers/net/wan/pc300_drv.c @@ -3456,7 +3456,7 @@ cpc_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if ((err = pci_enable_device(pdev)) < 0) return err; - card = kmalloc(sizeof(pc300_t), GFP_KERNEL); + card = kzalloc(sizeof(pc300_t), GFP_KERNEL); if (card == NULL) { printk("PC300 found at RAM 0x%016llx, " "but could not allocate card structure.\n", @@ -3464,7 +3464,6 @@ cpc_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) err = -ENOMEM; goto err_disable_dev; } - memset(card, 0, sizeof(pc300_t)); err = -ENODEV; diff --git a/drivers/net/wan/pc300too.c b/drivers/net/wan/pc300too.c index dfbd3b00f03..6353cb5c658 100644 --- a/drivers/net/wan/pc300too.c +++ b/drivers/net/wan/pc300too.c @@ -334,14 +334,13 @@ static int __devinit pc300_pci_init_one(struct pci_dev *pdev, return i; } - card = kmalloc(sizeof(card_t), GFP_KERNEL); + card = kzalloc(sizeof(card_t), GFP_KERNEL); if (card == NULL) { printk(KERN_ERR "pc300: unable to allocate memory\n"); 
pci_release_regions(pdev); pci_disable_device(pdev); return -ENOBUFS; } - memset(card, 0, sizeof(card_t)); pci_set_drvdata(pdev, card); if (pdev->device == PCI_DEVICE_ID_PC300_TE_1 || diff --git a/drivers/net/wan/pci200syn.c b/drivers/net/wan/pci200syn.c index 7f720de2e9f..092e51d8903 100644 --- a/drivers/net/wan/pci200syn.c +++ b/drivers/net/wan/pci200syn.c @@ -312,14 +312,13 @@ static int __devinit pci200_pci_init_one(struct pci_dev *pdev, return i; } - card = kmalloc(sizeof(card_t), GFP_KERNEL); + card = kzalloc(sizeof(card_t), GFP_KERNEL); if (card == NULL) { printk(KERN_ERR "pci200syn: unable to allocate memory\n"); pci_release_regions(pdev); pci_disable_device(pdev); return -ENOBUFS; } - memset(card, 0, sizeof(card_t)); pci_set_drvdata(pdev, card); card->ports[0].dev = alloc_hdlcdev(&card->ports[0]); card->ports[1].dev = alloc_hdlcdev(&card->ports[1]); diff --git a/drivers/net/wan/sdla.c b/drivers/net/wan/sdla.c index 6a485f0556f..792e588d7d6 100644 --- a/drivers/net/wan/sdla.c +++ b/drivers/net/wan/sdla.c @@ -1196,10 +1196,9 @@ static int sdla_xfer(struct net_device *dev, struct sdla_mem __user *info, int r if (read) { - temp = kmalloc(mem.len, GFP_KERNEL); + temp = kzalloc(mem.len, GFP_KERNEL); if (!temp) return(-ENOMEM); - memset(temp, 0, mem.len); sdla_read(dev, mem.addr, temp, mem.len); if(copy_to_user(mem.data, temp, mem.len)) { diff --git a/drivers/net/wan/sealevel.c b/drivers/net/wan/sealevel.c index 131358108c5..11276bf3149 100644 --- a/drivers/net/wan/sealevel.c +++ b/drivers/net/wan/sealevel.c @@ -270,11 +270,10 @@ static __init struct slvl_board *slvl_init(int iobase, int irq, return NULL; } - b = kmalloc(sizeof(struct slvl_board), GFP_KERNEL); + b = kzalloc(sizeof(struct slvl_board), GFP_KERNEL); if(!b) goto fail3; - memset(b, 0, sizeof(*b)); if (!(b->dev[0]= slvl_alloc(iobase, irq))) goto fail2; diff --git a/drivers/net/wan/wanxl.c b/drivers/net/wan/wanxl.c index c7360157433..3c78f985638 100644 --- a/drivers/net/wan/wanxl.c +++ 
b/drivers/net/wan/wanxl.c @@ -599,7 +599,7 @@ static int __devinit wanxl_pci_init_one(struct pci_dev *pdev, } alloc_size = sizeof(card_t) + ports * sizeof(port_t); - card = kmalloc(alloc_size, GFP_KERNEL); + card = kzalloc(alloc_size, GFP_KERNEL); if (card == NULL) { printk(KERN_ERR "wanXL %s: unable to allocate memory\n", pci_name(pdev)); @@ -607,7 +607,6 @@ static int __devinit wanxl_pci_init_one(struct pci_dev *pdev, pci_disable_device(pdev); return -ENOBUFS; } - memset(card, 0, alloc_size); pci_set_drvdata(pdev, card); card->pdev = pdev; diff --git a/drivers/net/wan/x25_asy.c b/drivers/net/wan/x25_asy.c index 1c9edd97acc..c48b1cc63fd 100644 --- a/drivers/net/wan/x25_asy.c +++ b/drivers/net/wan/x25_asy.c @@ -786,14 +786,12 @@ static int __init init_x25_asy(void) printk(KERN_INFO "X.25 async: version 0.00 ALPHA " "(dynamic channels, max=%d).\n", x25_asy_maxdev ); - x25_asy_devs = kmalloc(sizeof(struct net_device *)*x25_asy_maxdev, - GFP_KERNEL); + x25_asy_devs = kcalloc(x25_asy_maxdev, sizeof(struct net_device*), GFP_KERNEL); if (!x25_asy_devs) { printk(KERN_WARNING "X25 async: Can't allocate x25_asy_ctrls[] " "array! Uaargh! 
(-> No X.25 available)\n"); return -ENOMEM; } - memset(x25_asy_devs, 0, sizeof(struct net_device *)*x25_asy_maxdev); return tty_register_ldisc(N_X25, &x25_ldisc); } diff --git a/drivers/net/wireless/ipw2100.c b/drivers/net/wireless/ipw2100.c index 072ede71e57..8990585bd22 100644 --- a/drivers/net/wireless/ipw2100.c +++ b/drivers/net/wireless/ipw2100.c @@ -7868,10 +7868,10 @@ static int ipw2100_wx_set_powermode(struct net_device *dev, goto done; } - if ((mode < 1) || (mode > POWER_MODES)) + if ((mode < 0) || (mode > POWER_MODES)) mode = IPW_POWER_AUTO; - if (priv->power_mode != mode) + if (IPW_POWER_LEVEL(priv->power_mode) != mode) err = ipw2100_set_power_mode(priv, mode); done: mutex_unlock(&priv->action_mutex); @@ -7902,7 +7902,7 @@ static int ipw2100_wx_get_powermode(struct net_device *dev, break; case IPW_POWER_AUTO: snprintf(extra, MAX_POWER_STRING, - "Power save level: %d (Auto)", 0); + "Power save level: %d (Auto)", level); break; default: timeout = timeout_duration[level - 1] / 1000; diff --git a/drivers/net/wireless/ipw2200.c b/drivers/net/wireless/ipw2200.c index aa32a97380e..61497c46746 100644 --- a/drivers/net/wireless/ipw2200.c +++ b/drivers/net/wireless/ipw2200.c @@ -70,7 +70,7 @@ #define VQ #endif -#define IPW2200_VERSION "1.2.0" VK VD VM VP VR VQ +#define IPW2200_VERSION "1.2.2" VK VD VM VP VR VQ #define DRV_DESCRIPTION "Intel(R) PRO/Wireless 2200/2915 Network Driver" #define DRV_COPYRIGHT "Copyright(c) 2003-2006 Intel Corporation" #define DRV_VERSION IPW2200_VERSION @@ -2506,7 +2506,7 @@ static int ipw_send_power_mode(struct ipw_priv *priv, u32 mode) break; } - param = cpu_to_le32(mode); + param = cpu_to_le32(param); return ipw_send_cmd_pdu(priv, IPW_CMD_POWER_MODE, sizeof(param), ¶m); } @@ -9568,6 +9568,7 @@ static int ipw_wx_set_power(struct net_device *dev, priv->power_mode = IPW_POWER_ENABLED | IPW_POWER_BATTERY; else priv->power_mode = IPW_POWER_ENABLED | priv->power_mode; + err = ipw_send_power_mode(priv, IPW_POWER_LEVEL(priv->power_mode)); if 
(err) { IPW_DEBUG_WX("failed setting power mode.\n"); @@ -9604,22 +9605,19 @@ static int ipw_wx_set_powermode(struct net_device *dev, struct ipw_priv *priv = ieee80211_priv(dev); int mode = *(int *)extra; int err; + mutex_lock(&priv->mutex); - if ((mode < 1) || (mode > IPW_POWER_LIMIT)) { + if ((mode < 1) || (mode > IPW_POWER_LIMIT)) mode = IPW_POWER_AC; - priv->power_mode = mode; - } else { - priv->power_mode = IPW_POWER_ENABLED | mode; - } - if (priv->power_mode != mode) { + if (IPW_POWER_LEVEL(priv->power_mode) != mode) { err = ipw_send_power_mode(priv, mode); - if (err) { IPW_DEBUG_WX("failed setting power mode.\n"); mutex_unlock(&priv->mutex); return err; } + priv->power_mode = IPW_POWER_ENABLED | mode; } mutex_unlock(&priv->mutex); return 0; @@ -10555,7 +10553,7 @@ static irqreturn_t ipw_isr(int irq, void *data) spin_lock(&priv->irq_lock); if (!(priv->status & STATUS_INT_ENABLED)) { - /* Shared IRQ */ + /* IRQ is disabled */ goto none; } diff --git a/drivers/net/wireless/zd1211rw/zd_usb.c b/drivers/net/wireless/zd1211rw/zd_usb.c index 28d41a29d7b..a9c339ef116 100644 --- a/drivers/net/wireless/zd1211rw/zd_usb.c +++ b/drivers/net/wireless/zd1211rw/zd_usb.c @@ -72,6 +72,8 @@ static struct usb_device_id usb_ids[] = { { USB_DEVICE(0x0586, 0x3413), .driver_info = DEVICE_ZD1211B }, { USB_DEVICE(0x0053, 0x5301), .driver_info = DEVICE_ZD1211B }, { USB_DEVICE(0x0411, 0x00da), .driver_info = DEVICE_ZD1211B }, + { USB_DEVICE(0x2019, 0x5303), .driver_info = DEVICE_ZD1211B }, + { USB_DEVICE(0x129b, 0x1667), .driver_info = DEVICE_ZD1211B }, /* "Driverless" devices that need ejecting */ { USB_DEVICE(0x0ace, 0x2011), .driver_info = DEVICE_INSTALLER }, { USB_DEVICE(0x0ace, 0x20ff), .driver_info = DEVICE_INSTALLER }, diff --git a/drivers/nubus/nubus.c b/drivers/nubus/nubus.c index 3a0a3a73493..e503c9c9803 100644 --- a/drivers/nubus/nubus.c +++ b/drivers/nubus/nubus.c @@ -466,9 +466,8 @@ static struct nubus_dev* __init parent->base, dir.base); /* Actually we should probably 
panic if this fails */ - if ((dev = kmalloc(sizeof(*dev), GFP_ATOMIC)) == NULL) + if ((dev = kzalloc(sizeof(*dev), GFP_ATOMIC)) == NULL) return NULL; - memset(dev, 0, sizeof(*dev)); dev->resid = parent->type; dev->directory = dir.base; dev->board = board; @@ -800,9 +799,8 @@ static struct nubus_board* __init nubus_add_board(int slot, int bytelanes) nubus_rewind(&rp, FORMAT_BLOCK_SIZE, bytelanes); /* Actually we should probably panic if this fails */ - if ((board = kmalloc(sizeof(*board), GFP_ATOMIC)) == NULL) + if ((board = kzalloc(sizeof(*board), GFP_ATOMIC)) == NULL) return NULL; - memset(board, 0, sizeof(*board)); board->fblock = rp; /* Dump the format block for debugging purposes */ diff --git a/drivers/parport/parport_cs.c b/drivers/parport/parport_cs.c index 8b7d84eca05..802a81d4736 100644 --- a/drivers/parport/parport_cs.c +++ b/drivers/parport/parport_cs.c @@ -105,9 +105,8 @@ static int parport_probe(struct pcmcia_device *link) DEBUG(0, "parport_attach()\n"); /* Create new parport device */ - info = kmalloc(sizeof(*info), GFP_KERNEL); + info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) return -ENOMEM; - memset(info, 0, sizeof(*info)); link->priv = info; info->p_dev = link; diff --git a/drivers/parport/parport_serial.c b/drivers/parport/parport_serial.c index 90ea3b8b99b..bd6ad8b3816 100644 --- a/drivers/parport/parport_serial.c +++ b/drivers/parport/parport_serial.c @@ -324,10 +324,9 @@ static int __devinit parport_serial_pci_probe (struct pci_dev *dev, struct parport_serial_private *priv; int err; - priv = kmalloc (sizeof *priv, GFP_KERNEL); + priv = kzalloc (sizeof *priv, GFP_KERNEL); if (!priv) return -ENOMEM; - memset(priv, 0, sizeof(struct parport_serial_private)); pci_set_drvdata (dev, priv); err = pci_enable_device (dev); diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c index 6846fb42b39..ad90a01b0df 100644 --- a/drivers/pci/pcie/aer/aerdrv.c +++ b/drivers/pci/pcie/aer/aerdrv.c @@ -148,11 +148,10 @@ static struct 
aer_rpc* aer_alloc_rpc(struct pcie_device *dev) { struct aer_rpc *rpc; - if (!(rpc = kmalloc(sizeof(struct aer_rpc), + if (!(rpc = kzalloc(sizeof(struct aer_rpc), GFP_KERNEL))) return NULL; - memset(rpc, 0, sizeof(struct aer_rpc)); /* * Initialize Root lock access, e_lock, to Root Error Status Reg, * Root Error ID Reg, and Root error producer/consumer index. diff --git a/drivers/pcmcia/ds.c b/drivers/pcmcia/ds.c index 143c6efc478..a99607142fc 100644 --- a/drivers/pcmcia/ds.c +++ b/drivers/pcmcia/ds.c @@ -1127,6 +1127,34 @@ static int pcmcia_bus_uevent(struct device *dev, char **envp, int num_envp, #endif +/************************ runtime PM support ***************************/ + +static int pcmcia_dev_suspend(struct device *dev, pm_message_t state); +static int pcmcia_dev_resume(struct device *dev); + +static int runtime_suspend(struct device *dev) +{ + int rc; + + down(&dev->sem); + rc = pcmcia_dev_suspend(dev, PMSG_SUSPEND); + up(&dev->sem); + if (!rc) + dev->power.power_state.event = PM_EVENT_SUSPEND; + return rc; +} + +static void runtime_resume(struct device *dev) +{ + int rc; + + down(&dev->sem); + rc = pcmcia_dev_resume(dev); + up(&dev->sem); + if (!rc) + dev->power.power_state.event = PM_EVENT_ON; +} + /************************ per-device sysfs output ***************************/ #define pcmcia_device_attr(field, test, format) \ @@ -1173,9 +1201,9 @@ static ssize_t pcmcia_store_pm_state(struct device *dev, struct device_attribute return -EINVAL; if ((!p_dev->suspended) && !strncmp(buf, "off", 3)) - ret = dpm_runtime_suspend(dev, PMSG_SUSPEND); + ret = runtime_suspend(dev); else if (p_dev->suspended && !strncmp(buf, "on", 2)) - dpm_runtime_resume(dev); + runtime_resume(dev); return ret ? 
ret : count; } @@ -1312,10 +1340,10 @@ static int pcmcia_bus_suspend_callback(struct device *dev, void * _data) struct pcmcia_socket *skt = _data; struct pcmcia_device *p_dev = to_pcmcia_dev(dev); - if (p_dev->socket != skt) + if (p_dev->socket != skt || p_dev->suspended) return 0; - return dpm_runtime_suspend(dev, PMSG_SUSPEND); + return runtime_suspend(dev); } static int pcmcia_bus_resume_callback(struct device *dev, void * _data) @@ -1323,10 +1351,10 @@ static int pcmcia_bus_resume_callback(struct device *dev, void * _data) struct pcmcia_socket *skt = _data; struct pcmcia_device *p_dev = to_pcmcia_dev(dev); - if (p_dev->socket != skt) + if (p_dev->socket != skt || !p_dev->suspended) return 0; - dpm_runtime_resume(dev); + runtime_resume(dev); return 0; } diff --git a/drivers/pnp/core.c b/drivers/pnp/core.c index 3e20b1cc777..8e7b2dd3881 100644 --- a/drivers/pnp/core.c +++ b/drivers/pnp/core.c @@ -35,12 +35,11 @@ void *pnp_alloc(long size) { void *result; - result = kmalloc(size, GFP_KERNEL); + result = kzalloc(size, GFP_KERNEL); if (!result){ printk(KERN_ERR "pnp: Out of Memory\n"); return NULL; } - memset(result, 0, size); return result; } diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c index f935c1f71a5..44420723a35 100644 --- a/drivers/rapidio/rio-scan.c +++ b/drivers/rapidio/rio-scan.c @@ -297,11 +297,10 @@ static struct rio_dev *rio_setup_device(struct rio_net *net, struct rio_switch *rswitch; int result, rdid; - rdev = kmalloc(sizeof(struct rio_dev), GFP_KERNEL); + rdev = kzalloc(sizeof(struct rio_dev), GFP_KERNEL); if (!rdev) goto out; - memset(rdev, 0, sizeof(struct rio_dev)); rdev->net = net; rio_mport_read_config_32(port, destid, hopcount, RIO_DEV_ID_CAR, &result); @@ -801,9 +800,8 @@ static struct rio_net __devinit *rio_alloc_net(struct rio_mport *port) { struct rio_net *net; - net = kmalloc(sizeof(struct rio_net), GFP_KERNEL); + net = kzalloc(sizeof(struct rio_net), GFP_KERNEL); if (net) { - memset(net, 0, sizeof(struct rio_net)); 
INIT_LIST_HEAD(&net->node); INIT_LIST_HEAD(&net->devices); INIT_LIST_HEAD(&net->mports); diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index e24ea82dc35..5d760bb6c2c 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -235,7 +235,7 @@ static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t) return 0; } -static int cmos_set_freq(struct device *dev, int freq) +static int cmos_irq_set_freq(struct device *dev, int freq) { struct cmos_rtc *cmos = dev_get_drvdata(dev); int f; @@ -259,6 +259,34 @@ static int cmos_set_freq(struct device *dev, int freq) return 0; } +static int cmos_irq_set_state(struct device *dev, int enabled) +{ + struct cmos_rtc *cmos = dev_get_drvdata(dev); + unsigned char rtc_control, rtc_intr; + unsigned long flags; + + if (!is_valid_irq(cmos->irq)) + return -ENXIO; + + spin_lock_irqsave(&rtc_lock, flags); + rtc_control = CMOS_READ(RTC_CONTROL); + + if (enabled) + rtc_control |= RTC_PIE; + else + rtc_control &= ~RTC_PIE; + + CMOS_WRITE(rtc_control, RTC_CONTROL); + + rtc_intr = CMOS_READ(RTC_INTR_FLAGS); + rtc_intr &= (rtc_control & RTC_IRQMASK) | RTC_IRQF; + if (is_intr(rtc_intr)) + rtc_update_irq(cmos->rtc, 1, rtc_intr); + + spin_unlock_irqrestore(&rtc_lock, flags); + return 0; +} + #if defined(CONFIG_RTC_INTF_DEV) || defined(CONFIG_RTC_INTF_DEV_MODULE) static int @@ -360,7 +388,8 @@ static const struct rtc_class_ops cmos_rtc_ops = { .read_alarm = cmos_read_alarm, .set_alarm = cmos_set_alarm, .proc = cmos_procfs, - .irq_set_freq = cmos_set_freq, + .irq_set_freq = cmos_irq_set_freq, + .irq_set_state = cmos_irq_set_state, }; /*----------------------------------------------------------------*/ diff --git a/drivers/s390/char/tape_34xx.c b/drivers/s390/char/tape_34xx.c index e765875e8db..80e7a537e7d 100644 --- a/drivers/s390/char/tape_34xx.c +++ b/drivers/s390/char/tape_34xx.c @@ -131,10 +131,9 @@ tape_34xx_schedule_work(struct tape_device *device, enum tape_op op) { struct tape_34xx_work *p; - if ((p = 
kmalloc(sizeof(*p), GFP_ATOMIC)) == NULL) + if ((p = kzalloc(sizeof(*p), GFP_ATOMIC)) == NULL) return -ENOMEM; - memset(p, 0, sizeof(*p)); INIT_WORK(&p->work, tape_34xx_work_handler); p->device = tape_get_device_reference(device); diff --git a/drivers/s390/net/claw.c b/drivers/s390/net/claw.c index 348bb7b8277..023455a0b34 100644 --- a/drivers/s390/net/claw.c +++ b/drivers/s390/net/claw.c @@ -317,8 +317,8 @@ claw_probe(struct ccwgroup_device *cgdev) CLAW_DBF_TEXT_(2,setup,"probex%d",-ENOMEM); return -ENOMEM; } - privptr->p_mtc_envelope= kmalloc( MAX_ENVELOPE_SIZE, GFP_KERNEL); - privptr->p_env = kmalloc(sizeof(struct claw_env), GFP_KERNEL); + privptr->p_mtc_envelope= kzalloc( MAX_ENVELOPE_SIZE, GFP_KERNEL); + privptr->p_env = kzalloc(sizeof(struct claw_env), GFP_KERNEL); if ((privptr->p_mtc_envelope==NULL) || (privptr->p_env==NULL)) { probe_error(cgdev); put_device(&cgdev->dev); @@ -327,8 +327,6 @@ claw_probe(struct ccwgroup_device *cgdev) CLAW_DBF_TEXT_(2,setup,"probex%d",-ENOMEM); return -ENOMEM; } - memset(privptr->p_mtc_envelope, 0x00, MAX_ENVELOPE_SIZE); - memset(privptr->p_env, 0x00, sizeof(struct claw_env)); memcpy(privptr->p_env->adapter_name,WS_NAME_NOT_DEF,8); memcpy(privptr->p_env->host_name,WS_NAME_NOT_DEF,8); memcpy(privptr->p_env->api_type,WS_NAME_NOT_DEF,8); @@ -3924,7 +3922,7 @@ add_channel(struct ccw_device *cdev,int i,struct claw_privbk *privptr) snprintf(p_ch->id, CLAW_ID_SIZE, "cl-%s", cdev->dev.bus_id); ccw_device_get_id(cdev, &dev_id); p_ch->devno = dev_id.devno; - if ((p_ch->irb = kmalloc(sizeof (struct irb),GFP_KERNEL)) == NULL) { + if ((p_ch->irb = kzalloc(sizeof (struct irb),GFP_KERNEL)) == NULL) { printk(KERN_WARNING "%s Out of memory in %s for irb\n", p_ch->id,__FUNCTION__); #ifdef FUNCTRACE @@ -3933,7 +3931,6 @@ add_channel(struct ccw_device *cdev,int i,struct claw_privbk *privptr) #endif return -ENOMEM; } - memset(p_ch->irb, 0, sizeof (struct irb)); #ifdef FUNCTRACE printk(KERN_INFO "%s:%s Exit on line %d\n", 
cdev->dev.bus_id,__FUNCTION__,__LINE__); diff --git a/drivers/sbus/char/bbc_i2c.c b/drivers/sbus/char/bbc_i2c.c index 178155bf9db..fbadd4d761f 100644 --- a/drivers/sbus/char/bbc_i2c.c +++ b/drivers/sbus/char/bbc_i2c.c @@ -156,10 +156,9 @@ struct bbc_i2c_client *bbc_i2c_attach(struct linux_ebus_child *echild) if (!bp) return NULL; - client = kmalloc(sizeof(*client), GFP_KERNEL); + client = kzalloc(sizeof(*client), GFP_KERNEL); if (!client) return NULL; - memset(client, 0, sizeof(*client)); client->bp = bp; client->echild = echild; client->bus = echild->resource[0].start; diff --git a/drivers/sbus/char/vfc_dev.c b/drivers/sbus/char/vfc_dev.c index 6afc7e5df0d..26b1d2a17ed 100644 --- a/drivers/sbus/char/vfc_dev.c +++ b/drivers/sbus/char/vfc_dev.c @@ -656,12 +656,9 @@ static int vfc_probe(void) if (!cards) return -ENODEV; - vfc_dev_lst = kmalloc(sizeof(struct vfc_dev *) * - (cards+1), - GFP_KERNEL); + vfc_dev_lst = kcalloc(cards + 1, sizeof(struct vfc_dev*), GFP_KERNEL); if (vfc_dev_lst == NULL) return -ENOMEM; - memset(vfc_dev_lst, 0, sizeof(struct vfc_dev *) * (cards + 1)); vfc_dev_lst[cards] = NULL; ret = register_chrdev(VFC_MAJOR, vfcstr, &vfc_fops); diff --git a/drivers/scsi/3w-9xxx.c b/drivers/scsi/3w-9xxx.c index 76c09097175..6b49f6a2524 100644 --- a/drivers/scsi/3w-9xxx.c +++ b/drivers/scsi/3w-9xxx.c @@ -1160,13 +1160,12 @@ static int twa_initialize_device_extension(TW_Device_Extension *tw_dev) } /* Allocate event info space */ - tw_dev->event_queue[0] = kmalloc(sizeof(TW_Event) * TW_Q_LENGTH, GFP_KERNEL); + tw_dev->event_queue[0] = kcalloc(TW_Q_LENGTH, sizeof(TW_Event), GFP_KERNEL); if (!tw_dev->event_queue[0]) { TW_PRINTK(tw_dev->host, TW_DRIVER, 0x18, "Event info memory allocation failed"); goto out; } - memset(tw_dev->event_queue[0], 0, sizeof(TW_Event) * TW_Q_LENGTH); for (i = 0; i < TW_Q_LENGTH; i++) { tw_dev->event_queue[i] = (TW_Event *)((unsigned char *)tw_dev->event_queue[0] + (i * sizeof(TW_Event))); diff --git a/drivers/scsi/NCR53C9x.c 
b/drivers/scsi/NCR53C9x.c index 8b5334c56f0..773d11dd995 100644 --- a/drivers/scsi/NCR53C9x.c +++ b/drivers/scsi/NCR53C9x.c @@ -3606,11 +3606,10 @@ out: int esp_slave_alloc(struct scsi_device *SDptr) { struct esp_device *esp_dev = - kmalloc(sizeof(struct esp_device), GFP_ATOMIC); + kzalloc(sizeof(struct esp_device), GFP_ATOMIC); if (!esp_dev) return -ENOMEM; - memset(esp_dev, 0, sizeof(struct esp_device)); SDptr->hostdata = esp_dev; return 0; } diff --git a/drivers/scsi/NCR_D700.c b/drivers/scsi/NCR_D700.c index f12864abed2..3a8089705fe 100644 --- a/drivers/scsi/NCR_D700.c +++ b/drivers/scsi/NCR_D700.c @@ -181,13 +181,12 @@ NCR_D700_probe_one(struct NCR_D700_private *p, int siop, int irq, struct Scsi_Host *host; int ret; - hostdata = kmalloc(sizeof(*hostdata), GFP_KERNEL); + hostdata = kzalloc(sizeof(*hostdata), GFP_KERNEL); if (!hostdata) { printk(KERN_ERR "NCR D700: SIOP%d: Failed to allocate host" "data, detatching\n", siop); return -ENOMEM; } - memset(hostdata, 0, sizeof(*hostdata)); if (!request_region(region, 64, "NCR_D700")) { printk(KERN_ERR "NCR D700: Failed to reserve IO region 0x%x\n", diff --git a/drivers/scsi/NCR_Q720.c b/drivers/scsi/NCR_Q720.c index 778844c3544..a8bbdc2273b 100644 --- a/drivers/scsi/NCR_Q720.c +++ b/drivers/scsi/NCR_Q720.c @@ -148,11 +148,10 @@ NCR_Q720_probe(struct device *dev) __u32 base_addr, mem_size; void __iomem *mem_base; - p = kmalloc(sizeof(*p), GFP_KERNEL); + p = kzalloc(sizeof(*p), GFP_KERNEL); if (!p) return -ENOMEM; - memset(p, 0, sizeof(*p)); pos2 = mca_device_read_pos(mca_dev, 2); /* enable device */ pos2 |= NCR_Q720_POS2_BOARD_ENABLE | NCR_Q720_POS2_INTERRUPT_ENABLE; diff --git a/drivers/scsi/imm.c b/drivers/scsi/imm.c index 0464c182c57..005d2b05f32 100644 --- a/drivers/scsi/imm.c +++ b/drivers/scsi/imm.c @@ -1159,11 +1159,10 @@ static int __imm_attach(struct parport *pb) init_waitqueue_head(&waiting); - dev = kmalloc(sizeof(imm_struct), GFP_KERNEL); + dev = kzalloc(sizeof(imm_struct), GFP_KERNEL); if (!dev) return 
-ENOMEM; - memset(dev, 0, sizeof(imm_struct)); dev->base = -1; dev->mode = IMM_AUTODETECT; diff --git a/drivers/scsi/ips.c b/drivers/scsi/ips.c index 9f8ed6b8157..492a51bd6aa 100644 --- a/drivers/scsi/ips.c +++ b/drivers/scsi/ips.c @@ -7068,14 +7068,13 @@ ips_init_phase1(struct pci_dev *pci_dev, int *indexPtr) subdevice_id = pci_dev->subsystem_device; /* found a controller */ - ha = kmalloc(sizeof (ips_ha_t), GFP_KERNEL); + ha = kzalloc(sizeof (ips_ha_t), GFP_KERNEL); if (ha == NULL) { IPS_PRINTK(KERN_WARNING, pci_dev, "Unable to allocate temporary ha struct\n"); return -1; } - memset(ha, 0, sizeof (ips_ha_t)); ips_sh[index] = NULL; ips_ha[index] = ha; diff --git a/drivers/scsi/lasi700.c b/drivers/scsi/lasi700.c index 5c32a69e41b..3126824da36 100644 --- a/drivers/scsi/lasi700.c +++ b/drivers/scsi/lasi700.c @@ -101,13 +101,12 @@ lasi700_probe(struct parisc_device *dev) struct NCR_700_Host_Parameters *hostdata; struct Scsi_Host *host; - hostdata = kmalloc(sizeof(*hostdata), GFP_KERNEL); + hostdata = kzalloc(sizeof(*hostdata), GFP_KERNEL); if (!hostdata) { printk(KERN_ERR "%s: Failed to allocate host data\n", dev->dev.bus_id); return -ENOMEM; } - memset(hostdata, 0, sizeof(struct NCR_700_Host_Parameters)); hostdata->dev = &dev->dev; dma_set_mask(&dev->dev, DMA_32BIT_MASK); diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index f81f85ee190..07bd0dcdf0d 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -1830,7 +1830,7 @@ lpfc_pci_probe_one(struct pci_dev *pdev, const struct pci_device_id *pid) /* Initialize and populate the iocb list per host. */ INIT_LIST_HEAD(&phba->lpfc_iocb_list); for (i = 0; i < LPFC_IOCB_LIST_CNT; i++) { - iocbq_entry = kmalloc(sizeof(struct lpfc_iocbq), GFP_KERNEL); + iocbq_entry = kzalloc(sizeof(struct lpfc_iocbq), GFP_KERNEL); if (iocbq_entry == NULL) { printk(KERN_ERR "%s: only allocated %d iocbs of " "expected %d count. 
Unloading driver.\n", @@ -1839,7 +1839,6 @@ lpfc_pci_probe_one(struct pci_dev *pdev, const struct pci_device_id *pid) goto out_free_iocbq; } - memset(iocbq_entry, 0, sizeof(struct lpfc_iocbq)); iotag = lpfc_sli_next_iotag(phba, iocbq_entry); if (iotag == 0) { kfree (iocbq_entry); diff --git a/drivers/scsi/megaraid/megaraid_mbox.c b/drivers/scsi/megaraid/megaraid_mbox.c index c46685a03a9..c6a53dccc16 100644 --- a/drivers/scsi/megaraid/megaraid_mbox.c +++ b/drivers/scsi/megaraid/megaraid_mbox.c @@ -454,7 +454,7 @@ megaraid_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) pci_set_master(pdev); // Allocate the per driver initialization structure - adapter = kmalloc(sizeof(adapter_t), GFP_KERNEL); + adapter = kzalloc(sizeof(adapter_t), GFP_KERNEL); if (adapter == NULL) { con_log(CL_ANN, (KERN_WARNING @@ -462,7 +462,6 @@ megaraid_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) goto out_probe_one; } - memset(adapter, 0, sizeof(adapter_t)); // set up PCI related soft state and other pre-known parameters @@ -746,10 +745,9 @@ megaraid_init_mbox(adapter_t *adapter) * Allocate and initialize the init data structure for mailbox * controllers */ - raid_dev = kmalloc(sizeof(mraid_device_t), GFP_KERNEL); + raid_dev = kzalloc(sizeof(mraid_device_t), GFP_KERNEL); if (raid_dev == NULL) return -1; - memset(raid_dev, 0, sizeof(mraid_device_t)); /* * Attach the adapter soft state to raid device soft state @@ -1050,8 +1048,7 @@ megaraid_alloc_cmd_packets(adapter_t *adapter) * since the calling routine does not yet know the number of available * commands. 
*/ - adapter->kscb_list = kmalloc(sizeof(scb_t) * MBOX_MAX_SCSI_CMDS, - GFP_KERNEL); + adapter->kscb_list = kcalloc(MBOX_MAX_SCSI_CMDS, sizeof(scb_t), GFP_KERNEL); if (adapter->kscb_list == NULL) { con_log(CL_ANN, (KERN_WARNING @@ -1059,7 +1056,6 @@ megaraid_alloc_cmd_packets(adapter_t *adapter) __LINE__)); goto out_free_ibuf; } - memset(adapter->kscb_list, 0, sizeof(scb_t) * MBOX_MAX_SCSI_CMDS); // memory allocation for our command packets if (megaraid_mbox_setup_dma_pools(adapter) != 0) { @@ -3495,8 +3491,7 @@ megaraid_cmm_register(adapter_t *adapter) int i; // Allocate memory for the base list of scb for management module. - adapter->uscb_list = kmalloc(sizeof(scb_t) * MBOX_MAX_USER_CMDS, - GFP_KERNEL); + adapter->uscb_list = kcalloc(MBOX_MAX_USER_CMDS, sizeof(scb_t), GFP_KERNEL); if (adapter->uscb_list == NULL) { con_log(CL_ANN, (KERN_WARNING @@ -3504,7 +3499,6 @@ megaraid_cmm_register(adapter_t *adapter) __LINE__)); return -1; } - memset(adapter->uscb_list, 0, sizeof(scb_t) * MBOX_MAX_USER_CMDS); // Initialize the synchronization parameters for resources for diff --git a/drivers/scsi/megaraid/megaraid_mm.c b/drivers/scsi/megaraid/megaraid_mm.c index 84d9c27133d..b6587a6d848 100644 --- a/drivers/scsi/megaraid/megaraid_mm.c +++ b/drivers/scsi/megaraid/megaraid_mm.c @@ -890,12 +890,11 @@ mraid_mm_register_adp(mraid_mmadp_t *lld_adp) if (lld_adp->drvr_type != DRVRTYPE_MBOX) return (-EINVAL); - adapter = kmalloc(sizeof(mraid_mmadp_t), GFP_KERNEL); + adapter = kzalloc(sizeof(mraid_mmadp_t), GFP_KERNEL); if (!adapter) return -ENOMEM; - memset(adapter, 0, sizeof(mraid_mmadp_t)); adapter->unique_id = lld_adp->unique_id; adapter->drvr_type = lld_adp->drvr_type; diff --git a/drivers/scsi/megaraid/megaraid_sas.c b/drivers/scsi/megaraid/megaraid_sas.c index b7f2e613c90..ebb948c016b 100644 --- a/drivers/scsi/megaraid/megaraid_sas.c +++ b/drivers/scsi/megaraid/megaraid_sas.c @@ -1636,15 +1636,13 @@ static int megasas_alloc_cmds(struct megasas_instance *instance) * Allocate 
the dynamic array first and then allocate individual * commands. */ - instance->cmd_list = kmalloc(sizeof(struct megasas_cmd *) * max_cmd, - GFP_KERNEL); + instance->cmd_list = kcalloc(max_cmd, sizeof(struct megasas_cmd*), GFP_KERNEL); if (!instance->cmd_list) { printk(KERN_DEBUG "megasas: out of memory\n"); return -ENOMEM; } - memset(instance->cmd_list, 0, sizeof(struct megasas_cmd *) * max_cmd); for (i = 0; i < max_cmd; i++) { instance->cmd_list[i] = kmalloc(sizeof(struct megasas_cmd), diff --git a/drivers/scsi/pcmcia/aha152x_stub.c b/drivers/scsi/pcmcia/aha152x_stub.c index 370802d24ac..2dd0dc9a9ae 100644 --- a/drivers/scsi/pcmcia/aha152x_stub.c +++ b/drivers/scsi/pcmcia/aha152x_stub.c @@ -106,9 +106,8 @@ static int aha152x_probe(struct pcmcia_device *link) DEBUG(0, "aha152x_attach()\n"); /* Create new SCSI device */ - info = kmalloc(sizeof(*info), GFP_KERNEL); + info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) return -ENOMEM; - memset(info, 0, sizeof(*info)); info->p_dev = link; link->priv = info; diff --git a/drivers/scsi/pcmcia/nsp_cs.c b/drivers/scsi/pcmcia/nsp_cs.c index c6f8c6e65e0..445cfbbca9b 100644 --- a/drivers/scsi/pcmcia/nsp_cs.c +++ b/drivers/scsi/pcmcia/nsp_cs.c @@ -1602,9 +1602,8 @@ static int nsp_cs_probe(struct pcmcia_device *link) nsp_dbg(NSP_DEBUG_INIT, "in"); /* Create new SCSI device */ - info = kmalloc(sizeof(*info), GFP_KERNEL); + info = kzalloc(sizeof(*info), GFP_KERNEL); if (info == NULL) { return -ENOMEM; } - memset(info, 0, sizeof(*info)); info->p_dev = link; link->priv = info; data->ScsiInfo = info; diff --git a/drivers/scsi/pcmcia/qlogic_stub.c b/drivers/scsi/pcmcia/qlogic_stub.c index 697cfb76c3a..67c5a58d17d 100644 --- a/drivers/scsi/pcmcia/qlogic_stub.c +++ b/drivers/scsi/pcmcia/qlogic_stub.c @@ -162,10 +162,9 @@ static int qlogic_probe(struct pcmcia_device *link) DEBUG(0, "qlogic_attach()\n"); /* Create new SCSI device */ - info = kmalloc(sizeof(*info), GFP_KERNEL); + info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) 
return -ENOMEM; - memset(info, 0, sizeof(*info)); info->p_dev = link; link->priv = info; link->io.NumPorts1 = 16; diff --git a/drivers/scsi/pcmcia/sym53c500_cs.c b/drivers/scsi/pcmcia/sym53c500_cs.c index 2695b7187b2..961839ecfe8 100644 --- a/drivers/scsi/pcmcia/sym53c500_cs.c +++ b/drivers/scsi/pcmcia/sym53c500_cs.c @@ -875,10 +875,9 @@ SYM53C500_probe(struct pcmcia_device *link) DEBUG(0, "SYM53C500_attach()\n"); /* Create new SCSI device */ - info = kmalloc(sizeof(*info), GFP_KERNEL); + info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) return -ENOMEM; - memset(info, 0, sizeof(*info)); info->p_dev = link; link->priv = info; link->io.NumPorts1 = 16; diff --git a/drivers/scsi/ppa.c b/drivers/scsi/ppa.c index 2f1fa1eb7e9..67b6d76a6c8 100644 --- a/drivers/scsi/ppa.c +++ b/drivers/scsi/ppa.c @@ -1014,10 +1014,9 @@ static int __ppa_attach(struct parport *pb) int modes, ppb, ppb_hi; int err = -ENOMEM; - dev = kmalloc(sizeof(ppa_struct), GFP_KERNEL); + dev = kzalloc(sizeof(ppa_struct), GFP_KERNEL); if (!dev) return -ENOMEM; - memset(dev, 0, sizeof(ppa_struct)); dev->base = -1; dev->mode = PPA_AUTODETECT; dev->recon_tmo = PPA_RECON_TMO; diff --git a/drivers/scsi/sim710.c b/drivers/scsi/sim710.c index 018c65f73ac..710f19de3d4 100644 --- a/drivers/scsi/sim710.c +++ b/drivers/scsi/sim710.c @@ -100,7 +100,7 @@ sim710_probe_common(struct device *dev, unsigned long base_addr, { struct Scsi_Host * host = NULL; struct NCR_700_Host_Parameters *hostdata = - kmalloc(sizeof(struct NCR_700_Host_Parameters), GFP_KERNEL); + kzalloc(sizeof(struct NCR_700_Host_Parameters), GFP_KERNEL); printk(KERN_NOTICE "sim710: %s\n", dev->bus_id); printk(KERN_NOTICE "sim710: irq = %d, clock = %d, base = 0x%lx, scsi_id = %d\n", @@ -110,7 +110,6 @@ sim710_probe_common(struct device *dev, unsigned long base_addr, printk(KERN_ERR "sim710: Failed to allocate host data\n"); goto out; } - memset(hostdata, 0, sizeof(struct NCR_700_Host_Parameters)); if(request_region(base_addr, 64, "sim710") == NULL) { 
printk(KERN_ERR "sim710: Failed to reserve IO region 0x%lx\n", diff --git a/drivers/scsi/tmscsim.c b/drivers/scsi/tmscsim.c index 14cba1ca38b..5db1520f8ba 100644 --- a/drivers/scsi/tmscsim.c +++ b/drivers/scsi/tmscsim.c @@ -2082,10 +2082,9 @@ static int dc390_slave_alloc(struct scsi_device *scsi_device) uint id = scsi_device->id; uint lun = scsi_device->lun; - pDCB = kmalloc(sizeof(struct dc390_dcb), GFP_KERNEL); + pDCB = kzalloc(sizeof(struct dc390_dcb), GFP_KERNEL); if (!pDCB) return -ENOMEM; - memset(pDCB, 0, sizeof(struct dc390_dcb)); if (!pACB->DCBCnt++) { pACB->pLinkDCB = pDCB; diff --git a/drivers/serial/amba-pl011.c b/drivers/serial/amba-pl011.c index 954073c6ce3..72229df9dc1 100644 --- a/drivers/serial/amba-pl011.c +++ b/drivers/serial/amba-pl011.c @@ -716,7 +716,7 @@ static int pl011_probe(struct amba_device *dev, void *id) goto out; } - uap = kmalloc(sizeof(struct uart_amba_port), GFP_KERNEL); + uap = kzalloc(sizeof(struct uart_amba_port), GFP_KERNEL); if (uap == NULL) { ret = -ENOMEM; goto out; @@ -728,7 +728,6 @@ static int pl011_probe(struct amba_device *dev, void *id) goto free; } - memset(uap, 0, sizeof(struct uart_amba_port)); uap->clk = clk_get(&dev->dev, "UARTCLK"); if (IS_ERR(uap->clk)) { ret = PTR_ERR(uap->clk); diff --git a/drivers/sh/superhyway/superhyway.c b/drivers/sh/superhyway/superhyway.c index 94b22903119..7d873b3b051 100644 --- a/drivers/sh/superhyway/superhyway.c +++ b/drivers/sh/superhyway/superhyway.c @@ -56,11 +56,10 @@ int superhyway_add_device(unsigned long base, struct superhyway_device *sdev, struct superhyway_device *dev = sdev; if (!dev) { - dev = kmalloc(sizeof(struct superhyway_device), GFP_KERNEL); + dev = kzalloc(sizeof(struct superhyway_device), GFP_KERNEL); if (!dev) return -ENOMEM; - memset(dev, 0, sizeof(struct superhyway_device)); } dev->bus = bus; diff --git a/drivers/sn/ioc3.c b/drivers/sn/ioc3.c index 2dd6eed50aa..29fcd6d0301 100644 --- a/drivers/sn/ioc3.c +++ b/drivers/sn/ioc3.c @@ -629,7 +629,7 @@ static int 
ioc3_probe(struct pci_dev *pdev, const struct pci_device_id *pci_id) #endif /* Set up per-IOC3 data */ - idd = kmalloc(sizeof(struct ioc3_driver_data), GFP_KERNEL); + idd = kzalloc(sizeof(struct ioc3_driver_data), GFP_KERNEL); if (!idd) { printk(KERN_WARNING "%s: Failed to allocate IOC3 data for pci_dev %s.\n", @@ -637,7 +637,6 @@ static int ioc3_probe(struct pci_dev *pdev, const struct pci_device_id *pci_id) ret = -ENODEV; goto out_idd; } - memset(idd, 0, sizeof(struct ioc3_driver_data)); spin_lock_init(&idd->ir_lock); spin_lock_init(&idd->gpio_lock); idd->pdev = pdev; diff --git a/drivers/telephony/ixj_pcmcia.c b/drivers/telephony/ixj_pcmcia.c index 3e658dc7c2d..ff9a29b7633 100644 --- a/drivers/telephony/ixj_pcmcia.c +++ b/drivers/telephony/ixj_pcmcia.c @@ -45,11 +45,10 @@ static int ixj_probe(struct pcmcia_device *p_dev) p_dev->io.Attributes2 = IO_DATA_PATH_WIDTH_8; p_dev->io.IOAddrLines = 3; p_dev->conf.IntType = INT_MEMORY_AND_IO; - p_dev->priv = kmalloc(sizeof(struct ixj_info_t), GFP_KERNEL); + p_dev->priv = kzalloc(sizeof(struct ixj_info_t), GFP_KERNEL); if (!p_dev->priv) { return -ENOMEM; } - memset(p_dev->priv, 0, sizeof(struct ixj_info_t)); return ixj_config(p_dev); } diff --git a/drivers/uio/Kconfig b/drivers/uio/Kconfig new file mode 100644 index 00000000000..b778ed71f63 --- /dev/null +++ b/drivers/uio/Kconfig @@ -0,0 +1,29 @@ +menu "Userspace I/O" + depends on !S390 + +config UIO + tristate "Userspace I/O drivers" + default n + help + Enable this to allow the userspace driver core code to be + built. This code allows userspace programs easy access to + kernel interrupts and memory locations, allowing some drivers + to be written in userspace. Note that a small kernel driver + is also required for interrupt handling to work properly. + + If you don't know what to do here, say N. + +config UIO_CIF + tristate "generic Hilscher CIF Card driver" + depends on UIO && PCI + default n + help + Driver for Hilscher CIF DeviceNet and Profibus cards. 
This + driver requires a userspace component that handles all of the + heavy lifting and can be found at: + http://www.osadl.org/projects/downloads/UIO/user/cif-* + + To compile this driver as a module, choose M here: the module + will be called uio_cif. + +endmenu diff --git a/drivers/uio/Makefile b/drivers/uio/Makefile new file mode 100644 index 00000000000..7fecfb459da --- /dev/null +++ b/drivers/uio/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_UIO) += uio.o +obj-$(CONFIG_UIO_CIF) += uio_cif.o diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c new file mode 100644 index 00000000000..865f32b63b5 --- /dev/null +++ b/drivers/uio/uio.c @@ -0,0 +1,701 @@ +/* + * drivers/uio/uio.c + * + * Copyright(C) 2005, Benedikt Spranger <b.spranger@linutronix.de> + * Copyright(C) 2005, Thomas Gleixner <tglx@linutronix.de> + * Copyright(C) 2006, Hans J. Koch <hjk@linutronix.de> + * Copyright(C) 2006, Greg Kroah-Hartman <greg@kroah.com> + * + * Userspace IO + * + * Base Functions + * + * Licensed under the GPLv2 only. 
+ */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/poll.h> +#include <linux/device.h> +#include <linux/mm.h> +#include <linux/idr.h> +#include <linux/string.h> +#include <linux/kobject.h> +#include <linux/uio_driver.h> + +#define UIO_MAX_DEVICES 255 + +struct uio_device { + struct module *owner; + struct device *dev; + int minor; + atomic_t event; + struct fasync_struct *async_queue; + wait_queue_head_t wait; + int vma_count; + struct uio_info *info; + struct kset map_attr_kset; +}; + +static int uio_major; +static DEFINE_IDR(uio_idr); +static struct file_operations uio_fops; + +/* UIO class infrastructure */ +static struct uio_class { + struct kref kref; + struct class *class; +} *uio_class; + +/* + * attributes + */ + +static struct attribute attr_addr = { + .name = "addr", + .mode = S_IRUGO, +}; + +static struct attribute attr_size = { + .name = "size", + .mode = S_IRUGO, +}; + +static struct attribute* map_attrs[] = { + &attr_addr, &attr_size, NULL +}; + +static ssize_t map_attr_show(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + struct uio_mem *mem = container_of(kobj, struct uio_mem, kobj); + + if (strncmp(attr->name,"addr",4) == 0) + return sprintf(buf, "0x%lx\n", mem->addr); + + if (strncmp(attr->name,"size",4) == 0) + return sprintf(buf, "0x%lx\n", mem->size); + + return -ENODEV; +} + +static void map_attr_release(struct kobject *kobj) +{ + /* TODO ??? 
*/ +} + +static struct sysfs_ops map_attr_ops = { + .show = map_attr_show, +}; + +static struct kobj_type map_attr_type = { + .release = map_attr_release, + .sysfs_ops = &map_attr_ops, + .default_attrs = map_attrs, +}; + +static ssize_t show_name(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct uio_device *idev = dev_get_drvdata(dev); + if (idev) + return sprintf(buf, "%s\n", idev->info->name); + else + return -ENODEV; +} +static DEVICE_ATTR(name, S_IRUGO, show_name, NULL); + +static ssize_t show_version(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct uio_device *idev = dev_get_drvdata(dev); + if (idev) + return sprintf(buf, "%s\n", idev->info->version); + else + return -ENODEV; +} +static DEVICE_ATTR(version, S_IRUGO, show_version, NULL); + +static ssize_t show_event(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct uio_device *idev = dev_get_drvdata(dev); + if (idev) + return sprintf(buf, "%u\n", + (unsigned int)atomic_read(&idev->event)); + else + return -ENODEV; +} +static DEVICE_ATTR(event, S_IRUGO, show_event, NULL); + +static struct attribute *uio_attrs[] = { + &dev_attr_name.attr, + &dev_attr_version.attr, + &dev_attr_event.attr, + NULL, +}; + +static struct attribute_group uio_attr_grp = { + .attrs = uio_attrs, +}; + +/* + * device functions + */ +static int uio_dev_add_attributes(struct uio_device *idev) +{ + int ret; + int mi; + int map_found = 0; + struct uio_mem *mem; + + ret = sysfs_create_group(&idev->dev->kobj, &uio_attr_grp); + if (ret) + goto err_group; + + for (mi = 0; mi < MAX_UIO_MAPS; mi++) { + mem = &idev->info->mem[mi]; + if (mem->size == 0) + break; + if (!map_found) { + map_found = 1; + kobject_set_name(&idev->map_attr_kset.kobj,"maps"); + idev->map_attr_kset.ktype = &map_attr_type; + idev->map_attr_kset.kobj.parent = &idev->dev->kobj; + ret = kset_register(&idev->map_attr_kset); + if (ret) + goto err_remove_group; + } + kobject_init(&mem->kobj); + 
kobject_set_name(&mem->kobj,"map%d",mi); + mem->kobj.parent = &idev->map_attr_kset.kobj; + mem->kobj.kset = &idev->map_attr_kset; + ret = kobject_add(&mem->kobj); + if (ret) + goto err_remove_maps; + } + + return 0; + +err_remove_maps: + for (mi--; mi>=0; mi--) { + mem = &idev->info->mem[mi]; + kobject_unregister(&mem->kobj); + } + kset_unregister(&idev->map_attr_kset); /* Needed ? */ +err_remove_group: + sysfs_remove_group(&idev->dev->kobj, &uio_attr_grp); +err_group: + dev_err(idev->dev, "error creating sysfs files (%d)\n", ret); + return ret; +} + +static void uio_dev_del_attributes(struct uio_device *idev) +{ + int mi; + struct uio_mem *mem; + for (mi = 0; mi < MAX_UIO_MAPS; mi++) { + mem = &idev->info->mem[mi]; + if (mem->size == 0) + break; + kobject_unregister(&mem->kobj); + } + kset_unregister(&idev->map_attr_kset); + sysfs_remove_group(&idev->dev->kobj, &uio_attr_grp); +} + +static int uio_get_minor(struct uio_device *idev) +{ + static DEFINE_MUTEX(minor_lock); + int retval = -ENOMEM; + int id; + + mutex_lock(&minor_lock); + if (idr_pre_get(&uio_idr, GFP_KERNEL) == 0) + goto exit; + + retval = idr_get_new(&uio_idr, idev, &id); + if (retval < 0) { + if (retval == -EAGAIN) + retval = -ENOMEM; + goto exit; + } + idev->minor = id & MAX_ID_MASK; +exit: + mutex_unlock(&minor_lock); + return retval; +} + +static void uio_free_minor(struct uio_device *idev) +{ + idr_remove(&uio_idr, idev->minor); +} + +/** + * uio_event_notify - trigger an interrupt event + * @info: UIO device capabilities + */ +void uio_event_notify(struct uio_info *info) +{ + struct uio_device *idev = info->uio_dev; + + atomic_inc(&idev->event); + wake_up_interruptible(&idev->wait); + kill_fasync(&idev->async_queue, SIGIO, POLL_IN); +} +EXPORT_SYMBOL_GPL(uio_event_notify); + +/** + * uio_interrupt - hardware interrupt handler + * @irq: IRQ number, can be UIO_IRQ_CYCLIC for cyclic timer + * @dev_id: Pointer to the devices uio_device structure + */ +static irqreturn_t uio_interrupt(int irq, void 
*dev_id) +{ + struct uio_device *idev = (struct uio_device *)dev_id; + irqreturn_t ret = idev->info->handler(irq, idev->info); + + if (ret == IRQ_HANDLED) + uio_event_notify(idev->info); + + return ret; +} + +struct uio_listener { + struct uio_device *dev; + s32 event_count; +}; + +static int uio_open(struct inode *inode, struct file *filep) +{ + struct uio_device *idev; + struct uio_listener *listener; + int ret = 0; + + idev = idr_find(&uio_idr, iminor(inode)); + if (!idev) + return -ENODEV; + + listener = kmalloc(sizeof(*listener), GFP_KERNEL); + if (!listener) + return -ENOMEM; + + listener->dev = idev; + listener->event_count = atomic_read(&idev->event); + filep->private_data = listener; + + if (idev->info->open) { + if (!try_module_get(idev->owner)) + return -ENODEV; + ret = idev->info->open(idev->info, inode); + module_put(idev->owner); + } + + if (ret) + kfree(listener); + + return ret; +} + +static int uio_fasync(int fd, struct file *filep, int on) +{ + struct uio_listener *listener = filep->private_data; + struct uio_device *idev = listener->dev; + + return fasync_helper(fd, filep, on, &idev->async_queue); +} + +static int uio_release(struct inode *inode, struct file *filep) +{ + int ret = 0; + struct uio_listener *listener = filep->private_data; + struct uio_device *idev = listener->dev; + + if (idev->info->release) { + if (!try_module_get(idev->owner)) + return -ENODEV; + ret = idev->info->release(idev->info, inode); + module_put(idev->owner); + } + if (filep->f_flags & FASYNC) + ret = uio_fasync(-1, filep, 0); + kfree(listener); + return ret; +} + +static unsigned int uio_poll(struct file *filep, poll_table *wait) +{ + struct uio_listener *listener = filep->private_data; + struct uio_device *idev = listener->dev; + + if (idev->info->irq == UIO_IRQ_NONE) + return -EIO; + + poll_wait(filep, &idev->wait, wait); + if (listener->event_count != atomic_read(&idev->event)) + return POLLIN | POLLRDNORM; + return 0; +} + +static ssize_t uio_read(struct file 
*filep, char __user *buf, + size_t count, loff_t *ppos) +{ + struct uio_listener *listener = filep->private_data; + struct uio_device *idev = listener->dev; + DECLARE_WAITQUEUE(wait, current); + ssize_t retval; + s32 event_count; + + if (idev->info->irq == UIO_IRQ_NONE) + return -EIO; + + if (count != sizeof(s32)) + return -EINVAL; + + add_wait_queue(&idev->wait, &wait); + + do { + set_current_state(TASK_INTERRUPTIBLE); + + event_count = atomic_read(&idev->event); + if (event_count != listener->event_count) { + if (copy_to_user(buf, &event_count, count)) + retval = -EFAULT; + else { + listener->event_count = event_count; + retval = count; + } + break; + } + + if (filep->f_flags & O_NONBLOCK) { + retval = -EAGAIN; + break; + } + + if (signal_pending(current)) { + retval = -ERESTARTSYS; + break; + } + schedule(); + } while (1); + + __set_current_state(TASK_RUNNING); + remove_wait_queue(&idev->wait, &wait); + + return retval; +} + +static int uio_find_mem_index(struct vm_area_struct *vma) +{ + int mi; + struct uio_device *idev = vma->vm_private_data; + + for (mi = 0; mi < MAX_UIO_MAPS; mi++) { + if (idev->info->mem[mi].size == 0) + return -1; + if (vma->vm_pgoff == mi) + return mi; + } + return -1; +} + +static void uio_vma_open(struct vm_area_struct *vma) +{ + struct uio_device *idev = vma->vm_private_data; + idev->vma_count++; +} + +static void uio_vma_close(struct vm_area_struct *vma) +{ + struct uio_device *idev = vma->vm_private_data; + idev->vma_count--; +} + +static struct page *uio_vma_nopage(struct vm_area_struct *vma, + unsigned long address, int *type) +{ + struct uio_device *idev = vma->vm_private_data; + struct page* page = NOPAGE_SIGBUS; + + int mi = uio_find_mem_index(vma); + if (mi < 0) + return page; + + if (idev->info->mem[mi].memtype == UIO_MEM_LOGICAL) + page = virt_to_page(idev->info->mem[mi].addr); + else + page = vmalloc_to_page((void*)idev->info->mem[mi].addr); + get_page(page); + if (type) + *type = VM_FAULT_MINOR; + return page; +} + +static 
struct vm_operations_struct uio_vm_ops = { + .open = uio_vma_open, + .close = uio_vma_close, + .nopage = uio_vma_nopage, +}; + /* Map a UIO_MEM_PHYS region: flags the vma VM_IO | VM_RESERVED and remaps the region's page frames over the whole vma; -EINVAL if the vma selects no region. */ +static int uio_mmap_physical(struct vm_area_struct *vma) +{ + struct uio_device *idev = vma->vm_private_data; + int mi = uio_find_mem_index(vma); + if (mi < 0) + return -EINVAL; + + vma->vm_flags |= VM_IO | VM_RESERVED; + + return remap_pfn_range(vma, + vma->vm_start, + idev->info->mem[mi].addr >> PAGE_SHIFT, + vma->vm_end - vma->vm_start, + vma->vm_page_prot); +} + /* Set up a fault-driven mapping: installs uio_vm_ops and calls uio_vma_open() once directly for this new vma. Always returns 0. */ +static int uio_mmap_logical(struct vm_area_struct *vma) +{ + vma->vm_flags |= VM_RESERVED; + vma->vm_ops = &uio_vm_ops; + uio_vma_open(vma); + return 0; +} + /* mmap file operation. vma->vm_pgoff selects the mem[] region; a request covering more pages than the region (size rounded up to whole pages) gets -EINVAL. A driver-supplied info->mmap takes precedence and is called with a reference held on the owner module; otherwise the mapping is dispatched on the region's memtype. */ +static int uio_mmap(struct file *filep, struct vm_area_struct *vma) +{ + struct uio_listener *listener = filep->private_data; + struct uio_device *idev = listener->dev; + int mi; + unsigned long requested_pages, actual_pages; + int ret = 0; + + if (vma->vm_end < vma->vm_start) + return -EINVAL; + + vma->vm_private_data = idev; + + mi = uio_find_mem_index(vma); + if (mi < 0) + return -EINVAL; + + requested_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + actual_pages = (idev->info->mem[mi].size + PAGE_SIZE -1) >> PAGE_SHIFT; + if (requested_pages > actual_pages) + return -EINVAL; + + if (idev->info->mmap) { + if (!try_module_get(idev->owner)) + return -ENODEV; + ret = idev->info->mmap(idev->info, vma); + module_put(idev->owner); + return ret; + } + + switch (idev->info->mem[mi].memtype) { + case UIO_MEM_PHYS: + return uio_mmap_physical(vma); + case UIO_MEM_LOGICAL: + case UIO_MEM_VIRTUAL: + return uio_mmap_logical(vma); + default: + return -EINVAL; + } +} + /* File operations passed to register_chrdev() in uio_major_init(). */ +static struct file_operations uio_fops = { + .owner = THIS_MODULE, + .open = uio_open, + .release = uio_release, + .read = uio_read, + .mmap = uio_mmap, + .poll = uio_poll, + .fasync = uio_fasync, +}; + /* Register the "uio" character device, requesting major 0, and keep the result in uio_major; returns 0, or the negative value register_chrdev() handed back. */ +static int uio_major_init(void) +{ + uio_major = register_chrdev(0, "uio", &uio_fops); + if (uio_major < 0) + return uio_major; + return 0; +} + +static void
uio_major_cleanup(void) +{ + unregister_chrdev(uio_major, "uio"); +} + /* Take a reference on the single uio_class, creating it (char major, kref, device class) on first use. Returns 0 on success or a negative errno; on failure everything set up here is undone and uio_class is left NULL. */ +static int init_uio_class(void) +{ + int ret = 0; + + if (uio_class != NULL) { + kref_get(&uio_class->kref); + goto exit; + } + + /* This is the first time in here, set everything up properly */ + ret = uio_major_init(); + if (ret) + goto exit; + + uio_class = kzalloc(sizeof(*uio_class), GFP_KERNEL); + if (!uio_class) { + ret = -ENOMEM; + goto err_kzalloc; + } + + kref_init(&uio_class->kref); + uio_class->class = class_create(THIS_MODULE, "uio"); + if (IS_ERR(uio_class->class)) { + /* PTR_ERR() yields the errno; IS_ERR() would only yield 1. */ + ret = PTR_ERR(uio_class->class); + printk(KERN_ERR "class_create failed for uio\n"); + goto err_class_create; + } + return 0; + +err_class_create: + kfree(uio_class); + uio_class = NULL; +err_kzalloc: + uio_major_cleanup(); +exit: + return ret; +} + /* kref release callback: tears down the class, frees uio_class and unregisters the char major. */ +static void release_uio_class(struct kref *kref) +{ + /* Ok, we cheat as we know we only have one uio_class */ + class_destroy(uio_class->class); + kfree(uio_class); + uio_major_cleanup(); + uio_class = NULL; +} + /* Drop one reference on uio_class, if it exists; the last put runs release_uio_class(). */ +static void uio_class_destroy(void) +{ + if (uio_class) + kref_put(&uio_class->kref, release_uio_class); +} + +/** + * __uio_register_device - register a new userspace IO device + * @owner: module that creates the new device + * @parent: parent device + * @info: UIO device capabilities + * + * returns zero on success or a negative error code.
+ */ +int __uio_register_device(struct module *owner, + struct device *parent, + struct uio_info *info) +{ + struct uio_device *idev; + int ret = 0; + + if (!parent || !info || !info->name || !info->version) + return -EINVAL; + + info->uio_dev = NULL; + + ret = init_uio_class(); + if (ret) + return ret; + + idev = kzalloc(sizeof(*idev), GFP_KERNEL); + if (!idev) { + ret = -ENOMEM; + goto err_kzalloc; + } + + idev->owner = owner; + idev->info = info; + init_waitqueue_head(&idev->wait); + atomic_set(&idev->event, 0); + + ret = uio_get_minor(idev); + if (ret) + goto err_get_minor; + + idev->dev = device_create(uio_class->class, parent, + MKDEV(uio_major, idev->minor), + "uio%d", idev->minor); + if (IS_ERR(idev->dev)) { + printk(KERN_ERR "UIO: device register failed\n"); + ret = PTR_ERR(idev->dev); + goto err_device_create; + } + dev_set_drvdata(idev->dev, idev); + + ret = uio_dev_add_attributes(idev); + if (ret) + goto err_uio_dev_add_attributes; + + info->uio_dev = idev; + + if (idev->info->irq >= 0) { + ret = request_irq(idev->info->irq, uio_interrupt, + idev->info->irq_flags, idev->info->name, idev); + if (ret) + goto err_request_irq; + } + + return 0; + /* Error unwinding: each label undoes the step taken just before the jump to it, in reverse order of setup. */ +err_request_irq: + /* idev is freed below; do not leave info pointing at it, or a later uio_unregister_device(info) would pass its NULL check and use freed memory. */ + info->uio_dev = NULL; + uio_dev_del_attributes(idev); +err_uio_dev_add_attributes: + device_destroy(uio_class->class, MKDEV(uio_major, idev->minor)); +err_device_create: + uio_free_minor(idev); +err_get_minor: + kfree(idev); +err_kzalloc: + uio_class_destroy(); + return ret; +} +EXPORT_SYMBOL_GPL(__uio_register_device); + +/** + * uio_unregister_device - unregister a userspace IO device + * @info: UIO device capabilities + * + * Does nothing when @info is NULL or was never successfully registered. + */ +void uio_unregister_device(struct uio_info *info) +{ + struct uio_device *idev; + + if (!info || !info->uio_dev) + return; + + idev = info->uio_dev; + + uio_free_minor(idev); + + if (info->irq >= 0) + free_irq(info->irq, idev); + + uio_dev_del_attributes(idev); + + dev_set_drvdata(idev->dev, NULL); + device_destroy(uio_class->class, MKDEV(uio_major, idev->minor)); + kfree(idev); +
uio_class_destroy(); + + return; +} +EXPORT_SYMBOL_GPL(uio_unregister_device); + /* Nothing to do at module load: the class and char major are set up by init_uio_class() when a device is registered. */ +static int __init uio_init(void) +{ + return 0; +} + /* Nothing to do at module unload either. */ +static void __exit uio_exit(void) +{ +} + +module_init(uio_init) +module_exit(uio_exit) +MODULE_LICENSE("GPL v2"); diff --git a/drivers/uio/uio_cif.c b/drivers/uio/uio_cif.c new file mode 100644 index 00000000000..838bae46083 --- /dev/null +++ b/drivers/uio/uio_cif.c @@ -0,0 +1,156 @@ +/* + * UIO Hilscher CIF card driver + * + * (C) 2007 Hans J. Koch <hjk@linutronix.de> + * Original code (C) 2005 Benedikt Spranger <b.spranger@linutronix.de> + * + * Licensed under GPL version 2 only. + * + */ + +#include <linux/device.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/uio_driver.h> + +#include <asm/io.h> + +#ifndef PCI_DEVICE_ID_PLX_9030 +#define PCI_DEVICE_ID_PLX_9030 0x9030 +#endif + +#define PLX9030_INTCSR 0x4C +#define INTSCR_INT1_ENABLE 0x01 +#define INTSCR_INT1_STATUS 0x04 +#define INT1_ENABLED_AND_ACTIVE (INTSCR_INT1_ENABLE | INTSCR_INT1_STATUS) + +#define PCI_SUBVENDOR_ID_PEP 0x1518 +#define CIF_SUBDEVICE_PROFIBUS 0x430 +#define CIF_SUBDEVICE_DEVICENET 0x432 + + /* Interrupt handler: returns IRQ_NONE unless INT1 is both enabled and active in the INTCSR register reached through mem[0].internal_addr; otherwise clears the INT1 enable bit and returns IRQ_HANDLED. NOTE(review): nothing in this driver sets the enable bit again - presumably userspace does so through the mapped registers; confirm. */ +static irqreturn_t hilscher_handler(int irq, struct uio_info *dev_info) +{ + void __iomem *plx_intscr = dev_info->mem[0].internal_addr + + PLX9030_INTCSR; + + if ((ioread8(plx_intscr) & INT1_ENABLED_AND_ACTIVE) + != INT1_ENABLED_AND_ACTIVE) + return IRQ_NONE; + + /* Disable interrupt */ + iowrite8(ioread8(plx_intscr) & ~INTSCR_INT1_ENABLE, plx_intscr); + return IRQ_HANDLED; +} + /* PCI probe: allocates a uio_info, enables the device, claims its regions, maps PCI resource 0 and registers the result with the UIO core. Returns 0 on success, -ENOMEM if the allocation fails, -ENODEV for any later failure. */ +static int __devinit hilscher_pci_probe(struct pci_dev *dev, + const struct pci_device_id *id) +{ + struct uio_info *info; + + info = kzalloc(sizeof(struct uio_info), GFP_KERNEL); + if (!info) + return -ENOMEM; + + if (pci_enable_device(dev)) + goto out_free; + + if (pci_request_regions(dev, "hilscher")) + goto out_disable; + + info->mem[0].addr = pci_resource_start(dev, 0); + if (!info->mem[0].addr) + goto out_release; + info->mem[0].internal_addr =
ioremap(pci_resource_start(dev, 0), + pci_resource_len(dev, 0)); + if (!info->mem[0].internal_addr) + goto out_release; + + info->mem[0].size = pci_resource_len(dev, 0); + info->mem[0].memtype = UIO_MEM_PHYS; + info->mem[1].addr = pci_resource_start(dev, 2); + info->mem[1].size = pci_resource_len(dev, 2); + info->mem[1].memtype = UIO_MEM_PHYS; + switch (id->subdevice) { + case CIF_SUBDEVICE_PROFIBUS: + info->name = "CIF_Profibus"; + break; + case CIF_SUBDEVICE_DEVICENET: + info->name = "CIF_Devicenet"; + break; + default: + info->name = "CIF_???"; + } + info->version = "0.0.1"; + info->irq = dev->irq; + info->irq_flags = IRQF_DISABLED | IRQF_SHARED; + info->handler = hilscher_handler; + + if (uio_register_device(&dev->dev, info)) + goto out_unmap; + + pci_set_drvdata(dev, info); + + return 0; /* below: unwind in reverse order of setup; every failure after the allocation is reported as -ENODEV */ +out_unmap: + iounmap(info->mem[0].internal_addr); +out_release: + pci_release_regions(dev); +out_disable: + pci_disable_device(dev); +out_free: + kfree (info); + return -ENODEV; +} + /* PCI remove: unregisters from the UIO core, releases the PCI regions, disables the device, clears drvdata, unmaps the registers and frees the uio_info that probe stored as drvdata. */ +static void hilscher_pci_remove(struct pci_dev *dev) +{ + struct uio_info *info = pci_get_drvdata(dev); + + uio_unregister_device(info); + pci_release_regions(dev); + pci_disable_device(dev); + pci_set_drvdata(dev, NULL); + iounmap(info->mem[0].internal_addr); + + kfree (info); +} + /* Devices bound by this driver: PLX 9030 with the PEP subvendor ID and the Profibus or DeviceNet subdevice ID. */ +static struct pci_device_id hilscher_pci_ids[] = { + { + .vendor = PCI_VENDOR_ID_PLX, + .device = PCI_DEVICE_ID_PLX_9030, + .subvendor = PCI_SUBVENDOR_ID_PEP, + .subdevice = CIF_SUBDEVICE_PROFIBUS, + }, + { + .vendor = PCI_VENDOR_ID_PLX, + .device = PCI_DEVICE_ID_PLX_9030, + .subvendor = PCI_SUBVENDOR_ID_PEP, + .subdevice = CIF_SUBDEVICE_DEVICENET, + }, + { 0, } +}; + +static struct pci_driver hilscher_pci_driver = { + .name = "hilscher", + .id_table = hilscher_pci_ids, + .probe = hilscher_pci_probe, + .remove = hilscher_pci_remove, +}; + /* Module entry point: register the PCI driver. */ +static int __init hilscher_init_module(void) +{ + return pci_register_driver(&hilscher_pci_driver); +} + +static void __exit hilscher_exit_module(void) +{ +
pci_unregister_driver(&hilscher_pci_driver); +} + +module_init(hilscher_init_module); +module_exit(hilscher_exit_module); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Hans J. Koch, Benedikt Spranger"); diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c index 73c49362cd4..654857493a8 100644 --- a/drivers/usb/core/driver.c +++ b/drivers/usb/core/driver.c @@ -29,13 +29,6 @@ #include "hcd.h" #include "usb.h" -#define VERBOSE_DEBUG 0 - -#if VERBOSE_DEBUG -#define dev_vdbg dev_dbg -#else -#define dev_vdbg(dev, fmt, args...) do { } while (0) -#endif #ifdef CONFIG_HOTPLUG diff --git a/drivers/usb/gadget/goku_udc.c b/drivers/usb/gadget/goku_udc.c index d6c5f1150ae..349b8166f34 100644 --- a/drivers/usb/gadget/goku_udc.c +++ b/drivers/usb/gadget/goku_udc.c @@ -1777,14 +1777,13 @@ static int goku_probe(struct pci_dev *pdev, const struct pci_device_id *id) } /* alloc, and start init */ - dev = kmalloc (sizeof *dev, GFP_KERNEL); + dev = kzalloc (sizeof *dev, GFP_KERNEL); if (dev == NULL){ pr_debug("enomem %s\n", pci_name(pdev)); retval = -ENOMEM; goto done; } - memset(dev, 0, sizeof *dev); spin_lock_init(&dev->lock); dev->pdev = pdev; dev->gadget.ops = &goku_ops; diff --git a/drivers/usb/gadget/serial.c b/drivers/usb/gadget/serial.c index dd33ff0ae4c..38138bb9ddb 100644 --- a/drivers/usb/gadget/serial.c +++ b/drivers/usb/gadget/serial.c @@ -1427,7 +1427,7 @@ static int __init gs_bind(struct usb_gadget *gadget) gs_acm_config_desc.bmAttributes |= USB_CONFIG_ATT_WAKEUP; } - gs_device = dev = kmalloc(sizeof(struct gs_dev), GFP_KERNEL); + gs_device = dev = kzalloc(sizeof(struct gs_dev), GFP_KERNEL); if (dev == NULL) return -ENOMEM; @@ -1435,7 +1435,6 @@ static int __init gs_bind(struct usb_gadget *gadget) init_utsname()->sysname, init_utsname()->release, gadget->name); - memset(dev, 0, sizeof(struct gs_dev)); dev->dev_gadget = gadget; spin_lock_init(&dev->dev_lock); INIT_LIST_HEAD(&dev->dev_req_list); diff --git a/drivers/usb/host/ohci-hcd.c 
b/drivers/usb/host/ohci-hcd.c index 2038125b7f8..6edf4097d2d 100644 --- a/drivers/usb/host/ohci-hcd.c +++ b/drivers/usb/host/ohci-hcd.c @@ -171,11 +171,10 @@ static int ohci_urb_enqueue ( } /* allocate the private part of the URB */ - urb_priv = kmalloc (sizeof (urb_priv_t) + size * sizeof (struct td *), + urb_priv = kzalloc (sizeof (urb_priv_t) + size * sizeof (struct td *), mem_flags); if (!urb_priv) return -ENOMEM; - memset (urb_priv, 0, sizeof (urb_priv_t) + size * sizeof (struct td *)); INIT_LIST_HEAD (&urb_priv->pending); urb_priv->length = size; urb_priv->ed = ed; diff --git a/drivers/usb/host/sl811_cs.c b/drivers/usb/host/sl811_cs.c index 2d0e73b2009..5da63f53500 100644 --- a/drivers/usb/host/sl811_cs.c +++ b/drivers/usb/host/sl811_cs.c @@ -278,10 +278,9 @@ static int sl811_cs_probe(struct pcmcia_device *link) { local_info_t *local; - local = kmalloc(sizeof(local_info_t), GFP_KERNEL); + local = kzalloc(sizeof(local_info_t), GFP_KERNEL); if (!local) return -ENOMEM; - memset(local, 0, sizeof(local_info_t)); local->p_dev = link; link->priv = local; diff --git a/drivers/video/amba-clcd.c b/drivers/video/amba-clcd.c index 6c9dc2e69c8..a7a1c891bfa 100644 --- a/drivers/video/amba-clcd.c +++ b/drivers/video/amba-clcd.c @@ -447,13 +447,12 @@ static int clcdfb_probe(struct amba_device *dev, void *id) goto out; } - fb = kmalloc(sizeof(struct clcd_fb), GFP_KERNEL); + fb = kzalloc(sizeof(struct clcd_fb), GFP_KERNEL); if (!fb) { printk(KERN_INFO "CLCD: could not allocate new clcd_fb struct\n"); ret = -ENOMEM; goto free_region; } - memset(fb, 0, sizeof(struct clcd_fb)); fb->dev = dev; fb->board = board; diff --git a/drivers/video/aty/atyfb_base.c b/drivers/video/aty/atyfb_base.c index ef330e34d03..0c7bf75732e 100644 --- a/drivers/video/aty/atyfb_base.c +++ b/drivers/video/aty/atyfb_base.c @@ -2937,12 +2937,11 @@ static int __devinit atyfb_setup_sparc(struct pci_dev *pdev, /* nothing */ ; j = i + 4; - par->mmap_map = kmalloc(j * sizeof(*par->mmap_map), GFP_ATOMIC); + 
par->mmap_map = kcalloc(j, sizeof(*par->mmap_map), GFP_ATOMIC); if (!par->mmap_map) { PRINTKE("atyfb_setup_sparc() can't alloc mmap_map\n"); return -ENOMEM; } - memset(par->mmap_map, 0, j * sizeof(*par->mmap_map)); for (i = 0, j = 2; i < 6 && pdev->resource[i].start; i++) { struct resource *rp = &pdev->resource[i]; diff --git a/drivers/video/au1200fb.c b/drivers/video/au1200fb.c index dbf4ec3f6d5..03e57ef8837 100644 --- a/drivers/video/au1200fb.c +++ b/drivers/video/au1200fb.c @@ -1589,11 +1589,10 @@ static int au1200fb_init_fbinfo(struct au1200fb_device *fbdev) return -EFAULT; } - fbi->pseudo_palette = kmalloc(sizeof(u32) * 16, GFP_KERNEL); + fbi->pseudo_palette = kcalloc(16, sizeof(u32), GFP_KERNEL); if (!fbi->pseudo_palette) { return -ENOMEM; } - memset(fbi->pseudo_palette, 0, sizeof(u32) * 16); if (fb_alloc_cmap(&fbi->cmap, AU1200_LCD_NBR_PALETTE_ENTRIES, 0) < 0) { print_err("Fail to allocate colormap (%d entries)", diff --git a/drivers/video/clps711xfb.c b/drivers/video/clps711xfb.c index 50b78af0fa2..dea6579941b 100644 --- a/drivers/video/clps711xfb.c +++ b/drivers/video/clps711xfb.c @@ -366,11 +366,10 @@ int __init clps711xfb_init(void) if (fb_get_options("clps711xfb", NULL)) return -ENODEV; - cfb = kmalloc(sizeof(*cfb), GFP_KERNEL); + cfb = kzalloc(sizeof(*cfb), GFP_KERNEL); if (!cfb) goto out; - memset(cfb, 0, sizeof(*cfb)); strcpy(cfb->fix.id, "clps711x"); cfb->fbops = &clps7111fb_ops; diff --git a/drivers/video/cyber2000fb.c b/drivers/video/cyber2000fb.c index 7a6eeda5ae9..30ede6e8830 100644 --- a/drivers/video/cyber2000fb.c +++ b/drivers/video/cyber2000fb.c @@ -1221,11 +1221,10 @@ cyberpro_alloc_fb_info(unsigned int id, char *name) { struct cfb_info *cfb; - cfb = kmalloc(sizeof(struct cfb_info), GFP_KERNEL); + cfb = kzalloc(sizeof(struct cfb_info), GFP_KERNEL); if (!cfb) return NULL; - memset(cfb, 0, sizeof(struct cfb_info)); cfb->id = id; diff --git a/drivers/video/pvr2fb.c b/drivers/video/pvr2fb.c index 0f88c30f94f..f9300266044 100644 --- 
a/drivers/video/pvr2fb.c +++ b/drivers/video/pvr2fb.c @@ -1082,13 +1082,12 @@ static int __init pvr2fb_init(void) #endif size = sizeof(struct fb_info) + sizeof(struct pvr2fb_par) + 16 * sizeof(u32); - fb_info = kmalloc(size, GFP_KERNEL); + fb_info = kzalloc(size, GFP_KERNEL); if (!fb_info) { printk(KERN_ERR "Failed to allocate memory for fb_info\n"); return -ENOMEM; } - memset(fb_info, 0, size); currentpar = (struct pvr2fb_par *)(fb_info + 1); diff --git a/drivers/video/savage/savagefb_driver.c b/drivers/video/savage/savagefb_driver.c index 3d7507ad55f..b855f4a34af 100644 --- a/drivers/video/savage/savagefb_driver.c +++ b/drivers/video/savage/savagefb_driver.c @@ -2174,11 +2174,10 @@ static int __devinit savage_init_fb_info(struct fb_info *info, #if defined(CONFIG_FB_SAVAGE_ACCEL) /* FIFO size + padding for commands */ - info->pixmap.addr = kmalloc(8*1024, GFP_KERNEL); + info->pixmap.addr = kcalloc(8, 1024, GFP_KERNEL); err = -ENOMEM; if (info->pixmap.addr) { - memset(info->pixmap.addr, 0, 8*1024); info->pixmap.size = 8*1024; info->pixmap.scan_align = 4; info->pixmap.buf_align = 4; diff --git a/drivers/video/valkyriefb.c b/drivers/video/valkyriefb.c index ad66f070acb..7b0cef9ca8f 100644 --- a/drivers/video/valkyriefb.c +++ b/drivers/video/valkyriefb.c @@ -356,10 +356,9 @@ int __init valkyriefb_init(void) } #endif /* ppc (!CONFIG_MAC) */ - p = kmalloc(sizeof(*p), GFP_ATOMIC); + p = kzalloc(sizeof(*p), GFP_ATOMIC); if (p == 0) return -ENOMEM; - memset(p, 0, sizeof(*p)); /* Map in frame buffer and registers */ if (!request_mem_region(frame_buffer_phys, 0x100000, "valkyriefb")) { diff --git a/drivers/w1/masters/matrox_w1.c b/drivers/w1/masters/matrox_w1.c index 6f9d880ab2e..d356da5709f 100644 --- a/drivers/w1/masters/matrox_w1.c +++ b/drivers/w1/masters/matrox_w1.c @@ -164,7 +164,7 @@ static int __devinit matrox_w1_probe(struct pci_dev *pdev, const struct pci_devi if (pdev->vendor != PCI_VENDOR_ID_MATROX || pdev->device != PCI_DEVICE_ID_MATROX_G400) return -ENODEV; - 
dev = kmalloc(sizeof(struct matrox_device) + + dev = kzalloc(sizeof(struct matrox_device) + sizeof(struct w1_bus_master), GFP_KERNEL); if (!dev) { dev_err(&pdev->dev, @@ -173,7 +173,6 @@ static int __devinit matrox_w1_probe(struct pci_dev *pdev, const struct pci_devi return -ENOMEM; } - memset(dev, 0, sizeof(struct matrox_device) + sizeof(struct w1_bus_master)); dev->bus_master = (struct w1_bus_master *)(dev + 1); diff --git a/drivers/w1/slaves/w1_ds2433.c b/drivers/w1/slaves/w1_ds2433.c index cab56005dd4..858c16a544c 100644 --- a/drivers/w1/slaves/w1_ds2433.c +++ b/drivers/w1/slaves/w1_ds2433.c @@ -266,10 +266,9 @@ static int w1_f23_add_slave(struct w1_slave *sl) #ifdef CONFIG_W1_SLAVE_DS2433_CRC struct w1_f23_data *data; - data = kmalloc(sizeof(struct w1_f23_data), GFP_KERNEL); + data = kzalloc(sizeof(struct w1_f23_data), GFP_KERNEL); if (!data) return -ENOMEM; - memset(data, 0, sizeof(struct w1_f23_data)); sl->family_data = data; #endif /* CONFIG_W1_SLAVE_DS2433_CRC */ diff --git a/drivers/w1/w1.c b/drivers/w1/w1.c index c6332108f1c..8d7ab74170d 100644 --- a/drivers/w1/w1.c +++ b/drivers/w1/w1.c @@ -520,7 +520,7 @@ static int w1_attach_slave_device(struct w1_master *dev, struct w1_reg_num *rn) int err; struct w1_netlink_msg msg; - sl = kmalloc(sizeof(struct w1_slave), GFP_KERNEL); + sl = kzalloc(sizeof(struct w1_slave), GFP_KERNEL); if (!sl) { dev_err(&dev->dev, "%s: failed to allocate new slave device.\n", @@ -528,7 +528,6 @@ static int w1_attach_slave_device(struct w1_master *dev, struct w1_reg_num *rn) return -ENOMEM; } - memset(sl, 0, sizeof(*sl)); sl->owner = THIS_MODULE; sl->master = dev; diff --git a/drivers/w1/w1_int.c b/drivers/w1/w1_int.c index 258defdb2ef..2fbd8dd16df 100644 --- a/drivers/w1/w1_int.c +++ b/drivers/w1/w1_int.c @@ -41,7 +41,7 @@ static struct w1_master * w1_alloc_dev(u32 id, int slave_count, int slave_ttl, /* * We are in process context(kernel thread), so can sleep. 
*/ - dev = kmalloc(sizeof(struct w1_master) + sizeof(struct w1_bus_master), GFP_KERNEL); + dev = kzalloc(sizeof(struct w1_master) + sizeof(struct w1_bus_master), GFP_KERNEL); if (!dev) { printk(KERN_ERR "Failed to allocate %zd bytes for new w1 device.\n", @@ -49,7 +49,6 @@ static struct w1_master * w1_alloc_dev(u32 id, int slave_count, int slave_ttl, return NULL; } - memset(dev, 0, sizeof(struct w1_master) + sizeof(struct w1_bus_master)); dev->bus_master = (struct w1_bus_master *)(dev + 1); |