Diffstat (limited to 'drivers')
72 files changed, 4629 insertions, 1304 deletions
diff --git a/drivers/Kconfig b/drivers/Kconfig index a442c8f29fc..48bbdbe43e6 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -52,6 +52,8 @@ source "drivers/i2c/Kconfig" source "drivers/spi/Kconfig" +source "drivers/pps/Kconfig" + source "drivers/gpio/Kconfig" source "drivers/w1/Kconfig" diff --git a/drivers/Makefile b/drivers/Makefile index 00b44f4ccf0..bc4205d2fc3 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -72,6 +72,7 @@ obj-$(CONFIG_INPUT) += input/ obj-$(CONFIG_I2O) += message/ obj-$(CONFIG_RTC_LIB) += rtc/ obj-y += i2c/ media/ +obj-$(CONFIG_PPS) += pps/ obj-$(CONFIG_W1) += w1/ obj-$(CONFIG_POWER_SUPPLY) += power/ obj-$(CONFIG_HWMON) += hwmon/ diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 30bae6de6a0..0bd01f49cfd 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -807,7 +807,7 @@ if RTC_LIB=n config RTC tristate "Enhanced Real Time Clock Support (legacy PC RTC driver)" depends on !PPC && !PARISC && !IA64 && !M68K && !SPARC && !FRV \ - && !ARM && !SUPERH && !S390 && !AVR32 + && !ARM && !SUPERH && !S390 && !AVR32 && !BLACKFIN ---help--- If you say Y here and create a character special file /dev/rtc with major number 10 and minor number 135 using mknod ("man mknod"), you diff --git a/drivers/char/isicom.c b/drivers/char/isicom.c index 4d745a89504..4159292e35c 100644 --- a/drivers/char/isicom.c +++ b/drivers/char/isicom.c @@ -1593,7 +1593,7 @@ static unsigned int card_count; static int __devinit isicom_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { - unsigned int signature, index; + unsigned int uninitialized_var(signature), index; int retval = -EPERM; struct isi_board *board = NULL; diff --git a/drivers/char/mem.c b/drivers/char/mem.c index f96d0bef855..afa8813e737 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -863,59 +863,58 @@ static const struct file_operations kmsg_fops = { .write = kmsg_write, }; -static int memory_open(struct inode * inode, struct file * filp) -{ - int ret = 0; - - lock_kernel(); - switch (iminor(inode)) { - case 1: - filp->f_op = &mem_fops; - filp->f_mapping->backing_dev_info = - &directly_mappable_cdev_bdi; - break; +static const struct { + unsigned int minor; + char *name; + umode_t mode; + const struct file_operations *fops; + struct backing_dev_info *dev_info; +} devlist[] = { /* list of minor devices */ + {1, "mem", S_IRUSR | S_IWUSR | S_IRGRP, &mem_fops, + &directly_mappable_cdev_bdi}, #ifdef CONFIG_DEVKMEM - case 2: - filp->f_op = &kmem_fops; - filp->f_mapping->backing_dev_info = - &directly_mappable_cdev_bdi; - break; + {2, "kmem", S_IRUSR | S_IWUSR | S_IRGRP, &kmem_fops, + &directly_mappable_cdev_bdi}, #endif - case 3: - filp->f_op = &null_fops; - break; + {3, "null", S_IRUGO | S_IWUGO, &null_fops, NULL}, #ifdef CONFIG_DEVPORT - case 4: - filp->f_op = &port_fops; - break; + {4, "port", S_IRUSR | S_IWUSR | S_IRGRP, &port_fops, NULL}, #endif - case 5: - filp->f_mapping->backing_dev_info = &zero_bdi; - filp->f_op = &zero_fops; - break; - case 7: - filp->f_op = &full_fops; - break; - case 8: - filp->f_op = &random_fops; - break; - case 9: - filp->f_op = &urandom_fops; - break; - case 11: - filp->f_op = &kmsg_fops; - break; + {5, "zero", S_IRUGO | S_IWUGO, &zero_fops, &zero_bdi}, + {7, "full", S_IRUGO | S_IWUGO, &full_fops, NULL}, + {8, "random", S_IRUGO | S_IWUSR, &random_fops, NULL}, + {9, "urandom", S_IRUGO | S_IWUSR, &urandom_fops, NULL}, + {11,"kmsg", S_IRUGO | S_IWUSR, &kmsg_fops, NULL}, #ifdef CONFIG_CRASH_DUMP - case 12: - filp->f_op = &oldmem_fops; - break; + {12,"oldmem", 
S_IRUSR | S_IWUSR | S_IRGRP, &oldmem_fops, NULL}, #endif - default: - unlock_kernel(); - return -ENXIO; +}; + +static int memory_open(struct inode *inode, struct file *filp) +{ + int ret = 0; + int i; + + lock_kernel(); + + for (i = 0; i < ARRAY_SIZE(devlist); i++) { + if (devlist[i].minor == iminor(inode)) { + filp->f_op = devlist[i].fops; + if (devlist[i].dev_info) { + filp->f_mapping->backing_dev_info = + devlist[i].dev_info; + } + + break; + } } - if (filp->f_op && filp->f_op->open) - ret = filp->f_op->open(inode,filp); + + if (i == ARRAY_SIZE(devlist)) + ret = -ENXIO; + else + if (filp->f_op && filp->f_op->open) + ret = filp->f_op->open(inode, filp); + unlock_kernel(); return ret; } @@ -924,30 +923,6 @@ static const struct file_operations memory_fops = { .open = memory_open, /* just a selector for the real open */ }; -static const struct { - unsigned int minor; - char *name; - umode_t mode; - const struct file_operations *fops; -} devlist[] = { /* list of minor devices */ - {1, "mem", S_IRUSR | S_IWUSR | S_IRGRP, &mem_fops}, -#ifdef CONFIG_DEVKMEM - {2, "kmem", S_IRUSR | S_IWUSR | S_IRGRP, &kmem_fops}, -#endif - {3, "null", S_IRUGO | S_IWUGO, &null_fops}, -#ifdef CONFIG_DEVPORT - {4, "port", S_IRUSR | S_IWUSR | S_IRGRP, &port_fops}, -#endif - {5, "zero", S_IRUGO | S_IWUGO, &zero_fops}, - {7, "full", S_IRUGO | S_IWUGO, &full_fops}, - {8, "random", S_IRUGO | S_IWUSR, &random_fops}, - {9, "urandom", S_IRUGO | S_IWUSR, &urandom_fops}, - {11,"kmsg", S_IRUGO | S_IWUSR, &kmsg_fops}, -#ifdef CONFIG_CRASH_DUMP - {12,"oldmem", S_IRUSR | S_IWUSR | S_IRGRP, &oldmem_fops}, -#endif -}; - static struct class *mem_class; static int __init chr_dev_init(void) diff --git a/drivers/char/ppdev.c b/drivers/char/ppdev.c index c84c34fb123..432655bcb04 100644 --- a/drivers/char/ppdev.c +++ b/drivers/char/ppdev.c @@ -114,8 +114,7 @@ static ssize_t pp_read (struct file * file, char __user * buf, size_t count, if (!(pp->flags & PP_CLAIMED)) { /* Don't have the port claimed */ - printk (KERN_DEBUG CHRDEV "%x: claim the port first\n", - minor); + pr_debug(CHRDEV "%x: claim the port first\n", minor); return -EINVAL; } @@ -198,8 +197,7 @@ static ssize_t pp_write (struct file * file, const char __user * buf, if (!(pp->flags & PP_CLAIMED)) { /* Don't have the port claimed */ - printk (KERN_DEBUG CHRDEV "%x: claim the port first\n", - minor); + pr_debug(CHRDEV "%x: claim the port first\n", minor); return -EINVAL; } @@ -313,7 +311,7 @@ static int register_device (int minor, struct pp_struct *pp) } pp->pdev = pdev; - printk (KERN_DEBUG "%s: registered pardevice\n", name); + pr_debug("%s: registered pardevice\n", name); return 0; } @@ -343,8 +341,7 @@ static int pp_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg) int ret; if (pp->flags & PP_CLAIMED) { - printk (KERN_DEBUG CHRDEV - "%x: you've already got it!\n", minor); + pr_debug(CHRDEV "%x: you've already got it!\n", minor); return -EINVAL; } @@ -379,7 +376,7 @@ static int pp_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg) } case PPEXCL: if (pp->pdev) { - printk (KERN_DEBUG CHRDEV "%x: too late for PPEXCL; " + pr_debug(CHRDEV "%x: too late for PPEXCL; " "already registered\n", minor); if (pp->flags & PP_EXCL) /* But it's not really an error. */ @@ -491,8 +488,7 @@ static int pp_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg) /* Everything else requires the port to be claimed, so check * that now. 
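
The drivers/char/mem.c rewrite above folds the per-minor switch in memory_open() into a single devlist[] table that also carries the name, mode and backing_dev_info, so one array drives both open-time dispatch and device-node creation. A minimal standalone sketch of the same table-driven dispatch pattern (plain C, hypothetical device names; this is an illustration, not the kernel code):

#include <stdio.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

typedef int (*open_fn)(void);

static int open_null(void) { puts("null opened"); return 0; }
static int open_zero(void) { puts("zero opened"); return 0; }

static const struct {
        unsigned int minor;
        const char *name;
        open_fn open;
} devlist[] = {                 /* one row per minor device */
        { 3, "null", open_null },
        { 5, "zero", open_zero },
};

/* Returns 0 on success, -1 if the minor is not in the table (cf. -ENXIO). */
static int device_open(unsigned int minor)
{
        size_t i;

        for (i = 0; i < ARRAY_SIZE(devlist); i++)
                if (devlist[i].minor == minor)
                        return devlist[i].open();
        return -1;
}

int main(void)
{
        device_open(3);                 /* dispatches to open_null() */
        return device_open(42);         /* unknown minor -> error */
}
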
*/ if ((pp->flags & PP_CLAIMED) == 0) { - printk (KERN_DEBUG CHRDEV "%x: claim the port first\n", - minor); + pr_debug(CHRDEV "%x: claim the port first\n", minor); return -EINVAL; } @@ -624,8 +620,7 @@ static int pp_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return 0; default: - printk (KERN_DEBUG CHRDEV "%x: What? (cmd=0x%x)\n", minor, - cmd); + pr_debug(CHRDEV "%x: What? (cmd=0x%x)\n", minor, cmd); return -EINVAL; } @@ -698,9 +693,8 @@ static int pp_release (struct inode * inode, struct file * file) } if (compat_negot) { parport_negotiate (pp->pdev->port, IEEE1284_MODE_COMPAT); - printk (KERN_DEBUG CHRDEV - "%x: negotiated back to compatibility mode because " - "user-space forgot\n", minor); + pr_debug(CHRDEV "%x: negotiated back to compatibility " + "mode because user-space forgot\n", minor); } if (pp->flags & PP_CLAIMED) { @@ -713,7 +707,7 @@ static int pp_release (struct inode * inode, struct file * file) info->phase = pp->saved_state.phase; parport_release (pp->pdev); if (compat_negot != 1) { - printk (KERN_DEBUG CHRDEV "%x: released pardevice " + pr_debug(CHRDEV "%x: released pardevice " "because user-space forgot\n", minor); } } @@ -723,8 +717,7 @@ static int pp_release (struct inode * inode, struct file * file) parport_unregister_device (pp->pdev); kfree (name); pp->pdev = NULL; - printk (KERN_DEBUG CHRDEV "%x: unregistered pardevice\n", - minor); + pr_debug(CHRDEV "%x: unregistered pardevice\n", minor); } kfree (pp); diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c index 39a05b5fa9c..0db35857e4d 100644 --- a/drivers/char/sysrq.c +++ b/drivers/char/sysrq.c @@ -121,20 +121,17 @@ static struct sysrq_key_op sysrq_unraw_op = { #define sysrq_unraw_op (*(struct sysrq_key_op *)0) #endif /* CONFIG_VT */ -#ifdef CONFIG_KEXEC -static void sysrq_handle_crashdump(int key, struct tty_struct *tty) +static void sysrq_handle_crash(int key, struct tty_struct *tty) { - crash_kexec(get_irq_regs()); + char *killer = NULL; + *killer = 1; } static struct sysrq_key_op sysrq_crashdump_op = { - .handler = sysrq_handle_crashdump, - .help_msg = "Crashdump", - .action_msg = "Trigger a crashdump", + .handler = sysrq_handle_crash, + .help_msg = "Crash", + .action_msg = "Trigger a crash", .enable_mask = SYSRQ_ENABLE_DUMP, }; -#else -#define sysrq_crashdump_op (*(struct sysrq_key_op *)0) -#endif static void sysrq_handle_reboot(int key, struct tty_struct *tty) { diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index ab4f3592a11..4339b1a879c 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -5,7 +5,7 @@ # menuconfig EDAC - bool "EDAC - error detection and reporting" + bool "EDAC (Error Detection And Correction) reporting" depends on HAS_IOMEM depends on X86 || PPC help @@ -232,4 +232,13 @@ config EDAC_AMD8111 Note, add more Kconfig dependency if it's adopted on some machine other than Maple. +config EDAC_CPC925 + tristate "IBM CPC925 Memory Controller (PPC970FX)" + depends on EDAC_MM_EDAC && PPC64 + help + Support for error detection and correction on the + IBM CPC925 Bridge and Memory Controller, which is + a companion chip to the PowerPC 970 family of + processors. 
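
The ppdev.c hunks above convert printk(KERN_DEBUG ...) calls to pr_debug(). Unlike printk(KERN_DEBUG ...), which is always compiled in, pr_debug() expands to nothing unless DEBUG is defined (or dynamic debug is enabled), so the format strings cost nothing in production builds. A standalone sketch of the same conditional-logging macro pattern (the pr_debug name is borrowed; this is not the kernel implementation, and ## __VA_ARGS__ is a GNU extension):

#include <stdio.h>

/* Compiled out entirely unless the translation unit defines DEBUG,
 * mirroring how the kernel treats pr_debug() without dynamic debug. */
#ifdef DEBUG
#define pr_debug(fmt, ...) fprintf(stderr, "debug: " fmt, ##__VA_ARGS__)
#else
#define pr_debug(fmt, ...) do { } while (0)
#endif

int main(void)
{
        pr_debug("%x: claim the port first\n", 0);  /* no-op unless -DDEBUG */
        return 0;
}
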
+ endif # EDAC diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index 633dc5604ee..98aa4a7db41 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -18,6 +18,7 @@ edac_core-objs += edac_pci.o edac_pci_sysfs.o endif obj-$(CONFIG_EDAC_AMD76X) += amd76x_edac.o +obj-$(CONFIG_EDAC_CPC925) += cpc925_edac.o obj-$(CONFIG_EDAC_I5000) += i5000_edac.o obj-$(CONFIG_EDAC_I5100) += i5100_edac.o obj-$(CONFIG_EDAC_I5400) += i5400_edac.o diff --git a/drivers/edac/amd8111_edac.c b/drivers/edac/amd8111_edac.c index 2cb58ef743e..35b78d04bbf 100644 --- a/drivers/edac/amd8111_edac.c +++ b/drivers/edac/amd8111_edac.c @@ -37,7 +37,6 @@ #define AMD8111_EDAC_MOD_STR "amd8111_edac" #define PCI_DEVICE_ID_AMD_8111_PCI 0x7460 -static int edac_dev_idx; enum amd8111_edac_devs { LPC_BRIDGE = 0, @@ -377,7 +376,7 @@ static int amd8111_dev_probe(struct pci_dev *dev, * edac_device_ctl_info, but make use of existing * one instead. */ - dev_info->edac_idx = edac_dev_idx++; + dev_info->edac_idx = edac_device_alloc_index(); dev_info->edac_dev = edac_device_alloc_ctl_info(0, dev_info->ctl_name, 1, NULL, 0, 0, diff --git a/drivers/edac/cell_edac.c b/drivers/edac/cell_edac.c index cb0f639f049..c973004c002 100644 --- a/drivers/edac/cell_edac.c +++ b/drivers/edac/cell_edac.c @@ -227,7 +227,7 @@ static struct platform_driver cell_edac_driver = { .owner = THIS_MODULE, }, .probe = cell_edac_probe, - .remove = cell_edac_remove, + .remove = __devexit_p(cell_edac_remove), }; static int __init cell_edac_init(void) diff --git a/drivers/edac/cpc925_edac.c b/drivers/edac/cpc925_edac.c new file mode 100644 index 00000000000..8c54196b5ab --- /dev/null +++ b/drivers/edac/cpc925_edac.c @@ -0,0 +1,1017 @@ +/* + * cpc925_edac.c, EDAC driver for IBM CPC925 Bridge and Memory Controller. + * + * Copyright (c) 2008 Wind River Systems, Inc. + * + * Authors: Cao Qingtao <qingtao.cao@windriver.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/io.h> +#include <linux/edac.h> +#include <linux/of.h> +#include <linux/platform_device.h> + +#include "edac_core.h" +#include "edac_module.h" + +#define CPC925_EDAC_REVISION " Ver: 1.0.0 " __DATE__ +#define CPC925_EDAC_MOD_STR "cpc925_edac" + +#define cpc925_printk(level, fmt, arg...) \ + edac_printk(level, "CPC925", fmt, ##arg) + +#define cpc925_mc_printk(mci, level, fmt, arg...) \ + edac_mc_chipset_printk(mci, level, "CPC925", fmt, ##arg) + +/* + * CPC925 registers are of 32 bits with bit0 defined at the + * most significant bit and bit31 at that of least significant. + */ +#define CPC925_BITS_PER_REG 32 +#define CPC925_BIT(nr) (1UL << (CPC925_BITS_PER_REG - 1 - nr)) + +/* + * EDAC device names for the error detections of + * CPU Interface and Hypertransport Link. 
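
The CPC925_BIT() macro above encodes IBM's big-endian bit numbering, where bit 0 is the most significant bit of a 32-bit register, the opposite of the usual (1 << nr) convention. A small standalone demonstration of the remapping, with values taken from the APIMASK definitions that follow:

#include <stdio.h>

#define BITS_PER_REG    32
/* Bit 0 is the MSB: invert the index before shifting. */
#define CPC925_BIT(nr)  (1UL << (BITS_PER_REG - 1 - (nr)))

int main(void)
{
        /* APIMASK_DART is bit 0 in the manual, i.e. 0x80000000 in C terms. */
        printf("APIMASK_DART     = 0x%08lx\n", CPC925_BIT(0));
        /* APIMASK_ECC_CE_L is bit 11 -> 0x00100000. */
        printf("APIMASK_ECC_CE_L = 0x%08lx\n", CPC925_BIT(11));
        return 0;
}
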
+ */ +#define CPC925_CPU_ERR_DEV "cpu" +#define CPC925_HT_LINK_DEV "htlink" + +/* Suppose DDR Refresh cycle is 15.6 microsecond */ +#define CPC925_REF_FREQ 0xFA69 +#define CPC925_SCRUB_BLOCK_SIZE 64 /* bytes */ +#define CPC925_NR_CSROWS 8 + +/* + * All registers and bits definitions are taken from + * "CPC925 Bridge and Memory Controller User Manual, SA14-2761-02". + */ + +/* + * CPU and Memory Controller Registers + */ +/************************************************************ + * Processor Interface Exception Mask Register (APIMASK) + ************************************************************/ +#define REG_APIMASK_OFFSET 0x30070 +enum apimask_bits { + APIMASK_DART = CPC925_BIT(0), /* DART Exception */ + APIMASK_ADI0 = CPC925_BIT(1), /* Handshake Error on PI0_ADI */ + APIMASK_ADI1 = CPC925_BIT(2), /* Handshake Error on PI1_ADI */ + APIMASK_STAT = CPC925_BIT(3), /* Status Exception */ + APIMASK_DERR = CPC925_BIT(4), /* Data Error Exception */ + APIMASK_ADRS0 = CPC925_BIT(5), /* Addressing Exception on PI0 */ + APIMASK_ADRS1 = CPC925_BIT(6), /* Addressing Exception on PI1 */ + /* BIT(7) Reserved */ + APIMASK_ECC_UE_H = CPC925_BIT(8), /* UECC upper */ + APIMASK_ECC_CE_H = CPC925_BIT(9), /* CECC upper */ + APIMASK_ECC_UE_L = CPC925_BIT(10), /* UECC lower */ + APIMASK_ECC_CE_L = CPC925_BIT(11), /* CECC lower */ + + CPU_MASK_ENABLE = (APIMASK_DART | APIMASK_ADI0 | APIMASK_ADI1 | + APIMASK_STAT | APIMASK_DERR | APIMASK_ADRS0 | + APIMASK_ADRS1), + ECC_MASK_ENABLE = (APIMASK_ECC_UE_H | APIMASK_ECC_CE_H | + APIMASK_ECC_UE_L | APIMASK_ECC_CE_L), +}; + +/************************************************************ + * Processor Interface Exception Register (APIEXCP) + ************************************************************/ +#define REG_APIEXCP_OFFSET 0x30060 +enum apiexcp_bits { + APIEXCP_DART = CPC925_BIT(0), /* DART Exception */ + APIEXCP_ADI0 = CPC925_BIT(1), /* Handshake Error on PI0_ADI */ + APIEXCP_ADI1 = CPC925_BIT(2), /* Handshake Error on PI1_ADI */ + APIEXCP_STAT = CPC925_BIT(3), /* Status Exception */ + APIEXCP_DERR = CPC925_BIT(4), /* Data Error Exception */ + APIEXCP_ADRS0 = CPC925_BIT(5), /* Addressing Exception on PI0 */ + APIEXCP_ADRS1 = CPC925_BIT(6), /* Addressing Exception on PI1 */ + /* BIT(7) Reserved */ + APIEXCP_ECC_UE_H = CPC925_BIT(8), /* UECC upper */ + APIEXCP_ECC_CE_H = CPC925_BIT(9), /* CECC upper */ + APIEXCP_ECC_UE_L = CPC925_BIT(10), /* UECC lower */ + APIEXCP_ECC_CE_L = CPC925_BIT(11), /* CECC lower */ + + CPU_EXCP_DETECTED = (APIEXCP_DART | APIEXCP_ADI0 | APIEXCP_ADI1 | + APIEXCP_STAT | APIEXCP_DERR | APIEXCP_ADRS0 | + APIEXCP_ADRS1), + UECC_EXCP_DETECTED = (APIEXCP_ECC_UE_H | APIEXCP_ECC_UE_L), + CECC_EXCP_DETECTED = (APIEXCP_ECC_CE_H | APIEXCP_ECC_CE_L), + ECC_EXCP_DETECTED = (UECC_EXCP_DETECTED | CECC_EXCP_DETECTED), +}; + +/************************************************************ + * Memory Bus Configuration Register (MBCR) +************************************************************/ +#define REG_MBCR_OFFSET 0x2190 +#define MBCR_64BITCFG_SHIFT 23 +#define MBCR_64BITCFG_MASK (1UL << MBCR_64BITCFG_SHIFT) +#define MBCR_64BITBUS_SHIFT 22 +#define MBCR_64BITBUS_MASK (1UL << MBCR_64BITBUS_SHIFT) + +/************************************************************ + * Memory Bank Mode Register (MBMR) +************************************************************/ +#define REG_MBMR_OFFSET 0x21C0 +#define MBMR_MODE_MAX_VALUE 0xF +#define MBMR_MODE_SHIFT 25 +#define MBMR_MODE_MASK (MBMR_MODE_MAX_VALUE << MBMR_MODE_SHIFT) +#define MBMR_BBA_SHIFT 24 +#define 
MBMR_BBA_MASK (1UL << MBMR_BBA_SHIFT) + +/************************************************************ + * Memory Bank Boundary Address Register (MBBAR) + ************************************************************/ +#define REG_MBBAR_OFFSET 0x21D0 +#define MBBAR_BBA_MAX_VALUE 0xFF +#define MBBAR_BBA_SHIFT 24 +#define MBBAR_BBA_MASK (MBBAR_BBA_MAX_VALUE << MBBAR_BBA_SHIFT) + +/************************************************************ + * Memory Scrub Control Register (MSCR) + ************************************************************/ +#define REG_MSCR_OFFSET 0x2400 +#define MSCR_SCRUB_MOD_MASK 0xC0000000 /* scrub_mod - bit0:1*/ +#define MSCR_BACKGR_SCRUB 0x40000000 /* 01 */ +#define MSCR_SI_SHIFT 16 /* si - bit8:15*/ +#define MSCR_SI_MAX_VALUE 0xFF +#define MSCR_SI_MASK (MSCR_SI_MAX_VALUE << MSCR_SI_SHIFT) + +/************************************************************ + * Memory Scrub Range Start Register (MSRSR) + ************************************************************/ +#define REG_MSRSR_OFFSET 0x2410 + +/************************************************************ + * Memory Scrub Range End Register (MSRER) + ************************************************************/ +#define REG_MSRER_OFFSET 0x2420 + +/************************************************************ + * Memory Scrub Pattern Register (MSPR) + ************************************************************/ +#define REG_MSPR_OFFSET 0x2430 + +/************************************************************ + * Memory Check Control Register (MCCR) + ************************************************************/ +#define REG_MCCR_OFFSET 0x2440 +enum mccr_bits { + MCCR_ECC_EN = CPC925_BIT(0), /* ECC high and low check */ +}; + +/************************************************************ + * Memory Check Range End Register (MCRER) + ************************************************************/ +#define REG_MCRER_OFFSET 0x2450 + +/************************************************************ + * Memory Error Address Register (MEAR) + ************************************************************/ +#define REG_MEAR_OFFSET 0x2460 +#define MEAR_BCNT_MAX_VALUE 0x3 +#define MEAR_BCNT_SHIFT 30 +#define MEAR_BCNT_MASK (MEAR_BCNT_MAX_VALUE << MEAR_BCNT_SHIFT) +#define MEAR_RANK_MAX_VALUE 0x7 +#define MEAR_RANK_SHIFT 27 +#define MEAR_RANK_MASK (MEAR_RANK_MAX_VALUE << MEAR_RANK_SHIFT) +#define MEAR_COL_MAX_VALUE 0x7FF +#define MEAR_COL_SHIFT 16 +#define MEAR_COL_MASK (MEAR_COL_MAX_VALUE << MEAR_COL_SHIFT) +#define MEAR_BANK_MAX_VALUE 0x3 +#define MEAR_BANK_SHIFT 14 +#define MEAR_BANK_MASK (MEAR_BANK_MAX_VALUE << MEAR_BANK_SHIFT) +#define MEAR_ROW_MASK 0x00003FFF + +/************************************************************ + * Memory Error Syndrome Register (MESR) + ************************************************************/ +#define REG_MESR_OFFSET 0x2470 +#define MESR_ECC_SYN_H_MASK 0xFF00 +#define MESR_ECC_SYN_L_MASK 0x00FF + +/************************************************************ + * Memory Mode Control Register (MMCR) + ************************************************************/ +#define REG_MMCR_OFFSET 0x2500 +enum mmcr_bits { + MMCR_REG_DIMM_MODE = CPC925_BIT(3), +}; + +/* + * HyperTransport Link Registers + */ +/************************************************************ + * Error Handling/Enumeration Scratch Pad Register (ERRCTRL) + ************************************************************/ +#define REG_ERRCTRL_OFFSET 0x70140 +enum errctrl_bits { /* nonfatal interrupts for */ + ERRCTRL_SERR_NF = 
CPC925_BIT(0), /* system error */ + ERRCTRL_CRC_NF = CPC925_BIT(1), /* CRC error */ + ERRCTRL_RSP_NF = CPC925_BIT(2), /* Response error */ + ERRCTRL_EOC_NF = CPC925_BIT(3), /* End-Of-Chain error */ + ERRCTRL_OVF_NF = CPC925_BIT(4), /* Overflow error */ + ERRCTRL_PROT_NF = CPC925_BIT(5), /* Protocol error */ + + ERRCTRL_RSP_ERR = CPC925_BIT(6), /* Response error received */ + ERRCTRL_CHN_FAL = CPC925_BIT(7), /* Sync flooding detected */ + + HT_ERRCTRL_ENABLE = (ERRCTRL_SERR_NF | ERRCTRL_CRC_NF | + ERRCTRL_RSP_NF | ERRCTRL_EOC_NF | + ERRCTRL_OVF_NF | ERRCTRL_PROT_NF), + HT_ERRCTRL_DETECTED = (ERRCTRL_RSP_ERR | ERRCTRL_CHN_FAL), +}; + +/************************************************************ + * Link Configuration and Link Control Register (LINKCTRL) + ************************************************************/ +#define REG_LINKCTRL_OFFSET 0x70110 +enum linkctrl_bits { + LINKCTRL_CRC_ERR = (CPC925_BIT(22) | CPC925_BIT(23)), + LINKCTRL_LINK_FAIL = CPC925_BIT(27), + + HT_LINKCTRL_DETECTED = (LINKCTRL_CRC_ERR | LINKCTRL_LINK_FAIL), +}; + +/************************************************************ + * Link FreqCap/Error/Freq/Revision ID Register (LINKERR) + ************************************************************/ +#define REG_LINKERR_OFFSET 0x70120 +enum linkerr_bits { + LINKERR_EOC_ERR = CPC925_BIT(17), /* End-Of-Chain error */ + LINKERR_OVF_ERR = CPC925_BIT(18), /* Receive Buffer Overflow */ + LINKERR_PROT_ERR = CPC925_BIT(19), /* Protocol error */ + + HT_LINKERR_DETECTED = (LINKERR_EOC_ERR | LINKERR_OVF_ERR | + LINKERR_PROT_ERR), +}; + +/************************************************************ + * Bridge Control Register (BRGCTRL) + ************************************************************/ +#define REG_BRGCTRL_OFFSET 0x70300 +enum brgctrl_bits { + BRGCTRL_DETSERR = CPC925_BIT(0), /* SERR on Secondary Bus */ + BRGCTRL_SECBUSRESET = CPC925_BIT(9), /* Secondary Bus Reset */ +}; + +/* Private structure for edac memory controller */ +struct cpc925_mc_pdata { + void __iomem *vbase; + unsigned long total_mem; + const char *name; + int edac_idx; +}; + +/* Private structure for common edac device */ +struct cpc925_dev_info { + void __iomem *vbase; + struct platform_device *pdev; + char *ctl_name; + int edac_idx; + struct edac_device_ctl_info *edac_dev; + void (*init)(struct cpc925_dev_info *dev_info); + void (*exit)(struct cpc925_dev_info *dev_info); + void (*check)(struct edac_device_ctl_info *edac_dev); +}; + +/* Get total memory size from Open Firmware DTB */ +static void get_total_mem(struct cpc925_mc_pdata *pdata) +{ + struct device_node *np = NULL; + const unsigned int *reg, *reg_end; + int len, sw, aw; + unsigned long start, size; + + np = of_find_node_by_type(NULL, "memory"); + if (!np) + return; + + aw = of_n_addr_cells(np); + sw = of_n_size_cells(np); + reg = (const unsigned int *)of_get_property(np, "reg", &len); + reg_end = reg + len/4; + + pdata->total_mem = 0; + do { + start = of_read_number(reg, aw); + reg += aw; + size = of_read_number(reg, sw); + reg += sw; + debugf1("%s: start 0x%lx, size 0x%lx\n", __func__, + start, size); + pdata->total_mem += size; + } while (reg < reg_end); + + of_node_put(np); + debugf0("%s: total_mem 0x%lx\n", __func__, pdata->total_mem); +} + +static void cpc925_init_csrows(struct mem_ctl_info *mci) +{ + struct cpc925_mc_pdata *pdata = mci->pvt_info; + struct csrow_info *csrow; + int index; + u32 mbmr, mbbar, bba; + unsigned long row_size, last_nr_pages = 0; + + get_total_mem(pdata); + + for (index = 0; index < mci->nr_csrows; index++) { 
+ mbmr = __raw_readl(pdata->vbase + REG_MBMR_OFFSET + + 0x20 * index); + mbbar = __raw_readl(pdata->vbase + REG_MBBAR_OFFSET + + 0x20 + index); + bba = (((mbmr & MBMR_BBA_MASK) >> MBMR_BBA_SHIFT) << 8) | + ((mbbar & MBBAR_BBA_MASK) >> MBBAR_BBA_SHIFT); + + if (bba == 0) + continue; /* not populated */ + + csrow = &mci->csrows[index]; + + row_size = bba * (1UL << 28); /* 256M */ + csrow->first_page = last_nr_pages; + csrow->nr_pages = row_size >> PAGE_SHIFT; + csrow->last_page = csrow->first_page + csrow->nr_pages - 1; + last_nr_pages = csrow->last_page + 1; + + csrow->mtype = MEM_RDDR; + csrow->edac_mode = EDAC_SECDED; + + switch (csrow->nr_channels) { + case 1: /* Single channel */ + csrow->grain = 32; /* four-beat burst of 32 bytes */ + break; + case 2: /* Dual channel */ + default: + csrow->grain = 64; /* four-beat burst of 64 bytes */ + break; + } + + switch ((mbmr & MBMR_MODE_MASK) >> MBMR_MODE_SHIFT) { + case 6: /* 0110, no way to differentiate X8 VS X16 */ + case 5: /* 0101 */ + case 8: /* 1000 */ + csrow->dtype = DEV_X16; + break; + case 7: /* 0111 */ + case 9: /* 1001 */ + csrow->dtype = DEV_X8; + break; + default: + csrow->dtype = DEV_UNKNOWN; + break; + } + } +} + +/* Enable memory controller ECC detection */ +static void cpc925_mc_init(struct mem_ctl_info *mci) +{ + struct cpc925_mc_pdata *pdata = mci->pvt_info; + u32 apimask; + u32 mccr; + + /* Enable various ECC error exceptions */ + apimask = __raw_readl(pdata->vbase + REG_APIMASK_OFFSET); + if ((apimask & ECC_MASK_ENABLE) == 0) { + apimask |= ECC_MASK_ENABLE; + __raw_writel(apimask, pdata->vbase + REG_APIMASK_OFFSET); + } + + /* Enable ECC detection */ + mccr = __raw_readl(pdata->vbase + REG_MCCR_OFFSET); + if ((mccr & MCCR_ECC_EN) == 0) { + mccr |= MCCR_ECC_EN; + __raw_writel(mccr, pdata->vbase + REG_MCCR_OFFSET); + } +} + +/* Disable memory controller ECC detection */ +static void cpc925_mc_exit(struct mem_ctl_info *mci) +{ + /* + * WARNING: + * We are supposed to clear the ECC error detection bits, + * and it will be no problem to do so. However, once they + * are cleared here if we want to re-install CPC925 EDAC + * module later, setting them up in cpc925_mc_init() will + * trigger machine check exception. + * Also, it's ok to leave ECC error detection bits enabled, + * since they are reset to 1 by default or by boot loader. + */ + + return; +} + +/* + * Revert DDR column/row/bank addresses into page frame number and + * offset in page. 
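
cpc925_init_csrows() above derives each csrow's size from the bank boundary address: bba counts 256 MB units, so the row spans bba * (1UL << 28) bytes, which is converted to pages and laid out contiguously after the previous row. The arithmetic in isolation (standalone C; PAGE_SHIFT of 12, i.e. 4 KB pages, is an assumption of this sketch):

#include <stdio.h>

#define PAGE_SHIFT 12   /* 4 KB pages, assumed for the example */

int main(void)
{
        unsigned long bba = 2;                          /* boundary in 256 MB units */
        unsigned long row_size = bba * (1UL << 28);     /* 512 MB */
        unsigned long last_nr_pages = 0;                /* end of the previous row */

        unsigned long first_page = last_nr_pages;
        unsigned long nr_pages = row_size >> PAGE_SHIFT;
        unsigned long last_page = first_page + nr_pages - 1;

        printf("row: %lu pages, first %lu, last %lu\n",
               nr_pages, first_page, last_page);
        return 0;
}
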
+ * + * Suppose memory mode is 0x0111(128-bit mode, identical DIMM pairs), + * physical address(PA) bits to column address(CA) bits mappings are: + * CA 0 1 2 3 4 5 6 7 8 9 10 + * PA 59 58 57 56 55 54 53 52 51 50 49 + * + * physical address(PA) bits to bank address(BA) bits mappings are: + * BA 0 1 + * PA 43 44 + * + * physical address(PA) bits to row address(RA) bits mappings are: + * RA 0 1 2 3 4 5 6 7 8 9 10 11 12 + * PA 36 35 34 48 47 46 45 40 41 42 39 38 37 + */ +static void cpc925_mc_get_pfn(struct mem_ctl_info *mci, u32 mear, + unsigned long *pfn, unsigned long *offset, int *csrow) +{ + u32 bcnt, rank, col, bank, row; + u32 c; + unsigned long pa; + int i; + + bcnt = (mear & MEAR_BCNT_MASK) >> MEAR_BCNT_SHIFT; + rank = (mear & MEAR_RANK_MASK) >> MEAR_RANK_SHIFT; + col = (mear & MEAR_COL_MASK) >> MEAR_COL_SHIFT; + bank = (mear & MEAR_BANK_MASK) >> MEAR_BANK_SHIFT; + row = mear & MEAR_ROW_MASK; + + *csrow = rank; + +#ifdef CONFIG_EDAC_DEBUG + if (mci->csrows[rank].first_page == 0) { + cpc925_mc_printk(mci, KERN_ERR, "ECC occurs in a " + "non-populated csrow, broken hardware?\n"); + return; + } +#endif + + /* Revert csrow number */ + pa = mci->csrows[rank].first_page << PAGE_SHIFT; + + /* Revert column address */ + col += bcnt; + for (i = 0; i < 11; i++) { + c = col & 0x1; + col >>= 1; + pa |= c << (14 - i); + } + + /* Revert bank address */ + pa |= bank << 19; + + /* Revert row address, in 4 steps */ + for (i = 0; i < 3; i++) { + c = row & 0x1; + row >>= 1; + pa |= c << (26 - i); + } + + for (i = 0; i < 3; i++) { + c = row & 0x1; + row >>= 1; + pa |= c << (21 + i); + } + + for (i = 0; i < 4; i++) { + c = row & 0x1; + row >>= 1; + pa |= c << (18 - i); + } + + for (i = 0; i < 3; i++) { + c = row & 0x1; + row >>= 1; + pa |= c << (29 - i); + } + + *offset = pa & (PAGE_SIZE - 1); + *pfn = pa >> PAGE_SHIFT; + + debugf0("%s: ECC physical address 0x%lx\n", __func__, pa); +} + +static int cpc925_mc_find_channel(struct mem_ctl_info *mci, u16 syndrome) +{ + if ((syndrome & MESR_ECC_SYN_H_MASK) == 0) + return 0; + + if ((syndrome & MESR_ECC_SYN_L_MASK) == 0) + return 1; + + cpc925_mc_printk(mci, KERN_INFO, "Unexpected syndrome value: 0x%x\n", + syndrome); + return 1; +} + +/* Check memory controller registers for ECC errors */ +static void cpc925_mc_check(struct mem_ctl_info *mci) +{ + struct cpc925_mc_pdata *pdata = mci->pvt_info; + u32 apiexcp; + u32 mear; + u32 mesr; + u16 syndrome; + unsigned long pfn = 0, offset = 0; + int csrow = 0, channel = 0; + + /* APIEXCP is cleared when read */ + apiexcp = __raw_readl(pdata->vbase + REG_APIEXCP_OFFSET); + if ((apiexcp & ECC_EXCP_DETECTED) == 0) + return; + + mesr = __raw_readl(pdata->vbase + REG_MESR_OFFSET); + syndrome = mesr | (MESR_ECC_SYN_H_MASK | MESR_ECC_SYN_L_MASK); + + mear = __raw_readl(pdata->vbase + REG_MEAR_OFFSET); + + /* Revert column/row addresses into page frame number, etc */ + cpc925_mc_get_pfn(mci, mear, &pfn, &offset, &csrow); + + if (apiexcp & CECC_EXCP_DETECTED) { + cpc925_mc_printk(mci, KERN_INFO, "DRAM CECC Fault\n"); + channel = cpc925_mc_find_channel(mci, syndrome); + edac_mc_handle_ce(mci, pfn, offset, syndrome, + csrow, channel, mci->ctl_name); + } + + if (apiexcp & UECC_EXCP_DETECTED) { + cpc925_mc_printk(mci, KERN_INFO, "DRAM UECC Fault\n"); + edac_mc_handle_ue(mci, pfn, offset, csrow, mci->ctl_name); + } + + cpc925_mc_printk(mci, KERN_INFO, "Dump registers:\n"); + cpc925_mc_printk(mci, KERN_INFO, "APIMASK 0x%08x\n", + __raw_readl(pdata->vbase + REG_APIMASK_OFFSET)); + cpc925_mc_printk(mci, KERN_INFO, "APIEXCP 0x%08x\n", + 
apiexcp); + cpc925_mc_printk(mci, KERN_INFO, "Mem Scrub Ctrl 0x%08x\n", + __raw_readl(pdata->vbase + REG_MSCR_OFFSET)); + cpc925_mc_printk(mci, KERN_INFO, "Mem Scrub Rge Start 0x%08x\n", + __raw_readl(pdata->vbase + REG_MSRSR_OFFSET)); + cpc925_mc_printk(mci, KERN_INFO, "Mem Scrub Rge End 0x%08x\n", + __raw_readl(pdata->vbase + REG_MSRER_OFFSET)); + cpc925_mc_printk(mci, KERN_INFO, "Mem Scrub Pattern 0x%08x\n", + __raw_readl(pdata->vbase + REG_MSPR_OFFSET)); + cpc925_mc_printk(mci, KERN_INFO, "Mem Chk Ctrl 0x%08x\n", + __raw_readl(pdata->vbase + REG_MCCR_OFFSET)); + cpc925_mc_printk(mci, KERN_INFO, "Mem Chk Rge End 0x%08x\n", + __raw_readl(pdata->vbase + REG_MCRER_OFFSET)); + cpc925_mc_printk(mci, KERN_INFO, "Mem Err Address 0x%08x\n", + mesr); + cpc925_mc_printk(mci, KERN_INFO, "Mem Err Syndrome 0x%08x\n", + syndrome); +} + +/******************** CPU err device********************************/ +/* Enable CPU Errors detection */ +static void cpc925_cpu_init(struct cpc925_dev_info *dev_info) +{ + u32 apimask; + + apimask = __raw_readl(dev_info->vbase + REG_APIMASK_OFFSET); + if ((apimask & CPU_MASK_ENABLE) == 0) { + apimask |= CPU_MASK_ENABLE; + __raw_writel(apimask, dev_info->vbase + REG_APIMASK_OFFSET); + } +} + +/* Disable CPU Errors detection */ +static void cpc925_cpu_exit(struct cpc925_dev_info *dev_info) +{ + /* + * WARNING: + * We are supposed to clear the CPU error detection bits, + * and it will be no problem to do so. However, once they + * are cleared here if we want to re-install CPC925 EDAC + * module later, setting them up in cpc925_cpu_init() will + * trigger machine check exception. + * Also, it's ok to leave CPU error detection bits enabled, + * since they are reset to 1 by default. + */ + + return; +} + +/* Check for CPU Errors */ +static void cpc925_cpu_check(struct edac_device_ctl_info *edac_dev) +{ + struct cpc925_dev_info *dev_info = edac_dev->pvt_info; + u32 apiexcp; + u32 apimask; + + /* APIEXCP is cleared when read */ + apiexcp = __raw_readl(dev_info->vbase + REG_APIEXCP_OFFSET); + if ((apiexcp & CPU_EXCP_DETECTED) == 0) + return; + + apimask = __raw_readl(dev_info->vbase + REG_APIMASK_OFFSET); + cpc925_printk(KERN_INFO, "Processor Interface Fault\n" + "Processor Interface register dump:\n"); + cpc925_printk(KERN_INFO, "APIMASK 0x%08x\n", apimask); + cpc925_printk(KERN_INFO, "APIEXCP 0x%08x\n", apiexcp); + + edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name); +} + +/******************** HT Link err device****************************/ +/* Enable HyperTransport Link Error detection */ +static void cpc925_htlink_init(struct cpc925_dev_info *dev_info) +{ + u32 ht_errctrl; + + ht_errctrl = __raw_readl(dev_info->vbase + REG_ERRCTRL_OFFSET); + if ((ht_errctrl & HT_ERRCTRL_ENABLE) == 0) { + ht_errctrl |= HT_ERRCTRL_ENABLE; + __raw_writel(ht_errctrl, dev_info->vbase + REG_ERRCTRL_OFFSET); + } +} + +/* Disable HyperTransport Link Error detection */ +static void cpc925_htlink_exit(struct cpc925_dev_info *dev_info) +{ + u32 ht_errctrl; + + ht_errctrl = __raw_readl(dev_info->vbase + REG_ERRCTRL_OFFSET); + ht_errctrl &= ~HT_ERRCTRL_ENABLE; + __raw_writel(ht_errctrl, dev_info->vbase + REG_ERRCTRL_OFFSET); +} + +/* Check for HyperTransport Link errors */ +static void cpc925_htlink_check(struct edac_device_ctl_info *edac_dev) +{ + struct cpc925_dev_info *dev_info = edac_dev->pvt_info; + u32 brgctrl = __raw_readl(dev_info->vbase + REG_BRGCTRL_OFFSET); + u32 linkctrl = __raw_readl(dev_info->vbase + REG_LINKCTRL_OFFSET); + u32 errctrl = __raw_readl(dev_info->vbase + 
REG_ERRCTRL_OFFSET); + u32 linkerr = __raw_readl(dev_info->vbase + REG_LINKERR_OFFSET); + + if (!((brgctrl & BRGCTRL_DETSERR) || + (linkctrl & HT_LINKCTRL_DETECTED) || + (errctrl & HT_ERRCTRL_DETECTED) || + (linkerr & HT_LINKERR_DETECTED))) + return; + + cpc925_printk(KERN_INFO, "HT Link Fault\n" + "HT register dump:\n"); + cpc925_printk(KERN_INFO, "Bridge Ctrl 0x%08x\n", + brgctrl); + cpc925_printk(KERN_INFO, "Link Config Ctrl 0x%08x\n", + linkctrl); + cpc925_printk(KERN_INFO, "Error Enum and Ctrl 0x%08x\n", + errctrl); + cpc925_printk(KERN_INFO, "Link Error 0x%08x\n", + linkerr); + + /* Clear by write 1 */ + if (brgctrl & BRGCTRL_DETSERR) + __raw_writel(BRGCTRL_DETSERR, + dev_info->vbase + REG_BRGCTRL_OFFSET); + + if (linkctrl & HT_LINKCTRL_DETECTED) + __raw_writel(HT_LINKCTRL_DETECTED, + dev_info->vbase + REG_LINKCTRL_OFFSET); + + /* Initiate Secondary Bus Reset to clear the chain failure */ + if (errctrl & ERRCTRL_CHN_FAL) + __raw_writel(BRGCTRL_SECBUSRESET, + dev_info->vbase + REG_BRGCTRL_OFFSET); + + if (errctrl & ERRCTRL_RSP_ERR) + __raw_writel(ERRCTRL_RSP_ERR, + dev_info->vbase + REG_ERRCTRL_OFFSET); + + if (linkerr & HT_LINKERR_DETECTED) + __raw_writel(HT_LINKERR_DETECTED, + dev_info->vbase + REG_LINKERR_OFFSET); + + edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name); +} + +static struct cpc925_dev_info cpc925_devs[] = { + { + .ctl_name = CPC925_CPU_ERR_DEV, + .init = cpc925_cpu_init, + .exit = cpc925_cpu_exit, + .check = cpc925_cpu_check, + }, + { + .ctl_name = CPC925_HT_LINK_DEV, + .init = cpc925_htlink_init, + .exit = cpc925_htlink_exit, + .check = cpc925_htlink_check, + }, + {0}, /* Terminated by NULL */ +}; + +/* + * Add CPU Err detection and HyperTransport Link Err detection + * as common "edac_device", they have no corresponding device + * nodes in the Open Firmware DTB and we have to add platform + * devices for them. Also, they will share the MMIO with that + * of memory controller. + */ +static void cpc925_add_edac_devices(void __iomem *vbase) +{ + struct cpc925_dev_info *dev_info; + + if (!vbase) { + cpc925_printk(KERN_ERR, "MMIO not established yet\n"); + return; + } + + for (dev_info = &cpc925_devs[0]; dev_info->init; dev_info++) { + dev_info->vbase = vbase; + dev_info->pdev = platform_device_register_simple( + dev_info->ctl_name, 0, NULL, 0); + if (IS_ERR(dev_info->pdev)) { + cpc925_printk(KERN_ERR, + "Can't register platform device for %s\n", + dev_info->ctl_name); + continue; + } + + /* + * Don't have to allocate private structure but + * make use of cpc925_devs[] instead. 
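
cpc925_htlink_check() above acknowledges latched HyperTransport error bits by writing the detected mask back to the same register ("Clear by write 1"). A toy model of write-one-to-clear semantics (standalone C; the register here is simulated in a variable):

#include <stdio.h>
#include <stdint.h>

/* A latched status register where writing 1 to a bit clears it,
 * as in the BRGCTRL/LINKCTRL/LINKERR handling above. */
static uint32_t status_reg = 0x00000030;        /* two error bits latched */

static uint32_t reg_read(void)    { return status_reg; }
static void reg_write(uint32_t v) { status_reg &= ~v; /* W1C semantics */ }

int main(void)
{
        uint32_t errs = reg_read();

        if (errs) {
                printf("errors latched: 0x%08x\n", errs);
                reg_write(errs);        /* write the set bits back to ack them */
        }
        printf("after ack: 0x%08x\n", reg_read());
        return 0;
}
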
+ */ + dev_info->edac_idx = edac_device_alloc_index(); + dev_info->edac_dev = + edac_device_alloc_ctl_info(0, dev_info->ctl_name, + 1, NULL, 0, 0, NULL, 0, dev_info->edac_idx); + if (!dev_info->edac_dev) { + cpc925_printk(KERN_ERR, "No memory for edac device\n"); + goto err1; + } + + dev_info->edac_dev->pvt_info = dev_info; + dev_info->edac_dev->dev = &dev_info->pdev->dev; + dev_info->edac_dev->ctl_name = dev_info->ctl_name; + dev_info->edac_dev->mod_name = CPC925_EDAC_MOD_STR; + dev_info->edac_dev->dev_name = dev_name(&dev_info->pdev->dev); + + if (edac_op_state == EDAC_OPSTATE_POLL) + dev_info->edac_dev->edac_check = dev_info->check; + + if (dev_info->init) + dev_info->init(dev_info); + + if (edac_device_add_device(dev_info->edac_dev) > 0) { + cpc925_printk(KERN_ERR, + "Unable to add edac device for %s\n", + dev_info->ctl_name); + goto err2; + } + + debugf0("%s: Successfully added edac device for %s\n", + __func__, dev_info->ctl_name); + + continue; + +err2: + if (dev_info->exit) + dev_info->exit(dev_info); + edac_device_free_ctl_info(dev_info->edac_dev); +err1: + platform_device_unregister(dev_info->pdev); + } +} + +/* + * Delete the common "edac_device" for CPU Err Detection + * and HyperTransport Link Err Detection + */ +static void cpc925_del_edac_devices(void) +{ + struct cpc925_dev_info *dev_info; + + for (dev_info = &cpc925_devs[0]; dev_info->init; dev_info++) { + if (dev_info->edac_dev) { + edac_device_del_device(dev_info->edac_dev->dev); + edac_device_free_ctl_info(dev_info->edac_dev); + platform_device_unregister(dev_info->pdev); + } + + if (dev_info->exit) + dev_info->exit(dev_info); + + debugf0("%s: Successfully deleted edac device for %s\n", + __func__, dev_info->ctl_name); + } +} + +/* Convert current back-ground scrub rate into byte/sec bandwith */ +static int cpc925_get_sdram_scrub_rate(struct mem_ctl_info *mci, u32 *bw) +{ + struct cpc925_mc_pdata *pdata = mci->pvt_info; + u32 mscr; + u8 si; + + mscr = __raw_readl(pdata->vbase + REG_MSCR_OFFSET); + si = (mscr & MSCR_SI_MASK) >> MSCR_SI_SHIFT; + + debugf0("%s, Mem Scrub Ctrl Register 0x%x\n", __func__, mscr); + + if (((mscr & MSCR_SCRUB_MOD_MASK) != MSCR_BACKGR_SCRUB) || + (si == 0)) { + cpc925_mc_printk(mci, KERN_INFO, "Scrub mode not enabled\n"); + *bw = 0; + } else + *bw = CPC925_SCRUB_BLOCK_SIZE * 0xFA67 / si; + + return 0; +} + +/* Return 0 for single channel; 1 for dual channel */ +static int cpc925_mc_get_channels(void __iomem *vbase) +{ + int dual = 0; + u32 mbcr; + + mbcr = __raw_readl(vbase + REG_MBCR_OFFSET); + + /* + * Dual channel only when 128-bit wide physical bus + * and 128-bit configuration. + */ + if (((mbcr & MBCR_64BITCFG_MASK) == 0) && + ((mbcr & MBCR_64BITBUS_MASK) == 0)) + dual = 1; + + debugf0("%s: %s channel\n", __func__, + (dual > 0) ? 
"Dual" : "Single"); + + return dual; +} + +static int __devinit cpc925_probe(struct platform_device *pdev) +{ + static int edac_mc_idx; + struct mem_ctl_info *mci; + void __iomem *vbase; + struct cpc925_mc_pdata *pdata; + struct resource *r; + int res = 0, nr_channels; + + debugf0("%s: %s platform device found!\n", __func__, pdev->name); + + if (!devres_open_group(&pdev->dev, cpc925_probe, GFP_KERNEL)) { + res = -ENOMEM; + goto out; + } + + r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!r) { + cpc925_printk(KERN_ERR, "Unable to get resource\n"); + res = -ENOENT; + goto err1; + } + + if (!devm_request_mem_region(&pdev->dev, + r->start, + r->end - r->start + 1, + pdev->name)) { + cpc925_printk(KERN_ERR, "Unable to request mem region\n"); + res = -EBUSY; + goto err1; + } + + vbase = devm_ioremap(&pdev->dev, r->start, r->end - r->start + 1); + if (!vbase) { + cpc925_printk(KERN_ERR, "Unable to ioremap device\n"); + res = -ENOMEM; + goto err2; + } + + nr_channels = cpc925_mc_get_channels(vbase); + mci = edac_mc_alloc(sizeof(struct cpc925_mc_pdata), + CPC925_NR_CSROWS, nr_channels + 1, edac_mc_idx); + if (!mci) { + cpc925_printk(KERN_ERR, "No memory for mem_ctl_info\n"); + res = -ENOMEM; + goto err2; + } + + pdata = mci->pvt_info; + pdata->vbase = vbase; + pdata->edac_idx = edac_mc_idx++; + pdata->name = pdev->name; + + mci->dev = &pdev->dev; + platform_set_drvdata(pdev, mci); + mci->dev_name = dev_name(&pdev->dev); + mci->mtype_cap = MEM_FLAG_RDDR | MEM_FLAG_DDR; + mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED; + mci->edac_cap = EDAC_FLAG_SECDED; + mci->mod_name = CPC925_EDAC_MOD_STR; + mci->mod_ver = CPC925_EDAC_REVISION; + mci->ctl_name = pdev->name; + + if (edac_op_state == EDAC_OPSTATE_POLL) + mci->edac_check = cpc925_mc_check; + + mci->ctl_page_to_phys = NULL; + mci->scrub_mode = SCRUB_SW_SRC; + mci->set_sdram_scrub_rate = NULL; + mci->get_sdram_scrub_rate = cpc925_get_sdram_scrub_rate; + + cpc925_init_csrows(mci); + + /* Setup memory controller registers */ + cpc925_mc_init(mci); + + if (edac_mc_add_mc(mci) > 0) { + cpc925_mc_printk(mci, KERN_ERR, "Failed edac_mc_add_mc()\n"); + goto err3; + } + + cpc925_add_edac_devices(vbase); + + /* get this far and it's successful */ + debugf0("%s: success\n", __func__); + + res = 0; + goto out; + +err3: + cpc925_mc_exit(mci); + edac_mc_free(mci); +err2: + devm_release_mem_region(&pdev->dev, r->start, r->end-r->start+1); +err1: + devres_release_group(&pdev->dev, cpc925_probe); +out: + return res; +} + +static int cpc925_remove(struct platform_device *pdev) +{ + struct mem_ctl_info *mci = platform_get_drvdata(pdev); + + /* + * Delete common edac devices before edac mc, because + * the former share the MMIO of the latter. 
+ */ + cpc925_del_edac_devices(); + cpc925_mc_exit(mci); + + edac_mc_del_mc(&pdev->dev); + edac_mc_free(mci); + + return 0; +} + +static struct platform_driver cpc925_edac_driver = { + .probe = cpc925_probe, + .remove = cpc925_remove, + .driver = { + .name = "cpc925_edac", + } +}; + +static int __init cpc925_edac_init(void) +{ + int ret = 0; + + printk(KERN_INFO "IBM CPC925 EDAC driver " CPC925_EDAC_REVISION "\n"); + printk(KERN_INFO "\t(c) 2008 Wind River Systems, Inc\n"); + + /* Only support POLL mode so far */ + edac_op_state = EDAC_OPSTATE_POLL; + + ret = platform_driver_register(&cpc925_edac_driver); + if (ret) { + printk(KERN_WARNING "Failed to register %s\n", + CPC925_EDAC_MOD_STR); + } + + return ret; +} + +static void __exit cpc925_edac_exit(void) +{ + platform_driver_unregister(&cpc925_edac_driver); +} + +module_init(cpc925_edac_init); +module_exit(cpc925_edac_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Cao Qingtao <qingtao.cao@windriver.com>"); +MODULE_DESCRIPTION("IBM CPC925 Bridge and MC EDAC kernel module"); diff --git a/drivers/edac/edac_core.h b/drivers/edac/edac_core.h index 48d3b140983..3493c6bdb82 100644 --- a/drivers/edac/edac_core.h +++ b/drivers/edac/edac_core.h @@ -841,6 +841,7 @@ extern void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev, int inst_nr, int block_nr, const char *msg); extern void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev, int inst_nr, int block_nr, const char *msg); +extern int edac_device_alloc_index(void); /* * edac_pci APIs diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c index a7d2c717d03..b02a6a69a8f 100644 --- a/drivers/edac/edac_device.c +++ b/drivers/edac/edac_device.c @@ -490,6 +490,20 @@ void edac_device_reset_delay_period(struct edac_device_ctl_info *edac_dev, mutex_unlock(&device_ctls_mutex); } +/* + * edac_device_alloc_index: Allocate a unique device index number + * + * Return: + * allocated index number + */ +int edac_device_alloc_index(void) +{ + static atomic_t device_indexes = ATOMIC_INIT(0); + + return atomic_inc_return(&device_indexes) - 1; +} +EXPORT_SYMBOL_GPL(edac_device_alloc_index); + /** * edac_device_add_device: Insert the 'edac_dev' structure into the * edac_device global list and create sysfs entries associated with diff --git a/drivers/gpio/max7301.c b/drivers/gpio/max7301.c index 3e7f4e06386..7b82eaae262 100644 --- a/drivers/gpio/max7301.c +++ b/drivers/gpio/max7301.c @@ -287,7 +287,7 @@ exit_destroy: return ret; } -static int max7301_remove(struct spi_device *spi) +static int __devexit max7301_remove(struct spi_device *spi) { struct max7301 *ts; int ret; diff --git a/drivers/gpio/pca953x.c b/drivers/gpio/pca953x.c index 8dc0164bd51..cdb6574d25a 100644 --- a/drivers/gpio/pca953x.c +++ b/drivers/gpio/pca953x.c @@ -15,6 +15,10 @@ #include <linux/init.h> #include <linux/i2c.h> #include <linux/i2c/pca953x.h> +#ifdef CONFIG_OF_GPIO +#include <linux/of_platform.h> +#include <linux/of_gpio.h> +#endif #include <asm/gpio.h> @@ -32,6 +36,7 @@ static const struct i2c_device_id pca953x_id[] = { { "pca9539", 16, }, { "pca9554", 8, }, { "pca9555", 16, }, + { "pca9556", 8, }, { "pca9557", 8, }, { "max7310", 8, }, @@ -49,7 +54,9 @@ struct pca953x_chip { uint16_t reg_direction; struct i2c_client *client; + struct pca953x_platform_data *dyn_pdata; struct gpio_chip gpio_chip; + char **names; }; static int pca953x_write_reg(struct pca953x_chip *chip, int reg, uint16_t val) @@ -192,8 +199,57 @@ static void pca953x_setup_gpio(struct pca953x_chip *chip, int gpios) gc->label = 
chip->client->name; gc->dev = &chip->client->dev; gc->owner = THIS_MODULE; + gc->names = chip->names; } +/* + * Handlers for alternative sources of platform_data + */ +#ifdef CONFIG_OF_GPIO +/* + * Translate OpenFirmware node properties into platform_data + */ +static struct pca953x_platform_data * +pca953x_get_alt_pdata(struct i2c_client *client) +{ + struct pca953x_platform_data *pdata; + struct device_node *node; + const uint16_t *val; + + node = dev_archdata_get_node(&client->dev.archdata); + if (node == NULL) + return NULL; + + pdata = kzalloc(sizeof(struct pca953x_platform_data), GFP_KERNEL); + if (pdata == NULL) { + dev_err(&client->dev, "Unable to allocate platform_data\n"); + return NULL; + } + + pdata->gpio_base = -1; + val = of_get_property(node, "linux,gpio-base", NULL); + if (val) { + if (*val < 0) + dev_warn(&client->dev, + "invalid gpio-base in device tree\n"); + else + pdata->gpio_base = *val; + } + + val = of_get_property(node, "polarity", NULL); + if (val) + pdata->invert = *val; + + return pdata; +} +#else +static struct pca953x_platform_data * +pca953x_get_alt_pdata(struct i2c_client *client) +{ + return NULL; +} +#endif + static int __devinit pca953x_probe(struct i2c_client *client, const struct i2c_device_id *id) { @@ -201,20 +257,32 @@ static int __devinit pca953x_probe(struct i2c_client *client, struct pca953x_chip *chip; int ret; + chip = kzalloc(sizeof(struct pca953x_chip), GFP_KERNEL); + if (chip == NULL) + return -ENOMEM; + pdata = client->dev.platform_data; if (pdata == NULL) { - dev_dbg(&client->dev, "no platform data\n"); - return -EINVAL; + pdata = pca953x_get_alt_pdata(client); + /* + * Unlike normal platform_data, this is allocated + * dynamically and must be freed in the driver + */ + chip->dyn_pdata = pdata; } - chip = kzalloc(sizeof(struct pca953x_chip), GFP_KERNEL); - if (chip == NULL) - return -ENOMEM; + if (pdata == NULL) { + dev_dbg(&client->dev, "no platform data\n"); + ret = -EINVAL; + goto out_failed; + } chip->client = client; chip->gpio_start = pdata->gpio_base; + chip->names = pdata->names; + /* initialize cached registers from their original values. * we can't share this chip with another i2c master. 
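
In the pca953x probe above, platform_data may now come from two sources: the usual client->dev.platform_data supplied by board code, or a structure synthesized from device-tree properties by pca953x_get_alt_pdata(). The synthesized copy is heap-allocated, so the driver stashes it in chip->dyn_pdata and frees it on every exit path (see the out_failed and remove hunks below), while board-supplied platform_data is never freed. A sketch of that ownership rule (standalone C with stand-in types; all names here are illustrative):

#include <stdio.h>
#include <stdlib.h>

struct pdata { int gpio_base; };

struct chip {
        struct pdata *dyn_pdata;        /* set only if we allocated it */
};

/* Stand-in for pca953x_get_alt_pdata(): builds pdata from another source. */
static struct pdata *get_alt_pdata(void)
{
        struct pdata *p = calloc(1, sizeof(*p));

        if (p)
                p->gpio_base = -1;
        return p;
}

static int probe(struct chip *chip, struct pdata *board_pdata)
{
        struct pdata *pdata = board_pdata;

        if (!pdata) {
                pdata = get_alt_pdata();
                chip->dyn_pdata = pdata;        /* we own it: free on teardown */
        }
        if (!pdata)
                return -1;
        printf("gpio_base %d\n", pdata->gpio_base);
        return 0;
}

static void teardown(struct chip *chip)
{
        free(chip->dyn_pdata);  /* NULL-safe; board pdata is never freed */
}

int main(void)
{
        struct chip chip = { 0 };

        probe(&chip, NULL);
        teardown(&chip);
        return 0;
}
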
*/ @@ -249,6 +317,7 @@ static int __devinit pca953x_probe(struct i2c_client *client, return 0; out_failed: + kfree(chip->dyn_pdata); kfree(chip); return ret; } @@ -276,6 +345,7 @@ static int pca953x_remove(struct i2c_client *client) return ret; } + kfree(chip->dyn_pdata); kfree(chip); return 0; } diff --git a/drivers/md/faulty.c b/drivers/md/faulty.c index 8695809b24b..87d88dbb667 100644 --- a/drivers/md/faulty.c +++ b/drivers/md/faulty.c @@ -255,14 +255,14 @@ static void status(struct seq_file *seq, mddev_t *mddev) } -static int reconfig(mddev_t *mddev, int layout, int chunk_size) +static int reshape(mddev_t *mddev) { - int mode = layout & ModeMask; - int count = layout >> ModeShift; + int mode = mddev->new_layout & ModeMask; + int count = mddev->new_layout >> ModeShift; conf_t *conf = mddev->private; - if (chunk_size != -1) - return -EINVAL; + if (mddev->new_layout < 0) + return 0; /* new layout */ if (mode == ClearFaults) @@ -279,6 +279,7 @@ static int reconfig(mddev_t *mddev, int layout, int chunk_size) atomic_set(&conf->counters[mode], count); } else return -EINVAL; + mddev->new_layout = -1; mddev->layout = -1; /* makes sure further changes come through */ return 0; } @@ -298,8 +299,12 @@ static int run(mddev_t *mddev) { mdk_rdev_t *rdev; int i; + conf_t *conf; + + if (md_check_no_bitmap(mddev)) + return -EINVAL; - conf_t *conf = kmalloc(sizeof(*conf), GFP_KERNEL); + conf = kmalloc(sizeof(*conf), GFP_KERNEL); if (!conf) return -ENOMEM; @@ -315,7 +320,7 @@ static int run(mddev_t *mddev) md_set_array_sectors(mddev, faulty_size(mddev, 0, 0)); mddev->private = conf; - reconfig(mddev, mddev->layout, -1); + reshape(mddev); return 0; } @@ -338,7 +343,7 @@ static struct mdk_personality faulty_personality = .run = run, .stop = stop, .status = status, - .reconfig = reconfig, + .check_reshape = reshape, .size = faulty_size, }; diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 64f1f3e046e..15c8b7b25a9 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -27,19 +27,27 @@ */ static inline dev_info_t *which_dev(mddev_t *mddev, sector_t sector) { - dev_info_t *hash; - linear_conf_t *conf = mddev_to_conf(mddev); - sector_t idx = sector >> conf->sector_shift; + int lo, mid, hi; + linear_conf_t *conf; + + lo = 0; + hi = mddev->raid_disks - 1; + conf = rcu_dereference(mddev->private); /* - * sector_div(a,b) returns the remainer and sets a to a/b + * Binary Search */ - (void)sector_div(idx, conf->spacing); - hash = conf->hash_table[idx]; - while (sector >= hash->num_sectors + hash->start_sector) - hash++; - return hash; + while (hi > lo) { + + mid = (hi + lo) / 2; + if (sector < conf->disks[mid].end_sector) + hi = mid; + else + lo = mid + 1; + } + + return conf->disks + lo; } /** @@ -59,8 +67,10 @@ static int linear_mergeable_bvec(struct request_queue *q, unsigned long maxsectors, bio_sectors = bvm->bi_size >> 9; sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); + rcu_read_lock(); dev0 = which_dev(mddev, sector); - maxsectors = dev0->num_sectors - (sector - dev0->start_sector); + maxsectors = dev0->end_sector - sector; + rcu_read_unlock(); if (maxsectors < bio_sectors) maxsectors = 0; @@ -79,46 +89,57 @@ static int linear_mergeable_bvec(struct request_queue *q, static void linear_unplug(struct request_queue *q) { mddev_t *mddev = q->queuedata; - linear_conf_t *conf = mddev_to_conf(mddev); + linear_conf_t *conf; int i; + rcu_read_lock(); + conf = rcu_dereference(mddev->private); + for (i=0; i < mddev->raid_disks; i++) { struct request_queue *r_queue = 
bdev_get_queue(conf->disks[i].rdev->bdev); blk_unplug(r_queue); } + rcu_read_unlock(); } static int linear_congested(void *data, int bits) { mddev_t *mddev = data; - linear_conf_t *conf = mddev_to_conf(mddev); + linear_conf_t *conf; int i, ret = 0; + rcu_read_lock(); + conf = rcu_dereference(mddev->private); + for (i = 0; i < mddev->raid_disks && !ret ; i++) { struct request_queue *q = bdev_get_queue(conf->disks[i].rdev->bdev); ret |= bdi_congested(&q->backing_dev_info, bits); } + + rcu_read_unlock(); return ret; } static sector_t linear_size(mddev_t *mddev, sector_t sectors, int raid_disks) { - linear_conf_t *conf = mddev_to_conf(mddev); + linear_conf_t *conf; + sector_t array_sectors; + rcu_read_lock(); + conf = rcu_dereference(mddev->private); WARN_ONCE(sectors || raid_disks, "%s does not support generic reshape\n", __func__); + array_sectors = conf->array_sectors; + rcu_read_unlock(); - return conf->array_sectors; + return array_sectors; } static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) { linear_conf_t *conf; - dev_info_t **table; mdk_rdev_t *rdev; - int i, nb_zone, cnt; - sector_t min_sectors; - sector_t curr_sector; + int i, cnt; conf = kzalloc (sizeof (*conf) + raid_disks*sizeof(dev_info_t), GFP_KERNEL); @@ -131,6 +152,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) list_for_each_entry(rdev, &mddev->disks, same_set) { int j = rdev->raid_disk; dev_info_t *disk = conf->disks + j; + sector_t sectors; if (j < 0 || j >= raid_disks || disk->rdev) { printk("linear: disk numbering problem. Aborting!\n"); @@ -138,6 +160,11 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) } disk->rdev = rdev; + if (mddev->chunk_sectors) { + sectors = rdev->sectors; + sector_div(sectors, mddev->chunk_sectors); + rdev->sectors = sectors * mddev->chunk_sectors; + } blk_queue_stack_limits(mddev->queue, rdev->bdev->bd_disk->queue); @@ -149,102 +176,24 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9)) blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); - disk->num_sectors = rdev->sectors; conf->array_sectors += rdev->sectors; - cnt++; + } if (cnt != raid_disks) { printk("linear: not enough drives present. Aborting!\n"); goto out; } - min_sectors = conf->array_sectors; - sector_div(min_sectors, PAGE_SIZE/sizeof(struct dev_info *)); - if (min_sectors == 0) - min_sectors = 1; - - /* min_sectors is the minimum spacing that will fit the hash - * table in one PAGE. This may be much smaller than needed. - * We find the smallest non-terminal set of consecutive devices - * that is larger than min_sectors and use the size of that as - * the actual spacing - */ - conf->spacing = conf->array_sectors; - for (i=0; i < cnt-1 ; i++) { - sector_t tmp = 0; - int j; - for (j = i; j < cnt - 1 && tmp < min_sectors; j++) - tmp += conf->disks[j].num_sectors; - if (tmp >= min_sectors && tmp < conf->spacing) - conf->spacing = tmp; - } - - /* spacing may be too large for sector_div to work with, - * so we might need to pre-shift - */ - conf->sector_shift = 0; - if (sizeof(sector_t) > sizeof(u32)) { - sector_t space = conf->spacing; - while (space > (sector_t)(~(u32)0)) { - space >>= 1; - conf->sector_shift++; - } - } /* - * This code was restructured to work around a gcc-2.95.3 internal - * compiler error. Alter it with care. + * Here we calculate the device offsets. 
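
The rewritten which_dev() earlier in this linear.c diff drops the precomputed hash table and instead binary-searches the cumulative end_sector values (computed just below), which is O(log n) over raid_disks and needs no auxiliary allocation. The search in isolation (standalone C; the disk table is made up for the example):

#include <stdio.h>

struct dev_info { unsigned long long end_sector; };

/* Three devices covering sectors [0,100), [100,250), [250,400). */
static struct dev_info disks[] = { {100}, {250}, {400} };

/* Find the first device whose end_sector is strictly greater than
 * sector, mirroring the rewritten which_dev(). */
static int which_dev(unsigned long long sector, int raid_disks)
{
        int lo = 0, hi = raid_disks - 1, mid;

        while (hi > lo) {
                mid = (hi + lo) / 2;
                if (sector < disks[mid].end_sector)
                        hi = mid;
                else
                        lo = mid + 1;
        }
        return lo;
}

int main(void)
{
        printf("%d %d %d\n", which_dev(0, 3), which_dev(100, 3),
               which_dev(399, 3));      /* prints: 0 1 2 */
        return 0;
}
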
*/ - { - sector_t sz; - unsigned round; - unsigned long base; - - sz = conf->array_sectors >> conf->sector_shift; - sz += 1; /* force round-up */ - base = conf->spacing >> conf->sector_shift; - round = sector_div(sz, base); - nb_zone = sz + (round ? 1 : 0); - } - BUG_ON(nb_zone > PAGE_SIZE / sizeof(struct dev_info *)); - - conf->hash_table = kmalloc (sizeof (struct dev_info *) * nb_zone, - GFP_KERNEL); - if (!conf->hash_table) - goto out; + conf->disks[0].end_sector = conf->disks[0].rdev->sectors; - /* - * Here we generate the linear hash table - * First calculate the device offsets. - */ - conf->disks[0].start_sector = 0; for (i = 1; i < raid_disks; i++) - conf->disks[i].start_sector = - conf->disks[i-1].start_sector + - conf->disks[i-1].num_sectors; - - table = conf->hash_table; - i = 0; - for (curr_sector = 0; - curr_sector < conf->array_sectors; - curr_sector += conf->spacing) { - - while (i < raid_disks-1 && - curr_sector >= conf->disks[i+1].start_sector) - i++; - - *table ++ = conf->disks + i; - } - - if (conf->sector_shift) { - conf->spacing >>= conf->sector_shift; - /* round spacing up so that when we divide by it, - * we err on the side of "too-low", which is safest. - */ - conf->spacing++; - } - - BUG_ON(table - conf->hash_table > nb_zone); + conf->disks[i].end_sector = + conf->disks[i-1].end_sector + + conf->disks[i].rdev->sectors; return conf; @@ -257,6 +206,8 @@ static int linear_run (mddev_t *mddev) { linear_conf_t *conf; + if (md_check_no_bitmap(mddev)) + return -EINVAL; mddev->queue->queue_lock = &mddev->queue->__queue_lock; conf = linear_conf(mddev, mddev->raid_disks); @@ -272,6 +223,12 @@ static int linear_run (mddev_t *mddev) return 0; } +static void free_conf(struct rcu_head *head) +{ + linear_conf_t *conf = container_of(head, linear_conf_t, rcu); + kfree(conf); +} + static int linear_add(mddev_t *mddev, mdk_rdev_t *rdev) { /* Adding a drive to a linear array allows the array to grow. @@ -282,7 +239,7 @@ static int linear_add(mddev_t *mddev, mdk_rdev_t *rdev) * The current one is never freed until the array is stopped. * This avoids races. */ - linear_conf_t *newconf; + linear_conf_t *newconf, *oldconf; if (rdev->saved_raid_disk != mddev->raid_disks) return -EINVAL; @@ -294,25 +251,29 @@ static int linear_add(mddev_t *mddev, mdk_rdev_t *rdev) if (!newconf) return -ENOMEM; - newconf->prev = mddev_to_conf(mddev); - mddev->private = newconf; + oldconf = rcu_dereference(mddev->private); mddev->raid_disks++; + rcu_assign_pointer(mddev->private, newconf); md_set_array_sectors(mddev, linear_size(mddev, 0, 0)); set_capacity(mddev->gendisk, mddev->array_sectors); + call_rcu(&oldconf->rcu, free_conf); return 0; } static int linear_stop (mddev_t *mddev) { - linear_conf_t *conf = mddev_to_conf(mddev); - + linear_conf_t *conf = mddev->private; + + /* + * We do not require rcu protection here since + * we hold reconfig_mutex for both linear_add and + * linear_stop, so they cannot race. + * We should make sure any old 'conf's are properly + * freed though. 
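
linear_add() above follows the classic RCU publish pattern: build the new conf, rcu_assign_pointer() it into mddev->private, and defer freeing the old one with call_rcu(), while linear_stop() uses rcu_barrier() to make sure every queued callback has run before the final kfree(). Schematically (kernel-style sketch using the types from this diff, not a standalone program; the writer is assumed to hold reconfig_mutex and readers rcu_read_lock(), as in the hunks above):

/* Writer side: publish newconf, defer freeing oldconf until all
 * current RCU readers (which_dev() callers) have finished. */
static void swap_conf(mddev_t *mddev, linear_conf_t *newconf)
{
        linear_conf_t *oldconf = rcu_dereference(mddev->private);

        rcu_assign_pointer(mddev->private, newconf);
        call_rcu(&oldconf->rcu, free_conf);     /* free_conf() kfree()s it */
}

/* Teardown: wait for every call_rcu() callback to complete before
 * the final kfree(), as linear_stop() does with rcu_barrier(). */
static void stop_conf(mddev_t *mddev)
{
        rcu_barrier();
        kfree(mddev->private);
}
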
+ */ + rcu_barrier(); blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ - do { - linear_conf_t *t = conf->prev; - kfree(conf->hash_table); - kfree(conf); - conf = t; - } while (conf); + kfree(conf); return 0; } @@ -322,6 +283,7 @@ static int linear_make_request (struct request_queue *q, struct bio *bio) const int rw = bio_data_dir(bio); mddev_t *mddev = q->queuedata; dev_info_t *tmp_dev; + sector_t start_sector; int cpu; if (unlikely(bio_barrier(bio))) { @@ -335,33 +297,36 @@ static int linear_make_request (struct request_queue *q, struct bio *bio) bio_sectors(bio)); part_stat_unlock(); + rcu_read_lock(); tmp_dev = which_dev(mddev, bio->bi_sector); - - if (unlikely(bio->bi_sector >= (tmp_dev->num_sectors + - tmp_dev->start_sector) - || (bio->bi_sector < - tmp_dev->start_sector))) { + start_sector = tmp_dev->end_sector - tmp_dev->rdev->sectors; + + + if (unlikely(bio->bi_sector >= (tmp_dev->end_sector) + || (bio->bi_sector < start_sector))) { char b[BDEVNAME_SIZE]; printk("linear_make_request: Sector %llu out of bounds on " "dev %s: %llu sectors, offset %llu\n", (unsigned long long)bio->bi_sector, bdevname(tmp_dev->rdev->bdev, b), - (unsigned long long)tmp_dev->num_sectors, - (unsigned long long)tmp_dev->start_sector); + (unsigned long long)tmp_dev->rdev->sectors, + (unsigned long long)start_sector); + rcu_read_unlock(); bio_io_error(bio); return 0; } if (unlikely(bio->bi_sector + (bio->bi_size >> 9) > - tmp_dev->start_sector + tmp_dev->num_sectors)) { + tmp_dev->end_sector)) { /* This bio crosses a device boundary, so we have to * split it. */ struct bio_pair *bp; + sector_t end_sector = tmp_dev->end_sector; + + rcu_read_unlock(); - bp = bio_split(bio, - tmp_dev->start_sector + tmp_dev->num_sectors - - bio->bi_sector); + bp = bio_split(bio, end_sector - bio->bi_sector); if (linear_make_request(q, &bp->bio1)) generic_make_request(&bp->bio1); @@ -372,8 +337,9 @@ static int linear_make_request (struct request_queue *q, struct bio *bio) } bio->bi_bdev = tmp_dev->rdev->bdev; - bio->bi_sector = bio->bi_sector - tmp_dev->start_sector + bio->bi_sector = bio->bi_sector - start_sector + tmp_dev->rdev->data_offset; + rcu_read_unlock(); return 1; } @@ -381,7 +347,7 @@ static int linear_make_request (struct request_queue *q, struct bio *bio) static void linear_status (struct seq_file *seq, mddev_t *mddev) { - seq_printf(seq, " %dk rounding", mddev->chunk_size/1024); + seq_printf(seq, " %dk rounding", mddev->chunk_sectors / 2); } diff --git a/drivers/md/linear.h b/drivers/md/linear.h index bf8179587f9..0ce29b61605 100644 --- a/drivers/md/linear.h +++ b/drivers/md/linear.h @@ -3,27 +3,19 @@ struct dev_info { mdk_rdev_t *rdev; - sector_t num_sectors; - sector_t start_sector; + sector_t end_sector; }; typedef struct dev_info dev_info_t; struct linear_private_data { - struct linear_private_data *prev; /* earlier version */ - dev_info_t **hash_table; - sector_t spacing; sector_t array_sectors; - int sector_shift; /* shift before dividing - * by spacing - */ dev_info_t disks[0]; + struct rcu_head rcu; }; typedef struct linear_private_data linear_conf_t; -#define mddev_to_conf(mddev) ((linear_conf_t *) mddev->private) - #endif diff --git a/drivers/md/md.c b/drivers/md/md.c index 20f6ac33834..09be637d52c 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -440,15 +440,6 @@ static inline sector_t calc_dev_sboffset(struct block_device *bdev) return MD_NEW_SIZE_SECTORS(num_sectors); } -static sector_t calc_num_sectors(mdk_rdev_t *rdev, unsigned chunk_size) -{ - sector_t num_sectors = 
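
With num_sectors and start_sector gone, each dev_info carries only a cumulative end_sector, and the start is recovered as end_sector - rdev->sectors. A small sketch of the bounds and split checks linear_make_request() performs with that representation (all values hypothetical):

	#include <stdint.h>
	#include <stdio.h>

	typedef uint64_t sector_t;

	struct dev_info {
		sector_t end_sector;	/* cumulative end, as in the new linear.h */
		sector_t sectors;	/* stands in for rdev->sectors */
	};

	int main(void)
	{
		struct dev_info d = { .end_sector = 3000, .sectors = 1000 };
		sector_t bi_sector = 2500, bio_sectors = 8;	/* hypothetical bio */
		sector_t start = d.end_sector - d.sectors;	/* 2000 */

		if (bi_sector < start || bi_sector >= d.end_sector)
			printf("out of bounds\n");
		else if (bi_sector + bio_sectors > d.end_sector)
			printf("split at %llu sectors\n",
			       (unsigned long long)(d.end_sector - bi_sector));
		else
			printf("maps at device offset %llu\n",
			       (unsigned long long)(bi_sector - start));
		return 0;
	}
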
rdev->sb_start; - - if (chunk_size) - num_sectors &= ~((sector_t)chunk_size/512 - 1); - return num_sectors; -} - static int alloc_disk_sb(mdk_rdev_t * rdev) { if (rdev->sb_page) @@ -745,6 +736,24 @@ struct super_type { }; /* + * Check that the given mddev has no bitmap. + * + * This function is called from the run method of all personalities that do not + * support bitmaps. It prints an error message and returns non-zero if mddev + * has a bitmap. Otherwise, it returns 0. + * + */ +int md_check_no_bitmap(mddev_t *mddev) +{ + if (!mddev->bitmap_file && !mddev->bitmap_offset) + return 0; + printk(KERN_ERR "%s: bitmaps are not supported for %s\n", + mdname(mddev), mddev->pers->name); + return 1; +} +EXPORT_SYMBOL(md_check_no_bitmap); + +/* * load_super for 0.90.0 */ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) @@ -797,17 +806,6 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version rdev->data_offset = 0; rdev->sb_size = MD_SB_BYTES; - if (sb->state & (1<<MD_SB_BITMAP_PRESENT)) { - if (sb->level != 1 && sb->level != 4 - && sb->level != 5 && sb->level != 6 - && sb->level != 10) { - /* FIXME use a better test */ - printk(KERN_WARNING - "md: bitmaps not supported for this level.\n"); - goto abort; - } - } - if (sb->level == LEVEL_MULTIPATH) rdev->desc_nr = -1; else @@ -836,7 +834,7 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version else ret = 0; } - rdev->sectors = calc_num_sectors(rdev, sb->chunk_size); + rdev->sectors = rdev->sb_start; if (rdev->sectors < sb->size * 2 && sb->level > 1) /* "this cannot possibly happen" ... */ @@ -866,7 +864,7 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) mddev->minor_version = sb->minor_version; mddev->patch_version = sb->patch_version; mddev->external = 0; - mddev->chunk_size = sb->chunk_size; + mddev->chunk_sectors = sb->chunk_size >> 9; mddev->ctime = sb->ctime; mddev->utime = sb->utime; mddev->level = sb->level; @@ -883,13 +881,13 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) mddev->delta_disks = sb->delta_disks; mddev->new_level = sb->new_level; mddev->new_layout = sb->new_layout; - mddev->new_chunk = sb->new_chunk; + mddev->new_chunk_sectors = sb->new_chunk >> 9; } else { mddev->reshape_position = MaxSector; mddev->delta_disks = 0; mddev->new_level = mddev->level; mddev->new_layout = mddev->layout; - mddev->new_chunk = mddev->chunk_size; + mddev->new_chunk_sectors = mddev->chunk_sectors; } if (sb->state & (1<<MD_SB_CLEAN)) @@ -1004,7 +1002,7 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) sb->new_level = mddev->new_level; sb->delta_disks = mddev->delta_disks; sb->new_layout = mddev->new_layout; - sb->new_chunk = mddev->new_chunk; + sb->new_chunk = mddev->new_chunk_sectors << 9; } mddev->minor_version = sb->minor_version; if (mddev->in_sync) @@ -1018,7 +1016,7 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) sb->recovery_cp = 0; sb->layout = mddev->layout; - sb->chunk_size = mddev->chunk_size; + sb->chunk_size = mddev->chunk_sectors << 9; if (mddev->bitmap && mddev->bitmap_file == NULL) sb->state |= (1<<MD_SB_BITMAP_PRESENT); @@ -1185,17 +1183,6 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) bdevname(rdev->bdev,b)); return -EINVAL; } - if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET)) { - if (sb->level != cpu_to_le32(1) && - sb->level != cpu_to_le32(4) && - sb->level != cpu_to_le32(5) && - sb->level != cpu_to_le32(6) && - sb->level != 
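
md_check_no_bitmap() centralizes a test that was previously open-coded per superblock format. A simplified userspace model of the helper; the struct layout here is an illustrative stand-in for mddev_t:

	#include <stdio.h>

	/* Illustrative stand-in for mddev_t; only the fields the helper
	 * looks at are modeled. */
	struct mddev {
		void *bitmap_file;
		long bitmap_offset;
		const char *name;
	};

	/* Model of the new md_check_no_bitmap(): personalities that cannot
	 * use a write-intent bitmap (linear, raid0, multipath) call this at
	 * the top of run() and fail with -EINVAL if one is configured. */
	static int md_check_no_bitmap(struct mddev *m)
	{
		if (!m->bitmap_file && !m->bitmap_offset)
			return 0;
		fprintf(stderr, "%s: bitmaps are not supported\n", m->name);
		return 1;
	}

	int main(void)
	{
		struct mddev m = { .bitmap_offset = 8, .name = "md0" };

		if (md_check_no_bitmap(&m))
			return 22;	/* maps to -EINVAL in the kernel */
		return 0;
	}
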
cpu_to_le32(10)) { - printk(KERN_WARNING - "md: bitmaps not supported for this level.\n"); - return -EINVAL; - } - } rdev->preferred_minor = 0xffff; rdev->data_offset = le64_to_cpu(sb->data_offset); @@ -1248,9 +1235,6 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) if (rdev->sectors < le64_to_cpu(sb->data_size)) return -EINVAL; rdev->sectors = le64_to_cpu(sb->data_size); - if (le32_to_cpu(sb->chunksize)) - rdev->sectors &= ~((sector_t)le32_to_cpu(sb->chunksize) - 1); - if (le64_to_cpu(sb->size) > rdev->sectors) return -EINVAL; return ret; @@ -1271,7 +1255,7 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) mddev->major_version = 1; mddev->patch_version = 0; mddev->external = 0; - mddev->chunk_size = le32_to_cpu(sb->chunksize) << 9; + mddev->chunk_sectors = le32_to_cpu(sb->chunksize); mddev->ctime = le64_to_cpu(sb->ctime) & ((1ULL << 32)-1); mddev->utime = le64_to_cpu(sb->utime) & ((1ULL << 32)-1); mddev->level = le32_to_cpu(sb->level); @@ -1297,13 +1281,13 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) mddev->delta_disks = le32_to_cpu(sb->delta_disks); mddev->new_level = le32_to_cpu(sb->new_level); mddev->new_layout = le32_to_cpu(sb->new_layout); - mddev->new_chunk = le32_to_cpu(sb->new_chunk)<<9; + mddev->new_chunk_sectors = le32_to_cpu(sb->new_chunk); } else { mddev->reshape_position = MaxSector; mddev->delta_disks = 0; mddev->new_level = mddev->level; mddev->new_layout = mddev->layout; - mddev->new_chunk = mddev->chunk_size; + mddev->new_chunk_sectors = mddev->chunk_sectors; } } else if (mddev->pers == NULL) { @@ -1375,7 +1359,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) sb->raid_disks = cpu_to_le32(mddev->raid_disks); sb->size = cpu_to_le64(mddev->dev_sectors); - sb->chunksize = cpu_to_le32(mddev->chunk_size >> 9); + sb->chunksize = cpu_to_le32(mddev->chunk_sectors); sb->level = cpu_to_le32(mddev->level); sb->layout = cpu_to_le32(mddev->layout); @@ -1402,7 +1386,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) sb->new_layout = cpu_to_le32(mddev->new_layout); sb->delta_disks = cpu_to_le32(mddev->delta_disks); sb->new_level = cpu_to_le32(mddev->new_level); - sb->new_chunk = cpu_to_le32(mddev->new_chunk>>9); + sb->new_chunk = cpu_to_le32(mddev->new_chunk_sectors); } max_dev = 0; @@ -1897,6 +1881,7 @@ static void md_update_sb(mddev_t * mddev, int force_change) int sync_req; int nospares = 0; + mddev->utime = get_seconds(); if (mddev->external) return; repeat: @@ -1926,7 +1911,6 @@ repeat: nospares = 0; sync_req = mddev->in_sync; - mddev->utime = get_seconds(); /* If this is just a dirty<->clean transition, and the array is clean * and 'events' is odd, we can roll back to the previous clean state */ @@ -2597,15 +2581,6 @@ static void analyze_sbs(mddev_t * mddev) clear_bit(In_sync, &rdev->flags); } } - - - - if (mddev->recovery_cp != MaxSector && - mddev->level >= 1) - printk(KERN_ERR "md: %s: raid array is not clean" - " -- starting background reconstruction\n", - mdname(mddev)); - } static void md_safemode_timeout(unsigned long data); @@ -2746,7 +2721,7 @@ level_store(mddev_t *mddev, const char *buf, size_t len) if (IS_ERR(priv)) { mddev->new_level = mddev->level; mddev->new_layout = mddev->layout; - mddev->new_chunk = mddev->chunk_size; + mddev->new_chunk_sectors = mddev->chunk_sectors; mddev->raid_disks -= mddev->delta_disks; mddev->delta_disks = 0; module_put(pers->owner); @@ -2764,7 +2739,7 @@ level_store(mddev_t *mddev, const char *buf, size_t len) strlcpy(mddev->clevel, pers->name, 
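
All of these superblock hunks follow one unit convention: chunk_size (bytes) becomes chunk_sectors (512-byte sectors), so bytes = sectors << 9, sectors = bytes >> 9, and the KiB figure printed by status lines is sectors / 2. For example:

	#include <stdio.h>

	int main(void)
	{
		int chunk_size = 65536;			/* old field: bytes */
		int chunk_sectors = chunk_size >> 9;	/* new field: 128 sectors */

		printf("bytes:  %d\n", chunk_sectors << 9);	/* 65536 */
		printf("status: %dk\n", chunk_sectors / 2);	/* "64k chunks" */
		return 0;
	}
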
sizeof(mddev->clevel)); mddev->level = mddev->new_level; mddev->layout = mddev->new_layout; - mddev->chunk_size = mddev->new_chunk; + mddev->chunk_sectors = mddev->new_chunk_sectors; mddev->delta_disks = 0; pers->run(mddev); mddev_resume(mddev); @@ -2800,11 +2775,14 @@ layout_store(mddev_t *mddev, const char *buf, size_t len) if (mddev->pers) { int err; - if (mddev->pers->reconfig == NULL) + if (mddev->pers->check_reshape == NULL) return -EBUSY; - err = mddev->pers->reconfig(mddev, n, -1); - if (err) + mddev->new_layout = n; + err = mddev->pers->check_reshape(mddev); + if (err) { + mddev->new_layout = mddev->layout; return err; + } } else { mddev->new_layout = n; if (mddev->reshape_position == MaxSector) @@ -2857,10 +2835,11 @@ static ssize_t chunk_size_show(mddev_t *mddev, char *page) { if (mddev->reshape_position != MaxSector && - mddev->chunk_size != mddev->new_chunk) - return sprintf(page, "%d (%d)\n", mddev->new_chunk, - mddev->chunk_size); - return sprintf(page, "%d\n", mddev->chunk_size); + mddev->chunk_sectors != mddev->new_chunk_sectors) + return sprintf(page, "%d (%d)\n", + mddev->new_chunk_sectors << 9, + mddev->chunk_sectors << 9); + return sprintf(page, "%d\n", mddev->chunk_sectors << 9); } static ssize_t @@ -2874,15 +2853,18 @@ chunk_size_store(mddev_t *mddev, const char *buf, size_t len) if (mddev->pers) { int err; - if (mddev->pers->reconfig == NULL) + if (mddev->pers->check_reshape == NULL) return -EBUSY; - err = mddev->pers->reconfig(mddev, -1, n); - if (err) + mddev->new_chunk_sectors = n >> 9; + err = mddev->pers->check_reshape(mddev); + if (err) { + mddev->new_chunk_sectors = mddev->chunk_sectors; return err; + } } else { - mddev->new_chunk = n; + mddev->new_chunk_sectors = n >> 9; if (mddev->reshape_position == MaxSector) - mddev->chunk_size = n; + mddev->chunk_sectors = n >> 9; } return len; } @@ -3527,8 +3509,9 @@ min_sync_store(mddev_t *mddev, const char *buf, size_t len) return -EBUSY; /* Must be a multiple of chunk_size */ - if (mddev->chunk_size) { - if (min & (sector_t)((mddev->chunk_size>>9)-1)) + if (mddev->chunk_sectors) { + sector_t temp = min; + if (sector_div(temp, mddev->chunk_sectors)) return -EINVAL; } mddev->resync_min = min; @@ -3564,8 +3547,9 @@ max_sync_store(mddev_t *mddev, const char *buf, size_t len) return -EBUSY; /* Must be a multiple of chunk_size */ - if (mddev->chunk_size) { - if (max & (sector_t)((mddev->chunk_size>>9)-1)) + if (mddev->chunk_sectors) { + sector_t temp = max; + if (sector_div(temp, mddev->chunk_sectors)) return -EINVAL; } mddev->resync_max = max; @@ -3656,7 +3640,7 @@ reshape_position_store(mddev_t *mddev, const char *buf, size_t len) mddev->delta_disks = 0; mddev->new_level = mddev->level; mddev->new_layout = mddev->layout; - mddev->new_chunk = mddev->chunk_size; + mddev->new_chunk_sectors = mddev->chunk_sectors; return len; } @@ -3976,11 +3960,9 @@ static int start_dirty_degraded; static int do_md_run(mddev_t * mddev) { int err; - int chunk_size; mdk_rdev_t *rdev; struct gendisk *disk; struct mdk_personality *pers; - char b[BDEVNAME_SIZE]; if (list_empty(&mddev->disks)) /* cannot run an array with no devices.. 
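
layout_store() and chunk_size_store() now share a set-tentatively/validate/roll-back shape: write the new_* field, ask the personality's check_reshape() to validate it, and restore the old value on error. A sketch of that control flow with a hypothetical validator:

	#include <stdio.h>

	struct dev {
		int layout;
		int new_layout;
	};

	/* Hypothetical validator playing the role of pers->check_reshape(). */
	static int check_reshape(struct dev *d)
	{
		return d->new_layout >= 0 ? 0 : -22;	/* -EINVAL */
	}

	/* The pattern now used by layout_store()/chunk_size_store(): publish
	 * the tentative value, validate, roll back on failure. */
	static int layout_store(struct dev *d, int n)
	{
		int err;

		d->new_layout = n;
		err = check_reshape(d);
		if (err)
			d->new_layout = d->layout;
		return err;
	}

	int main(void)
	{
		struct dev d = { .layout = 2, .new_layout = 2 };

		printf("%d %d\n", layout_store(&d, 5), d.new_layout);	/* 0 5 */
		printf("%d %d\n", layout_store(&d, -1), d.new_layout);	/* -22 2 */
		return 0;
	}
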
*/ @@ -3998,38 +3980,6 @@ static int do_md_run(mddev_t * mddev) analyze_sbs(mddev); } - chunk_size = mddev->chunk_size; - - if (chunk_size) { - if (chunk_size > MAX_CHUNK_SIZE) { - printk(KERN_ERR "too big chunk_size: %d > %d\n", - chunk_size, MAX_CHUNK_SIZE); - return -EINVAL; - } - /* - * chunk-size has to be a power of 2 - */ - if ( (1 << ffz(~chunk_size)) != chunk_size) { - printk(KERN_ERR "chunk_size of %d not valid\n", chunk_size); - return -EINVAL; - } - - /* devices must have minimum size of one chunk */ - list_for_each_entry(rdev, &mddev->disks, same_set) { - if (test_bit(Faulty, &rdev->flags)) - continue; - if (rdev->sectors < chunk_size / 512) { - printk(KERN_WARNING - "md: Dev %s smaller than chunk_size:" - " %llu < %d\n", - bdevname(rdev->bdev,b), - (unsigned long long)rdev->sectors, - chunk_size / 512); - return -EINVAL; - } - } - } - if (mddev->level != LEVEL_NONE) request_module("md-level-%d", mddev->level); else if (mddev->clevel[0]) @@ -4405,7 +4355,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) mddev->flags = 0; mddev->ro = 0; mddev->metadata_type[0] = 0; - mddev->chunk_size = 0; + mddev->chunk_sectors = 0; mddev->ctime = mddev->utime = 0; mddev->layout = 0; mddev->max_disks = 0; @@ -4413,7 +4363,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) mddev->delta_disks = 0; mddev->new_level = LEVEL_NONE; mddev->new_layout = 0; - mddev->new_chunk = 0; + mddev->new_chunk_sectors = 0; mddev->curr_resync = 0; mddev->resync_mismatches = 0; mddev->suspend_lo = mddev->suspend_hi = 0; @@ -4618,7 +4568,7 @@ static int get_array_info(mddev_t * mddev, void __user * arg) info.spare_disks = spare; info.layout = mddev->layout; - info.chunk_size = mddev->chunk_size; + info.chunk_size = mddev->chunk_sectors << 9; if (copy_to_user(arg, &info, sizeof(info))) return -EFAULT; @@ -4843,7 +4793,7 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info) rdev->sb_start = rdev->bdev->bd_inode->i_size / 512; } else rdev->sb_start = calc_dev_sboffset(rdev->bdev); - rdev->sectors = calc_num_sectors(rdev, mddev->chunk_size); + rdev->sectors = rdev->sb_start; err = bind_rdev_to_array(rdev, mddev); if (err) { @@ -4913,7 +4863,7 @@ static int hot_add_disk(mddev_t * mddev, dev_t dev) else rdev->sb_start = rdev->bdev->bd_inode->i_size / 512; - rdev->sectors = calc_num_sectors(rdev, mddev->chunk_size); + rdev->sectors = rdev->sb_start; if (test_bit(Faulty, &rdev->flags)) { printk(KERN_WARNING @@ -5062,7 +5012,7 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info) mddev->external = 0; mddev->layout = info->layout; - mddev->chunk_size = info->chunk_size; + mddev->chunk_sectors = info->chunk_size >> 9; mddev->max_disks = MD_SB_DISKS; @@ -5081,7 +5031,7 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info) get_random_bytes(mddev->uuid, 16); mddev->new_level = mddev->level; - mddev->new_chunk = mddev->chunk_size; + mddev->new_chunk_sectors = mddev->chunk_sectors; mddev->new_layout = mddev->layout; mddev->delta_disks = 0; @@ -5191,7 +5141,7 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info) mddev->level != info->level || /* mddev->layout != info->layout || */ !mddev->persistent != info->not_persistent|| - mddev->chunk_size != info->chunk_size || + mddev->chunk_sectors != info->chunk_size >> 9 || /* ignore bottom 8 bits of state, and allow SB_BITMAP_PRESENT to change */ ((state^info->state) & 0xfffffe00) ) @@ -5215,10 +5165,15 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info) * we don't need to 
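
The min_sync/max_sync stores switch from a bitmask test, which is only correct for power-of-2 chunks, to dividing a scratch copy and checking the remainder. Using the same sector_div() stand-in as in the earlier sketch:

	#include <stdint.h>
	#include <stdio.h>

	typedef uint64_t sector_t;

	static uint32_t sector_div(sector_t *n, uint32_t base)	/* stand-in, as before */
	{
		uint32_t rem = (uint32_t)(*n % base);

		*n /= base;
		return rem;
	}

	/* New multiple-of-chunk test: divide a scratch copy and inspect the
	 * remainder; the old '& (chunk - 1)' test assumed a power of 2. */
	static int is_chunk_multiple(sector_t v, uint32_t chunk_sectors)
	{
		sector_t tmp = v;

		return sector_div(&tmp, chunk_sectors) == 0;
	}

	int main(void)
	{
		printf("%d %d\n", is_chunk_multiple(384, 192),
		       is_chunk_multiple(385, 192));	/* 1 0 */
		return 0;
	}
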
do anything at the md level, the * personality will take care of it all. */ - if (mddev->pers->reconfig == NULL) + if (mddev->pers->check_reshape == NULL) return -EINVAL; - else - return mddev->pers->reconfig(mddev, info->layout, -1); + else { + mddev->new_layout = info->layout; + rv = mddev->pers->check_reshape(mddev); + if (rv) + mddev->new_layout = mddev->layout; + return rv; + } } if (info->size >= 0 && mddev->dev_sectors / 2 != info->size) rv = update_size(mddev, (sector_t)info->size * 2); @@ -6717,7 +6672,8 @@ void md_check_recovery(mddev_t *mddev) */ if (mddev->reshape_position != MaxSector) { - if (mddev->pers->check_reshape(mddev) != 0) + if (mddev->pers->check_reshape == NULL || + mddev->pers->check_reshape(mddev) != 0) /* Cannot proceed */ goto unlock; set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); diff --git a/drivers/md/md.h b/drivers/md/md.h index 8227ab909d4..9430a110db9 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -30,13 +30,6 @@ typedef struct mddev_s mddev_t; typedef struct mdk_rdev_s mdk_rdev_t; /* - * options passed in raidrun: - */ - -/* Currently this must fit in an 'int' */ -#define MAX_CHUNK_SIZE (1<<30) - -/* * MD's 'extended' device */ struct mdk_rdev_s @@ -145,7 +138,7 @@ struct mddev_s int external; /* metadata is * managed externally */ char metadata_type[17]; /* externally set*/ - int chunk_size; + int chunk_sectors; time_t ctime, utime; int level, layout; char clevel[16]; @@ -166,7 +159,8 @@ struct mddev_s * If reshape_position is MaxSector, then no reshape is happening (yet). */ sector_t reshape_position; - int delta_disks, new_level, new_layout, new_chunk; + int delta_disks, new_level, new_layout; + int new_chunk_sectors; struct mdk_thread_s *thread; /* management thread */ struct mdk_thread_s *sync_thread; /* doing resync or reconstruct */ @@ -325,7 +319,6 @@ struct mdk_personality int (*check_reshape) (mddev_t *mddev); int (*start_reshape) (mddev_t *mddev); void (*finish_reshape) (mddev_t *mddev); - int (*reconfig) (mddev_t *mddev, int layout, int chunk_size); /* quiesce moves between quiescence states * 0 - fully active * 1 - no new requests allowed @@ -437,5 +430,6 @@ extern void md_new_event(mddev_t *mddev); extern int md_allow_write(mddev_t *mddev); extern void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev); extern void md_set_array_sectors(mddev_t *mddev, sector_t array_sectors); +extern int md_check_no_bitmap(mddev_t *mddev); #endif /* _MD_MD_H */ diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index 4ee31aa13c4..cbe368fa659 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -58,7 +58,7 @@ static void multipath_reschedule_retry (struct multipath_bh *mp_bh) { unsigned long flags; mddev_t *mddev = mp_bh->mddev; - multipath_conf_t *conf = mddev_to_conf(mddev); + multipath_conf_t *conf = mddev->private; spin_lock_irqsave(&conf->device_lock, flags); list_add(&mp_bh->retry_list, &conf->retry_list); @@ -75,7 +75,7 @@ static void multipath_reschedule_retry (struct multipath_bh *mp_bh) static void multipath_end_bh_io (struct multipath_bh *mp_bh, int err) { struct bio *bio = mp_bh->master_bio; - multipath_conf_t *conf = mddev_to_conf(mp_bh->mddev); + multipath_conf_t *conf = mp_bh->mddev->private; bio_endio(bio, err); mempool_free(mp_bh, conf->pool); @@ -85,7 +85,7 @@ static void multipath_end_request(struct bio *bio, int error) { int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct multipath_bh * mp_bh = (struct multipath_bh *)(bio->bi_private); - multipath_conf_t *conf = 
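
Since ->reconfig is removed from mdk_personality and check_reshape() remains optional, md_check_recovery() must tolerate a NULL method pointer before calling through it. A tiny model of that ops-table guard (names hypothetical):

	#include <stdio.h>

	struct mdk_personality {
		int (*check_reshape)(void *mddev);	/* optional since this patch */
	};

	/* md_check_recovery() now guards the call: a personality without a
	 * check_reshape method simply cannot proceed with a reshape. */
	static int try_reshape(const struct mdk_personality *pers, void *mddev)
	{
		if (pers->check_reshape == NULL || pers->check_reshape(mddev) != 0)
			return -1;	/* cannot proceed */
		return 0;
	}

	int main(void)
	{
		struct mdk_personality linear = { .check_reshape = NULL };

		printf("%d\n", try_reshape(&linear, NULL));	/* -1 */
		return 0;
	}
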
mddev_to_conf(mp_bh->mddev); + multipath_conf_t *conf = mp_bh->mddev->private; mdk_rdev_t *rdev = conf->multipaths[mp_bh->path].rdev; if (uptodate) @@ -107,7 +107,7 @@ static void multipath_end_request(struct bio *bio, int error) static void unplug_slaves(mddev_t *mddev) { - multipath_conf_t *conf = mddev_to_conf(mddev); + multipath_conf_t *conf = mddev->private; int i; rcu_read_lock(); @@ -138,7 +138,7 @@ static void multipath_unplug(struct request_queue *q) static int multipath_make_request (struct request_queue *q, struct bio * bio) { mddev_t *mddev = q->queuedata; - multipath_conf_t *conf = mddev_to_conf(mddev); + multipath_conf_t *conf = mddev->private; struct multipath_bh * mp_bh; struct multipath_info *multipath; const int rw = bio_data_dir(bio); @@ -180,7 +180,7 @@ static int multipath_make_request (struct request_queue *q, struct bio * bio) static void multipath_status (struct seq_file *seq, mddev_t *mddev) { - multipath_conf_t *conf = mddev_to_conf(mddev); + multipath_conf_t *conf = mddev->private; int i; seq_printf (seq, " [%d/%d] [", conf->raid_disks, @@ -195,7 +195,7 @@ static void multipath_status (struct seq_file *seq, mddev_t *mddev) static int multipath_congested(void *data, int bits) { mddev_t *mddev = data; - multipath_conf_t *conf = mddev_to_conf(mddev); + multipath_conf_t *conf = mddev->private; int i, ret = 0; rcu_read_lock(); @@ -220,7 +220,7 @@ static int multipath_congested(void *data, int bits) */ static void multipath_error (mddev_t *mddev, mdk_rdev_t *rdev) { - multipath_conf_t *conf = mddev_to_conf(mddev); + multipath_conf_t *conf = mddev->private; if (conf->working_disks <= 1) { /* @@ -367,7 +367,7 @@ static void multipathd (mddev_t *mddev) struct multipath_bh *mp_bh; struct bio *bio; unsigned long flags; - multipath_conf_t *conf = mddev_to_conf(mddev); + multipath_conf_t *conf = mddev->private; struct list_head *head = &conf->retry_list; md_check_recovery(mddev); @@ -421,6 +421,9 @@ static int multipath_run (mddev_t *mddev) struct multipath_info *disk; mdk_rdev_t *rdev; + if (md_check_no_bitmap(mddev)) + return -EINVAL; + if (mddev->level != LEVEL_MULTIPATH) { printk("multipath: %s: raid level not set to multipath IO (%d)\n", mdname(mddev), mddev->level); @@ -531,7 +534,7 @@ out: static int multipath_stop (mddev_t *mddev) { - multipath_conf_t *conf = mddev_to_conf(mddev); + multipath_conf_t *conf = mddev->private; md_unregister_thread(mddev->thread); mddev->thread = NULL; diff --git a/drivers/md/multipath.h b/drivers/md/multipath.h index 6fa70b400cd..d1c2a8d7839 100644 --- a/drivers/md/multipath.h +++ b/drivers/md/multipath.h @@ -19,12 +19,6 @@ struct multipath_private_data { typedef struct multipath_private_data multipath_conf_t; /* - * this is the only point in the RAID code where we violate - * C type safety. mddev->private is an 'opaque' pointer. - */ -#define mddev_to_conf(mddev) ((multipath_conf_t *) mddev->private) - -/* * this is our 'private' 'collective' MULTIPATH buffer head. 
* it contains information about what kind of IO operations were started * for this MULTIPATH operation, and about their status: diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 925507e7d67..ab4a489d869 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -26,8 +26,8 @@ static void raid0_unplug(struct request_queue *q) { mddev_t *mddev = q->queuedata; - raid0_conf_t *conf = mddev_to_conf(mddev); - mdk_rdev_t **devlist = conf->strip_zone[0].dev; + raid0_conf_t *conf = mddev->private; + mdk_rdev_t **devlist = conf->devlist; int i; for (i=0; i<mddev->raid_disks; i++) { @@ -40,8 +40,8 @@ static void raid0_unplug(struct request_queue *q) static int raid0_congested(void *data, int bits) { mddev_t *mddev = data; - raid0_conf_t *conf = mddev_to_conf(mddev); - mdk_rdev_t **devlist = conf->strip_zone[0].dev; + raid0_conf_t *conf = mddev->private; + mdk_rdev_t **devlist = conf->devlist; int i, ret = 0; for (i = 0; i < mddev->raid_disks && !ret ; i++) { @@ -52,27 +52,60 @@ static int raid0_congested(void *data, int bits) return ret; } +/* + * inform the user of the raid configuration +*/ +static void dump_zones(mddev_t *mddev) +{ + int j, k, h; + sector_t zone_size = 0; + sector_t zone_start = 0; + char b[BDEVNAME_SIZE]; + raid0_conf_t *conf = mddev->private; + printk(KERN_INFO "******* %s configuration *********\n", + mdname(mddev)); + h = 0; + for (j = 0; j < conf->nr_strip_zones; j++) { + printk(KERN_INFO "zone%d=[", j); + for (k = 0; k < conf->strip_zone[j].nb_dev; k++) + printk("%s/", + bdevname(conf->devlist[j*mddev->raid_disks + + k]->bdev, b)); + printk("]\n"); + + zone_size = conf->strip_zone[j].zone_end - zone_start; + printk(KERN_INFO " zone offset=%llukb " + "device offset=%llukb size=%llukb\n", + (unsigned long long)zone_start>>1, + (unsigned long long)conf->strip_zone[j].dev_start>>1, + (unsigned long long)zone_size>>1); + zone_start = conf->strip_zone[j].zone_end; + } + printk(KERN_INFO "**********************************\n\n"); +} -static int create_strip_zones (mddev_t *mddev) +static int create_strip_zones(mddev_t *mddev) { - int i, c, j; - sector_t current_start, curr_zone_start; - sector_t min_spacing; - raid0_conf_t *conf = mddev_to_conf(mddev); - mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev; + int i, c, j, err; + sector_t curr_zone_end, sectors; + mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev, **dev; struct strip_zone *zone; int cnt; char b[BDEVNAME_SIZE]; - - /* - * The number of 'same size groups' - */ - conf->nr_strip_zones = 0; - + raid0_conf_t *conf = kzalloc(sizeof(*conf), GFP_KERNEL); + + if (!conf) + return -ENOMEM; list_for_each_entry(rdev1, &mddev->disks, same_set) { printk(KERN_INFO "raid0: looking at %s\n", bdevname(rdev1->bdev,b)); c = 0; + + /* round size to chunk_size */ + sectors = rdev1->sectors; + sector_div(sectors, mddev->chunk_sectors); + rdev1->sectors = sectors * mddev->chunk_sectors; + list_for_each_entry(rdev2, &mddev->disks, same_set) { printk(KERN_INFO "raid0: comparing %s(%llu)", bdevname(rdev1->bdev,b), @@ -103,16 +136,16 @@ static int create_strip_zones (mddev_t *mddev) } } printk(KERN_INFO "raid0: FINAL %d zones\n", conf->nr_strip_zones); - + err = -ENOMEM; conf->strip_zone = kzalloc(sizeof(struct strip_zone)* conf->nr_strip_zones, GFP_KERNEL); if (!conf->strip_zone) - return 1; + goto abort; conf->devlist = kzalloc(sizeof(mdk_rdev_t*)* conf->nr_strip_zones*mddev->raid_disks, GFP_KERNEL); if (!conf->devlist) - return 1; + goto abort; /* The first zone must contain all devices, so here we check that * there is a proper alignment of slots 
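
dump_zones() prints the zone layout using only the cumulative zone_end values: each zone's start is the previous end, its size the difference, and '>>1' converts sectors to KiB. A sketch of that walk with made-up zone ends:

	#include <stdint.h>
	#include <stdio.h>

	typedef uint64_t sector_t;

	struct strip_zone {
		sector_t zone_end;	/* start of the next zone, as in raid0.h now */
	};

	int main(void)
	{
		struct strip_zone z[3] = { {4096}, {6144}, {6656} };	/* made up */
		sector_t zone_start = 0;
		int j;

		for (j = 0; j < 3; j++) {
			sector_t zone_size = z[j].zone_end - zone_start;

			printf("zone%d offset=%llukb size=%llukb\n", j,
			       (unsigned long long)(zone_start >> 1),
			       (unsigned long long)(zone_size >> 1));
			zone_start = z[j].zone_end;
		}
		return 0;
	}
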
to devices and find them all @@ -120,7 +153,8 @@ static int create_strip_zones (mddev_t *mddev) zone = &conf->strip_zone[0]; cnt = 0; smallest = NULL; - zone->dev = conf->devlist; + dev = conf->devlist; + err = -EINVAL; list_for_each_entry(rdev1, &mddev->disks, same_set) { int j = rdev1->raid_disk; @@ -129,12 +163,12 @@ static int create_strip_zones (mddev_t *mddev) "aborting!\n", j); goto abort; } - if (zone->dev[j]) { + if (dev[j]) { printk(KERN_ERR "raid0: multiple devices for %d - " "aborting!\n", j); goto abort; } - zone->dev[j] = rdev1; + dev[j] = rdev1; blk_queue_stack_limits(mddev->queue, rdev1->bdev->bd_disk->queue); @@ -157,34 +191,32 @@ static int create_strip_zones (mddev_t *mddev) goto abort; } zone->nb_dev = cnt; - zone->sectors = smallest->sectors * cnt; - zone->zone_start = 0; + zone->zone_end = smallest->sectors * cnt; - current_start = smallest->sectors; - curr_zone_start = zone->sectors; + curr_zone_end = zone->zone_end; /* now do the other zones */ for (i = 1; i < conf->nr_strip_zones; i++) { zone = conf->strip_zone + i; - zone->dev = conf->strip_zone[i-1].dev + mddev->raid_disks; + dev = conf->devlist + i * mddev->raid_disks; printk(KERN_INFO "raid0: zone %d\n", i); - zone->dev_start = current_start; + zone->dev_start = smallest->sectors; smallest = NULL; c = 0; for (j=0; j<cnt; j++) { char b[BDEVNAME_SIZE]; - rdev = conf->strip_zone[0].dev[j]; + rdev = conf->devlist[j]; printk(KERN_INFO "raid0: checking %s ...", bdevname(rdev->bdev, b)); - if (rdev->sectors <= current_start) { + if (rdev->sectors <= zone->dev_start) { printk(KERN_INFO " nope.\n"); continue; } printk(KERN_INFO " contained as device %d\n", c); - zone->dev[c] = rdev; + dev[c] = rdev; c++; if (!smallest || rdev->sectors < smallest->sectors) { smallest = rdev; @@ -194,47 +226,39 @@ static int create_strip_zones (mddev_t *mddev) } zone->nb_dev = c; - zone->sectors = (smallest->sectors - current_start) * c; + sectors = (smallest->sectors - zone->dev_start) * c; printk(KERN_INFO "raid0: zone->nb_dev: %d, sectors: %llu\n", - zone->nb_dev, (unsigned long long)zone->sectors); + zone->nb_dev, (unsigned long long)sectors); - zone->zone_start = curr_zone_start; - curr_zone_start += zone->sectors; + curr_zone_end += sectors; + zone->zone_end = curr_zone_end; - current_start = smallest->sectors; printk(KERN_INFO "raid0: current zone start: %llu\n", - (unsigned long long)current_start); - } - - /* Now find appropriate hash spacing. - * We want a number which causes most hash entries to cover - * at most two strips, but the hash table must be at most - * 1 PAGE. We choose the smallest strip, or contiguous collection - * of strips, that has big enough size. We never consider the last - * strip though as it's size has no bearing on the efficacy of the hash - * table. 
- */ - conf->spacing = curr_zone_start; - min_spacing = curr_zone_start; - sector_div(min_spacing, PAGE_SIZE/sizeof(struct strip_zone*)); - for (i=0; i < conf->nr_strip_zones-1; i++) { - sector_t s = 0; - for (j = i; j < conf->nr_strip_zones - 1 && - s < min_spacing; j++) - s += conf->strip_zone[j].sectors; - if (s >= min_spacing && s < conf->spacing) - conf->spacing = s; + (unsigned long long)smallest->sectors); } - mddev->queue->unplug_fn = raid0_unplug; - mddev->queue->backing_dev_info.congested_fn = raid0_congested; mddev->queue->backing_dev_info.congested_data = mddev; + /* + * now since we have the hard sector sizes, we can make sure + * chunk size is a multiple of that sector size + */ + if ((mddev->chunk_sectors << 9) % queue_logical_block_size(mddev->queue)) { + printk(KERN_ERR "%s chunk_size of %d not valid\n", + mdname(mddev), + mddev->chunk_sectors << 9); + goto abort; + } printk(KERN_INFO "raid0: done.\n"); + mddev->private = conf; return 0; - abort: - return 1; +abort: + kfree(conf->strip_zone); + kfree(conf->devlist); + kfree(conf); + mddev->private = NULL; + return err; } /** @@ -252,10 +276,15 @@ static int raid0_mergeable_bvec(struct request_queue *q, mddev_t *mddev = q->queuedata; sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); int max; - unsigned int chunk_sectors = mddev->chunk_size >> 9; + unsigned int chunk_sectors = mddev->chunk_sectors; unsigned int bio_sectors = bvm->bi_size >> 9; - max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9; + if (is_power_of_2(chunk_sectors)) + max = (chunk_sectors - ((sector & (chunk_sectors-1)) + + bio_sectors)) << 9; + else + max = (chunk_sectors - (sector_div(sector, chunk_sectors) + + bio_sectors)) << 9; if (max < 0) max = 0; /* bio_add cannot handle a negative return */ if (max <= biovec->bv_len && bio_sectors == 0) return biovec->bv_len; @@ -277,84 +306,28 @@ static sector_t raid0_size(mddev_t *mddev, sector_t sectors, int raid_disks) return array_sectors; } -static int raid0_run (mddev_t *mddev) +static int raid0_run(mddev_t *mddev) { - unsigned cur=0, i=0, nb_zone; - s64 sectors; - raid0_conf_t *conf; + int ret; - if (mddev->chunk_size == 0) { - printk(KERN_ERR "md/raid0: non-zero chunk size required.\n"); + if (mddev->chunk_sectors == 0) { + printk(KERN_ERR "md/raid0: chunk size must be set.\n"); return -EINVAL; } - printk(KERN_INFO "%s: setting max_sectors to %d, segment boundary to %d\n", - mdname(mddev), - mddev->chunk_size >> 9, - (mddev->chunk_size>>1)-1); - blk_queue_max_sectors(mddev->queue, mddev->chunk_size >> 9); - blk_queue_segment_boundary(mddev->queue, (mddev->chunk_size>>1) - 1); + if (md_check_no_bitmap(mddev)) + return -EINVAL; + blk_queue_max_sectors(mddev->queue, mddev->chunk_sectors); mddev->queue->queue_lock = &mddev->queue->__queue_lock; - conf = kmalloc(sizeof (raid0_conf_t), GFP_KERNEL); - if (!conf) - goto out; - mddev->private = (void *)conf; - - conf->strip_zone = NULL; - conf->devlist = NULL; - if (create_strip_zones (mddev)) - goto out_free_conf; + ret = create_strip_zones(mddev); + if (ret < 0) + return ret; /* calculate array device size */ md_set_array_sectors(mddev, raid0_size(mddev, 0, 0)); printk(KERN_INFO "raid0 : md_size is %llu sectors.\n", (unsigned long long)mddev->array_sectors); - printk(KERN_INFO "raid0 : conf->spacing is %llu sectors.\n", - (unsigned long long)conf->spacing); - { - sector_t s = raid0_size(mddev, 0, 0); - sector_t space = conf->spacing; - int round; - conf->sector_shift = 0; - if (sizeof(sector_t) > sizeof(u32)) { - /*shift down 
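
raid0_mergeable_bvec() above computes how many bytes may still be merged into the current chunk, with a mask fast path for power-of-2 chunk sizes and a division otherwise (the kernel uses sector_div() there because plain 64-bit division is not available on every 32-bit target). A runnable sketch of that limit, with '%' standing in for sector_div():

	#include <stdint.h>
	#include <stdio.h>

	typedef uint64_t sector_t;

	static int is_power_of_2(uint32_t n)
	{
		return n && !(n & (n - 1));
	}

	/* Bytes left in the current chunk after 'sector', minus what the
	 * bio already carries; negative means nothing can be merged. */
	static int bytes_left_in_chunk(sector_t sector, uint32_t chunk_sectors,
				       uint32_t bio_sectors)
	{
		uint32_t in_chunk = is_power_of_2(chunk_sectors)
			? (uint32_t)(sector & (chunk_sectors - 1))	/* cheap mask */
			: (uint32_t)(sector % chunk_sectors);		/* general    */
		int left = (int)chunk_sectors - (int)(in_chunk + bio_sectors);

		return left < 0 ? 0 : left << 9;	/* bio_add cannot take a negative */
	}

	int main(void)
	{
		printf("%d\n", bytes_left_in_chunk(1000, 128, 8));	/* 8192  */
		printf("%d\n", bytes_left_in_chunk(1000, 192, 8));	/* 73728 */
		return 0;
	}
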
space and s so that sector_div will work */ - while (space > (sector_t) (~(u32)0)) { - s >>= 1; - space >>= 1; - s += 1; /* force round-up */ - conf->sector_shift++; - } - } - round = sector_div(s, (u32)space) ? 1 : 0; - nb_zone = s + round; - } - printk(KERN_INFO "raid0 : nb_zone is %d.\n", nb_zone); - - printk(KERN_INFO "raid0 : Allocating %zu bytes for hash.\n", - nb_zone*sizeof(struct strip_zone*)); - conf->hash_table = kmalloc (sizeof (struct strip_zone *)*nb_zone, GFP_KERNEL); - if (!conf->hash_table) - goto out_free_conf; - sectors = conf->strip_zone[cur].sectors; - - conf->hash_table[0] = conf->strip_zone + cur; - for (i=1; i< nb_zone; i++) { - while (sectors <= conf->spacing) { - cur++; - sectors += conf->strip_zone[cur].sectors; - } - sectors -= conf->spacing; - conf->hash_table[i] = conf->strip_zone + cur; - } - if (conf->sector_shift) { - conf->spacing >>= conf->sector_shift; - /* round spacing up so when we divide by it, we - * err on the side of too-low, which is safest - */ - conf->spacing++; - } - /* calculate the max read-ahead size. * For read-ahead of large files to be effective, we need to * readahead at least twice a whole stripe. i.e. number of devices @@ -365,48 +338,107 @@ static int raid0_run (mddev_t *mddev) * chunksize should be used in that case. */ { - int stripe = mddev->raid_disks * mddev->chunk_size / PAGE_SIZE; + int stripe = mddev->raid_disks * + (mddev->chunk_sectors << 9) / PAGE_SIZE; if (mddev->queue->backing_dev_info.ra_pages < 2* stripe) mddev->queue->backing_dev_info.ra_pages = 2* stripe; } - blk_queue_merge_bvec(mddev->queue, raid0_mergeable_bvec); + dump_zones(mddev); return 0; +} -out_free_conf: +static int raid0_stop(mddev_t *mddev) +{ + raid0_conf_t *conf = mddev->private; + + blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ kfree(conf->strip_zone); kfree(conf->devlist); kfree(conf); mddev->private = NULL; -out: - return -ENOMEM; + return 0; } -static int raid0_stop (mddev_t *mddev) +/* Find the zone which holds a particular offset + * Update *sectorp to be an offset in that zone + */ +static struct strip_zone *find_zone(struct raid0_private_data *conf, + sector_t *sectorp) { - raid0_conf_t *conf = mddev_to_conf(mddev); + int i; + struct strip_zone *z = conf->strip_zone; + sector_t sector = *sectorp; + + for (i = 0; i < conf->nr_strip_zones; i++) + if (sector < z[i].zone_end) { + if (i) + *sectorp = sector - z[i-1].zone_end; + return z + i; + } + BUG(); +} - blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ - kfree(conf->hash_table); - conf->hash_table = NULL; - kfree(conf->strip_zone); - conf->strip_zone = NULL; - kfree(conf); - mddev->private = NULL; +/* + * remaps the bio to the target device. we separate two flows. 
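
find_zone() replaces the old hash table with a linear scan of cumulative zone ends, rebasing the caller's sector to a zone-relative offset on the way out. A self-contained sketch (the kernel version BUG()s instead of returning -1):

	#include <stdint.h>
	#include <stdio.h>

	typedef uint64_t sector_t;

	struct strip_zone {
		sector_t zone_end;
	};

	/* Walk the cumulative ends; on a hit, rebase *sectorp to an offset
	 * within the zone found (zones after the first start where the
	 * previous one ended). */
	static int find_zone(const struct strip_zone *z, int nr, sector_t *sectorp)
	{
		int i;

		for (i = 0; i < nr; i++)
			if (*sectorp < z[i].zone_end) {
				if (i)
					*sectorp -= z[i - 1].zone_end;
				return i;
			}
		return -1;	/* the kernel BUG()s here instead */
	}

	int main(void)
	{
		struct strip_zone z[3] = { {4096}, {6144}, {6656} };
		sector_t s = 5000;
		int i = find_zone(z, 3, &s);

		printf("zone %d, offset %llu\n", i, (unsigned long long)s);	/* 1, 904 */
		return 0;
	}
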
+ * power-of-2 flow and a general flow, for the sake of performance +*/ +static mdk_rdev_t *map_sector(mddev_t *mddev, struct strip_zone *zone, + sector_t sector, sector_t *sector_offset) +{ + unsigned int sect_in_chunk; + sector_t chunk; + raid0_conf_t *conf = mddev->private; + unsigned int chunk_sects = mddev->chunk_sectors; + + if (is_power_of_2(chunk_sects)) { + int chunksect_bits = ffz(~chunk_sects); + /* find the sector offset inside the chunk */ + sect_in_chunk = sector & (chunk_sects - 1); + sector >>= chunksect_bits; + /* chunk in zone */ + chunk = *sector_offset; + /* quotient is the chunk in real device*/ + sector_div(chunk, zone->nb_dev << chunksect_bits); + } else{ + sect_in_chunk = sector_div(sector, chunk_sects); + chunk = *sector_offset; + sector_div(chunk, chunk_sects * zone->nb_dev); + } + /* + * position the bio over the real device + * real sector = chunk in device + starting of zone + * + the position in the chunk + */ + *sector_offset = (chunk * chunk_sects) + sect_in_chunk; + return conf->devlist[(zone - conf->strip_zone)*mddev->raid_disks + + sector_div(sector, zone->nb_dev)]; +} - return 0; +/* + * Does the IO span more than one chunk? +*/ +static inline int is_io_in_chunk_boundary(mddev_t *mddev, + unsigned int chunk_sects, struct bio *bio) +{ + if (likely(is_power_of_2(chunk_sects))) { + return chunk_sects >= ((bio->bi_sector & (chunk_sects-1)) + + (bio->bi_size >> 9)); + } else{ + sector_t sector = bio->bi_sector; + return chunk_sects >= (sector_div(sector, chunk_sects) + + (bio->bi_size >> 9)); + } } -static int raid0_make_request (struct request_queue *q, struct bio *bio) +static int raid0_make_request(struct request_queue *q, struct bio *bio) { mddev_t *mddev = q->queuedata; - unsigned int sect_in_chunk, chunksect_bits, chunk_sects; - raid0_conf_t *conf = mddev_to_conf(mddev); + unsigned int chunk_sects; + sector_t sector_offset; struct strip_zone *zone; mdk_rdev_t *tmp_dev; - sector_t chunk; - sector_t sector, rsect; const int rw = bio_data_dir(bio); int cpu; @@ -421,11 +453,9 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) bio_sectors(bio)); part_stat_unlock(); - chunk_sects = mddev->chunk_size >> 9; - chunksect_bits = ffz(~chunk_sects); - sector = bio->bi_sector; - - if (unlikely(chunk_sects < (bio->bi_sector & (chunk_sects - 1)) + (bio->bi_size >> 9))) { + chunk_sects = mddev->chunk_sectors; + if (unlikely(!is_io_in_chunk_boundary(mddev, chunk_sects, bio))) { + sector_t sector = bio->bi_sector; struct bio_pair *bp; /* Sanity check -- queue functions should prevent this happening */ if (bio->bi_vcnt != 1 || @@ -434,7 +464,12 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) /* This is a one page bio that upper layers * refuse to split for us, so we need to split it. 
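
For the general flow of map_sector() above, the arithmetic is: offset within the chunk, chunk number on the target device (zone offset divided by chunk size times devices in the zone), and a round-robin device pick. A worked example with plain 64-bit division standing in for sector_div(), on a hypothetical 3-device zone with 192-sector chunks:

	#include <stdint.h>
	#include <stdio.h>

	typedef uint64_t sector_t;

	int main(void)
	{
		uint32_t chunk_sects = 192, nb_dev = 3;	/* hypothetical zone */
		sector_t sector = 10000;	/* address within the array */
		sector_t sector_offset = 10000;	/* offset within the zone  */

		uint32_t sect_in_chunk = (uint32_t)(sector % chunk_sects);
		sector_t chunk = sector_offset / (chunk_sects * nb_dev);	/* chunk # on target dev */
		uint32_t dev = (uint32_t)((sector / chunk_sects) % nb_dev);	/* round-robin pick */

		/* real sector on that device = chunk start + offset in chunk */
		printf("dev %u, sector %llu\n", (unsigned)dev,
		       (unsigned long long)(chunk * chunk_sects + sect_in_chunk));
		return 0;
	}
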
*/ - bp = bio_split(bio, chunk_sects - (bio->bi_sector & (chunk_sects - 1))); + if (likely(is_power_of_2(chunk_sects))) + bp = bio_split(bio, chunk_sects - (sector & + (chunk_sects-1))); + else + bp = bio_split(bio, chunk_sects - + sector_div(sector, chunk_sects)); if (raid0_make_request(q, &bp->bio1)) generic_make_request(&bp->bio1); if (raid0_make_request(q, &bp->bio2)) @@ -443,34 +478,14 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) bio_pair_release(bp); return 0; } - - - { - sector_t x = sector >> conf->sector_shift; - sector_div(x, (u32)conf->spacing); - zone = conf->hash_table[x]; - } - while (sector >= zone->zone_start + zone->sectors) - zone++; - - sect_in_chunk = bio->bi_sector & (chunk_sects - 1); - - - { - sector_t x = (sector - zone->zone_start) >> chunksect_bits; - - sector_div(x, zone->nb_dev); - chunk = x; - - x = sector >> chunksect_bits; - tmp_dev = zone->dev[sector_div(x, zone->nb_dev)]; - } - rsect = (chunk << chunksect_bits) + zone->dev_start + sect_in_chunk; - + sector_offset = bio->bi_sector; + zone = find_zone(mddev->private, &sector_offset); + tmp_dev = map_sector(mddev, zone, bio->bi_sector, + &sector_offset); bio->bi_bdev = tmp_dev->bdev; - bio->bi_sector = rsect + tmp_dev->data_offset; - + bio->bi_sector = sector_offset + zone->dev_start + + tmp_dev->data_offset; /* * Let the main block layer submit the IO and resolve recursion: */ @@ -485,31 +500,35 @@ bad_map: return 0; } -static void raid0_status (struct seq_file *seq, mddev_t *mddev) +static void raid0_status(struct seq_file *seq, mddev_t *mddev) { #undef MD_DEBUG #ifdef MD_DEBUG int j, k, h; char b[BDEVNAME_SIZE]; - raid0_conf_t *conf = mddev_to_conf(mddev); + raid0_conf_t *conf = mddev->private; + sector_t zone_size; + sector_t zone_start = 0; h = 0; + for (j = 0; j < conf->nr_strip_zones; j++) { seq_printf(seq, " z%d", j); - if (conf->hash_table[h] == conf->strip_zone+j) - seq_printf(seq, "(h%d)", h++); seq_printf(seq, "=["); for (k = 0; k < conf->strip_zone[j].nb_dev; k++) seq_printf(seq, "%s/", bdevname( - conf->strip_zone[j].dev[k]->bdev,b)); - - seq_printf(seq, "] zs=%d ds=%d s=%d\n", - conf->strip_zone[j].zone_start, - conf->strip_zone[j].dev_start, - conf->strip_zone[j].sectors); + conf->devlist[j*mddev->raid_disks + k] + ->bdev, b)); + + zone_size = conf->strip_zone[j].zone_end - zone_start; + seq_printf(seq, "] ze=%lld ds=%lld s=%lld\n", + (unsigned long long)zone_start>>1, + (unsigned long long)conf->strip_zone[j].dev_start>>1, + (unsigned long long)zone_size>>1); + zone_start = conf->strip_zone[j].zone_end; } #endif - seq_printf(seq, " %dk chunks", mddev->chunk_size/1024); + seq_printf(seq, " %dk chunks", mddev->chunk_sectors / 2); return; } diff --git a/drivers/md/raid0.h b/drivers/md/raid0.h index 824b12eb1d4..91f8e876ee6 100644 --- a/drivers/md/raid0.h +++ b/drivers/md/raid0.h @@ -3,26 +3,18 @@ struct strip_zone { - sector_t zone_start; /* Zone offset in md_dev (in sectors) */ + sector_t zone_end; /* Start of the next zone (in sectors) */ sector_t dev_start; /* Zone offset in real dev (in sectors) */ - sector_t sectors; /* Zone size in sectors */ int nb_dev; /* # of devices attached to the zone */ - mdk_rdev_t **dev; /* Devices attached to the zone */ }; struct raid0_private_data { - struct strip_zone **hash_table; /* Table of indexes into strip_zone */ struct strip_zone *strip_zone; mdk_rdev_t **devlist; /* lists of rdevs, pointed to by strip_zone->dev */ int nr_strip_zones; - - sector_t spacing; - int sector_shift; /* shift this before divide by spacing */ }; typedef 
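
When a bio does cross a chunk edge, raid0_make_request() splits it so the first half exactly fills the current chunk; the bio_split() argument is the distance to the next chunk boundary, again with a mask fast path for power-of-2 chunks. A sketch of that computation:

	#include <stdint.h>
	#include <stdio.h>

	typedef uint64_t sector_t;

	static int is_power_of_2(uint32_t n)
	{
		return n && !(n & (n - 1));
	}

	/* Sectors to put in the first half -- the bio_split() argument --
	 * i.e. the distance from 'sector' to the next chunk boundary. */
	static uint32_t split_sectors(sector_t sector, uint32_t chunk_sects)
	{
		if (is_power_of_2(chunk_sects))
			return chunk_sects - (uint32_t)(sector & (chunk_sects - 1));
		return chunk_sects - (uint32_t)(sector % chunk_sects);
	}

	int main(void)
	{
		printf("%u %u\n", split_sectors(1000, 128),	/* 24  */
		       split_sectors(1000, 192));		/* 152 */
		return 0;
	}
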
struct raid0_private_data raid0_conf_t; -#define mddev_to_conf(mddev) ((raid0_conf_t *) mddev->private) - #endif diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index e23758b4a34..89939a7aef5 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -182,7 +182,7 @@ static void put_all_bios(conf_t *conf, r1bio_t *r1_bio) static void free_r1bio(r1bio_t *r1_bio) { - conf_t *conf = mddev_to_conf(r1_bio->mddev); + conf_t *conf = r1_bio->mddev->private; /* * Wake up any possible resync thread that waits for the device @@ -196,7 +196,7 @@ static void free_r1bio(r1bio_t *r1_bio) static void put_buf(r1bio_t *r1_bio) { - conf_t *conf = mddev_to_conf(r1_bio->mddev); + conf_t *conf = r1_bio->mddev->private; int i; for (i=0; i<conf->raid_disks; i++) { @@ -214,7 +214,7 @@ static void reschedule_retry(r1bio_t *r1_bio) { unsigned long flags; mddev_t *mddev = r1_bio->mddev; - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; spin_lock_irqsave(&conf->device_lock, flags); list_add(&r1_bio->retry_list, &conf->retry_list); @@ -253,7 +253,7 @@ static void raid_end_bio_io(r1bio_t *r1_bio) */ static inline void update_head_pos(int disk, r1bio_t *r1_bio) { - conf_t *conf = mddev_to_conf(r1_bio->mddev); + conf_t *conf = r1_bio->mddev->private; conf->mirrors[disk].head_position = r1_bio->sector + (r1_bio->sectors); @@ -264,7 +264,7 @@ static void raid1_end_read_request(struct bio *bio, int error) int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private); int mirror; - conf_t *conf = mddev_to_conf(r1_bio->mddev); + conf_t *conf = r1_bio->mddev->private; mirror = r1_bio->read_disk; /* @@ -309,7 +309,7 @@ static void raid1_end_write_request(struct bio *bio, int error) int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private); int mirror, behind = test_bit(R1BIO_BehindIO, &r1_bio->state); - conf_t *conf = mddev_to_conf(r1_bio->mddev); + conf_t *conf = r1_bio->mddev->private; struct bio *to_put = NULL; @@ -541,7 +541,7 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio) static void unplug_slaves(mddev_t *mddev) { - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; int i; rcu_read_lock(); @@ -573,7 +573,7 @@ static void raid1_unplug(struct request_queue *q) static int raid1_congested(void *data, int bits) { mddev_t *mddev = data; - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; int i, ret = 0; rcu_read_lock(); @@ -772,7 +772,7 @@ do_sync_io: static int make_request(struct request_queue *q, struct bio * bio) { mddev_t *mddev = q->queuedata; - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; mirror_info_t *mirror; r1bio_t *r1_bio; struct bio *read_bio; @@ -991,7 +991,7 @@ static int make_request(struct request_queue *q, struct bio * bio) static void status(struct seq_file *seq, mddev_t *mddev) { - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; int i; seq_printf(seq, " [%d/%d] [", conf->raid_disks, @@ -1010,7 +1010,7 @@ static void status(struct seq_file *seq, mddev_t *mddev) static void error(mddev_t *mddev, mdk_rdev_t *rdev) { char b[BDEVNAME_SIZE]; - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; /* * If it is not operational, then we have already marked it as dead @@ -1214,7 +1214,7 @@ static void end_sync_write(struct bio *bio, int error) int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private); mddev_t *mddev = r1_bio->mddev; - conf_t *conf = 
mddev_to_conf(mddev); + conf_t *conf = mddev->private; int i; int mirror=0; @@ -1248,7 +1248,7 @@ static void end_sync_write(struct bio *bio, int error) static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio) { - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; int i; int disks = conf->raid_disks; struct bio *bio, *wbio; @@ -1562,7 +1562,7 @@ static void raid1d(mddev_t *mddev) r1bio_t *r1_bio; struct bio *bio; unsigned long flags; - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; struct list_head *head = &conf->retry_list; int unplug=0; mdk_rdev_t *rdev; @@ -1585,7 +1585,7 @@ static void raid1d(mddev_t *mddev) spin_unlock_irqrestore(&conf->device_lock, flags); mddev = r1_bio->mddev; - conf = mddev_to_conf(mddev); + conf = mddev->private; if (test_bit(R1BIO_IsSync, &r1_bio->state)) { sync_request_write(mddev, r1_bio); unplug = 1; @@ -1706,7 +1706,7 @@ static int init_resync(conf_t *conf) static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster) { - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; r1bio_t *r1_bio; struct bio *bio; sector_t max_sector, nr_sectors; @@ -2052,6 +2052,10 @@ static int run(mddev_t *mddev) goto out_free_conf; } + if (mddev->recovery_cp != MaxSector) + printk(KERN_NOTICE "raid1: %s is not clean" + " -- starting background reconstruction\n", + mdname(mddev)); printk(KERN_INFO "raid1: raid set %s active with %d out of %d mirrors\n", mdname(mddev), mddev->raid_disks - mddev->degraded, @@ -2087,7 +2091,7 @@ out: static int stop(mddev_t *mddev) { - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; struct bitmap *bitmap = mddev->bitmap; int behind_wait = 0; @@ -2155,16 +2159,16 @@ static int raid1_reshape(mddev_t *mddev) mempool_t *newpool, *oldpool; struct pool_info *newpoolinfo; mirror_info_t *newmirrors; - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; int cnt, raid_disks; unsigned long flags; int d, d2, err; /* Cannot change chunk_size, layout, or level */ - if (mddev->chunk_size != mddev->new_chunk || + if (mddev->chunk_sectors != mddev->new_chunk_sectors || mddev->layout != mddev->new_layout || mddev->level != mddev->new_level) { - mddev->new_chunk = mddev->chunk_size; + mddev->new_chunk_sectors = mddev->chunk_sectors; mddev->new_layout = mddev->layout; mddev->new_level = mddev->level; return -EINVAL; @@ -2252,7 +2256,7 @@ static int raid1_reshape(mddev_t *mddev) static void raid1_quiesce(mddev_t *mddev, int state) { - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; switch(state) { case 1: diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h index 1620eea3d57..e87b84deff6 100644 --- a/drivers/md/raid1.h +++ b/drivers/md/raid1.h @@ -64,12 +64,6 @@ struct r1_private_data_s { typedef struct r1_private_data_s conf_t; /* - * this is the only point in the RAID code where we violate - * C type safety. mddev->private is an 'opaque' pointer. - */ -#define mddev_to_conf(mddev) ((conf_t *) mddev->private) - -/* * this is our 'private' RAID1 bio. 
* * it contains information about what kind of IO operations were started diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 750550c1166..ae12ceafe10 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -188,7 +188,7 @@ static void put_all_bios(conf_t *conf, r10bio_t *r10_bio) static void free_r10bio(r10bio_t *r10_bio) { - conf_t *conf = mddev_to_conf(r10_bio->mddev); + conf_t *conf = r10_bio->mddev->private; /* * Wake up any possible resync thread that waits for the device @@ -202,7 +202,7 @@ static void free_r10bio(r10bio_t *r10_bio) static void put_buf(r10bio_t *r10_bio) { - conf_t *conf = mddev_to_conf(r10_bio->mddev); + conf_t *conf = r10_bio->mddev->private; mempool_free(r10_bio, conf->r10buf_pool); @@ -213,7 +213,7 @@ static void reschedule_retry(r10bio_t *r10_bio) { unsigned long flags; mddev_t *mddev = r10_bio->mddev; - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; spin_lock_irqsave(&conf->device_lock, flags); list_add(&r10_bio->retry_list, &conf->retry_list); @@ -245,7 +245,7 @@ static void raid_end_bio_io(r10bio_t *r10_bio) */ static inline void update_head_pos(int slot, r10bio_t *r10_bio) { - conf_t *conf = mddev_to_conf(r10_bio->mddev); + conf_t *conf = r10_bio->mddev->private; conf->mirrors[r10_bio->devs[slot].devnum].head_position = r10_bio->devs[slot].addr + (r10_bio->sectors); @@ -256,7 +256,7 @@ static void raid10_end_read_request(struct bio *bio, int error) int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); r10bio_t * r10_bio = (r10bio_t *)(bio->bi_private); int slot, dev; - conf_t *conf = mddev_to_conf(r10_bio->mddev); + conf_t *conf = r10_bio->mddev->private; slot = r10_bio->read_slot; @@ -297,7 +297,7 @@ static void raid10_end_write_request(struct bio *bio, int error) int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); r10bio_t * r10_bio = (r10bio_t *)(bio->bi_private); int slot, dev; - conf_t *conf = mddev_to_conf(r10_bio->mddev); + conf_t *conf = r10_bio->mddev->private; for (slot = 0; slot < conf->copies; slot++) if (r10_bio->devs[slot].bio == bio) @@ -461,7 +461,7 @@ static int raid10_mergeable_bvec(struct request_queue *q, mddev_t *mddev = q->queuedata; sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); int max; - unsigned int chunk_sectors = mddev->chunk_size >> 9; + unsigned int chunk_sectors = mddev->chunk_sectors; unsigned int bio_sectors = bvm->bi_size >> 9; max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9; @@ -596,7 +596,7 @@ rb_out: static void unplug_slaves(mddev_t *mddev) { - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; int i; rcu_read_lock(); @@ -628,7 +628,7 @@ static void raid10_unplug(struct request_queue *q) static int raid10_congested(void *data, int bits) { mddev_t *mddev = data; - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; int i, ret = 0; rcu_read_lock(); @@ -788,7 +788,7 @@ static void unfreeze_array(conf_t *conf) static int make_request(struct request_queue *q, struct bio * bio) { mddev_t *mddev = q->queuedata; - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; mirror_info_t *mirror; r10bio_t *r10_bio; struct bio *read_bio; @@ -981,11 +981,11 @@ static int make_request(struct request_queue *q, struct bio * bio) static void status(struct seq_file *seq, mddev_t *mddev) { - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; int i; if (conf->near_copies < conf->raid_disks) - seq_printf(seq, " %dK chunks", mddev->chunk_size/1024); + seq_printf(seq, " %dK chunks", 
mddev->chunk_sectors / 2); if (conf->near_copies > 1) seq_printf(seq, " %d near-copies", conf->near_copies); if (conf->far_copies > 1) { @@ -1006,7 +1006,7 @@ static void status(struct seq_file *seq, mddev_t *mddev) static void error(mddev_t *mddev, mdk_rdev_t *rdev) { char b[BDEVNAME_SIZE]; - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; /* * If it is not operational, then we have already marked it as dead @@ -1215,7 +1215,7 @@ abort: static void end_sync_read(struct bio *bio, int error) { r10bio_t * r10_bio = (r10bio_t *)(bio->bi_private); - conf_t *conf = mddev_to_conf(r10_bio->mddev); + conf_t *conf = r10_bio->mddev->private; int i,d; for (i=0; i<conf->copies; i++) @@ -1253,7 +1253,7 @@ static void end_sync_write(struct bio *bio, int error) int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); r10bio_t * r10_bio = (r10bio_t *)(bio->bi_private); mddev_t *mddev = r10_bio->mddev; - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; int i,d; for (i = 0; i < conf->copies; i++) @@ -1300,7 +1300,7 @@ static void end_sync_write(struct bio *bio, int error) */ static void sync_request_write(mddev_t *mddev, r10bio_t *r10_bio) { - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; int i, first; struct bio *tbio, *fbio; @@ -1400,7 +1400,7 @@ done: static void recovery_request_write(mddev_t *mddev, r10bio_t *r10_bio) { - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; int i, d; struct bio *bio, *wbio; @@ -1549,7 +1549,7 @@ static void raid10d(mddev_t *mddev) r10bio_t *r10_bio; struct bio *bio; unsigned long flags; - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; struct list_head *head = &conf->retry_list; int unplug=0; mdk_rdev_t *rdev; @@ -1572,7 +1572,7 @@ static void raid10d(mddev_t *mddev) spin_unlock_irqrestore(&conf->device_lock, flags); mddev = r10_bio->mddev; - conf = mddev_to_conf(mddev); + conf = mddev->private; if (test_bit(R10BIO_IsSync, &r10_bio->state)) { sync_request_write(mddev, r10_bio); unplug = 1; @@ -1680,7 +1680,7 @@ static int init_resync(conf_t *conf) static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster) { - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; r10bio_t *r10_bio; struct bio *biolist = NULL, *bio; sector_t max_sector, nr_sectors; @@ -2026,7 +2026,7 @@ static sector_t raid10_size(mddev_t *mddev, sector_t sectors, int raid_disks) { sector_t size; - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; if (!raid_disks) raid_disks = mddev->raid_disks; @@ -2050,9 +2050,10 @@ static int run(mddev_t *mddev) int nc, fc, fo; sector_t stride, size; - if (mddev->chunk_size < PAGE_SIZE) { + if (mddev->chunk_sectors < (PAGE_SIZE >> 9) || + !is_power_of_2(mddev->chunk_sectors)) { printk(KERN_ERR "md/raid10: chunk size must be " - "at least PAGE_SIZE(%ld).\n", PAGE_SIZE); + "at least PAGE_SIZE(%ld) and be a power of 2.\n", PAGE_SIZE); return -EINVAL; } @@ -2095,8 +2096,8 @@ static int run(mddev_t *mddev) conf->far_copies = fc; conf->copies = nc*fc; conf->far_offset = fo; - conf->chunk_mask = (sector_t)(mddev->chunk_size>>9)-1; - conf->chunk_shift = ffz(~mddev->chunk_size) - 9; + conf->chunk_mask = mddev->chunk_sectors - 1; + conf->chunk_shift = ffz(~mddev->chunk_sectors); size = mddev->dev_sectors >> conf->chunk_shift; sector_div(size, fc); size = size * conf->raid_disks; @@ -2185,6 +2186,10 @@ static int run(mddev_t *mddev) goto out_free_conf; } + if (mddev->recovery_cp != MaxSector) + 
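
conf->chunk_shift uses the kernel idiom ffz(~n), "find first zero bit of the complement", which for a power-of-2 n is just log2(n); the mask is then n - 1. raid10's run() enforces the power-of-2 requirement right above. A sketch assuming a GCC-style __builtin_ctz(), which computes the same thing:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint32_t chunk_sectors = 128;	/* must be a power of 2 for raid10 */

		int chunk_shift = __builtin_ctz(chunk_sectors);	/* == ffz(~n) == 7 */
		uint32_t chunk_mask = chunk_sectors - 1;	/* 0x7f */

		printf("shift=%d mask=%#x\n", chunk_shift, chunk_mask);
		return 0;
	}
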
printk(KERN_NOTICE "raid10: %s is not clean" + " -- starting background reconstruction\n", + mdname(mddev)); printk(KERN_INFO "raid10: raid set %s active with %d out of %d devices\n", mdname(mddev), mddev->raid_disks - mddev->degraded, @@ -2204,7 +2209,8 @@ static int run(mddev_t *mddev) * maybe... */ { - int stripe = conf->raid_disks * (mddev->chunk_size / PAGE_SIZE); + int stripe = conf->raid_disks * + ((mddev->chunk_sectors << 9) / PAGE_SIZE); stripe /= conf->near_copies; if (mddev->queue->backing_dev_info.ra_pages < 2* stripe) mddev->queue->backing_dev_info.ra_pages = 2* stripe; @@ -2227,7 +2233,7 @@ out: static int stop(mddev_t *mddev) { - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; raise_barrier(conf, 0); lower_barrier(conf); @@ -2245,7 +2251,7 @@ static int stop(mddev_t *mddev) static void raid10_quiesce(mddev_t *mddev, int state) { - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; switch(state) { case 1: diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h index 244dbe507a5..59cd1efb8d3 100644 --- a/drivers/md/raid10.h +++ b/drivers/md/raid10.h @@ -62,12 +62,6 @@ struct r10_private_data_s { typedef struct r10_private_data_s conf_t; /* - * this is the only point in the RAID code where we violate - * C type safety. mddev->private is an 'opaque' pointer. - */ -#define mddev_to_conf(mddev) ((conf_t *) mddev->private) - -/* * this is our 'private' RAID10 bio. * * it contains information about what kind of IO operations were started diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index bef87669823..f9f991e6e13 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -1274,8 +1274,8 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector, sector_t new_sector; int algorithm = previous ? conf->prev_algo : conf->algorithm; - int sectors_per_chunk = previous ? (conf->prev_chunk >> 9) - : (conf->chunk_size >> 9); + int sectors_per_chunk = previous ? conf->prev_chunk_sectors + : conf->chunk_sectors; int raid_disks = previous ? conf->previous_raid_disks : conf->raid_disks; int data_disks = raid_disks - conf->max_degraded; @@ -1480,8 +1480,8 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous) int raid_disks = sh->disks; int data_disks = raid_disks - conf->max_degraded; sector_t new_sector = sh->sector, check; - int sectors_per_chunk = previous ? (conf->prev_chunk >> 9) - : (conf->chunk_size >> 9); + int sectors_per_chunk = previous ? conf->prev_chunk_sectors + : conf->chunk_sectors; int algorithm = previous ? conf->prev_algo : conf->algorithm; sector_t stripe; @@ -1997,8 +1997,7 @@ static void stripe_set_idx(sector_t stripe, raid5_conf_t *conf, int previous, struct stripe_head *sh) { int sectors_per_chunk = - previous ? (conf->prev_chunk >> 9) - : (conf->chunk_size >> 9); + previous ? conf->prev_chunk_sectors : conf->chunk_sectors; int dd_idx; int chunk_offset = sector_div(stripe, sectors_per_chunk); int disks = previous ? 
conf->previous_raid_disks : conf->raid_disks; @@ -3284,7 +3283,7 @@ static void activate_bit_delay(raid5_conf_t *conf) static void unplug_slaves(mddev_t *mddev) { - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; int i; rcu_read_lock(); @@ -3308,7 +3307,7 @@ static void unplug_slaves(mddev_t *mddev) static void raid5_unplug_device(struct request_queue *q) { mddev_t *mddev = q->queuedata; - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; unsigned long flags; spin_lock_irqsave(&conf->device_lock, flags); @@ -3327,7 +3326,7 @@ static void raid5_unplug_device(struct request_queue *q) static int raid5_congested(void *data, int bits) { mddev_t *mddev = data; - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; /* No difference between reads and writes. Just check * how busy the stripe_cache is @@ -3352,14 +3351,14 @@ static int raid5_mergeable_bvec(struct request_queue *q, mddev_t *mddev = q->queuedata; sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); int max; - unsigned int chunk_sectors = mddev->chunk_size >> 9; + unsigned int chunk_sectors = mddev->chunk_sectors; unsigned int bio_sectors = bvm->bi_size >> 9; if ((bvm->bi_rw & 1) == WRITE) return biovec->bv_len; /* always allow writes to be mergeable */ - if (mddev->new_chunk < mddev->chunk_size) - chunk_sectors = mddev->new_chunk >> 9; + if (mddev->new_chunk_sectors < mddev->chunk_sectors) + chunk_sectors = mddev->new_chunk_sectors; max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9; if (max < 0) max = 0; if (max <= biovec->bv_len && bio_sectors == 0) @@ -3372,11 +3371,11 @@ static int raid5_mergeable_bvec(struct request_queue *q, static int in_chunk_boundary(mddev_t *mddev, struct bio *bio) { sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev); - unsigned int chunk_sectors = mddev->chunk_size >> 9; + unsigned int chunk_sectors = mddev->chunk_sectors; unsigned int bio_sectors = bio->bi_size >> 9; - if (mddev->new_chunk < mddev->chunk_size) - chunk_sectors = mddev->new_chunk >> 9; + if (mddev->new_chunk_sectors < mddev->chunk_sectors) + chunk_sectors = mddev->new_chunk_sectors; return chunk_sectors >= ((sector & (chunk_sectors - 1)) + bio_sectors); } @@ -3440,7 +3439,7 @@ static void raid5_align_endio(struct bio *bi, int error) bio_put(bi); mddev = raid_bi->bi_bdev->bd_disk->queue->queuedata; - conf = mddev_to_conf(mddev); + conf = mddev->private; rdev = (void*)raid_bi->bi_next; raid_bi->bi_next = NULL; @@ -3482,7 +3481,7 @@ static int bio_fits_rdev(struct bio *bi) static int chunk_aligned_read(struct request_queue *q, struct bio * raid_bio) { mddev_t *mddev = q->queuedata; - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; unsigned int dd_idx; struct bio* align_bi; mdk_rdev_t *rdev; @@ -3599,7 +3598,7 @@ static struct stripe_head *__get_priority_stripe(raid5_conf_t *conf) static int make_request(struct request_queue *q, struct bio * bi) { mddev_t *mddev = q->queuedata; - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; int dd_idx; sector_t new_sector; sector_t logical_sector, last_sector; @@ -3696,6 +3695,7 @@ static int make_request(struct request_queue *q, struct bio * bi) spin_unlock_irq(&conf->device_lock); if (must_retry) { release_stripe(sh); + schedule(); goto retry; } } @@ -3791,10 +3791,10 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped * If old and new chunk sizes differ, we need to 
process the * largest of these */ - if (mddev->new_chunk > mddev->chunk_size) - reshape_sectors = mddev->new_chunk / 512; + if (mddev->new_chunk_sectors > mddev->chunk_sectors) + reshape_sectors = mddev->new_chunk_sectors; else - reshape_sectors = mddev->chunk_size / 512; + reshape_sectors = mddev->chunk_sectors; /* we update the metadata when there is more than 3Meg * in the block range (that is rather arbitrary, should @@ -3917,7 +3917,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped 1, &dd_idx, NULL); last_sector = raid5_compute_sector(conf, ((stripe_addr+reshape_sectors) - *(new_data_disks) - 1), + * new_data_disks - 1), 1, &dd_idx, NULL); if (last_sector >= mddev->dev_sectors) last_sector = mddev->dev_sectors - 1; @@ -3946,7 +3946,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped wait_event(conf->wait_for_overlap, atomic_read(&conf->reshape_stripes) == 0); mddev->reshape_position = conf->reshape_progress; - mddev->curr_resync_completed = mddev->curr_resync; + mddev->curr_resync_completed = mddev->curr_resync + reshape_sectors; conf->reshape_checkpoint = jiffies; set_bit(MD_CHANGE_DEVS, &mddev->flags); md_wakeup_thread(mddev->thread); @@ -4129,7 +4129,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio) static void raid5d(mddev_t *mddev) { struct stripe_head *sh; - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; int handled; pr_debug("+++ raid5d active\n"); @@ -4185,7 +4185,7 @@ static void raid5d(mddev_t *mddev) static ssize_t raid5_show_stripe_cache_size(mddev_t *mddev, char *page) { - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; if (conf) return sprintf(page, "%d\n", conf->max_nr_stripes); else @@ -4195,7 +4195,7 @@ raid5_show_stripe_cache_size(mddev_t *mddev, char *page) static ssize_t raid5_store_stripe_cache_size(mddev_t *mddev, const char *page, size_t len) { - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; unsigned long new; int err; @@ -4233,7 +4233,7 @@ raid5_stripecache_size = __ATTR(stripe_cache_size, S_IRUGO | S_IWUSR, static ssize_t raid5_show_preread_threshold(mddev_t *mddev, char *page) { - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; if (conf) return sprintf(page, "%d\n", conf->bypass_threshold); else @@ -4243,7 +4243,7 @@ raid5_show_preread_threshold(mddev_t *mddev, char *page) static ssize_t raid5_store_preread_threshold(mddev_t *mddev, const char *page, size_t len) { - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; unsigned long new; if (len >= PAGE_SIZE) return -EINVAL; @@ -4267,7 +4267,7 @@ raid5_preread_bypass_threshold = __ATTR(preread_bypass_threshold, static ssize_t stripe_cache_active_show(mddev_t *mddev, char *page) { - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; if (conf) return sprintf(page, "%d\n", atomic_read(&conf->active_stripes)); else @@ -4291,7 +4291,7 @@ static struct attribute_group raid5_attrs_group = { static sector_t raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks) { - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; if (!sectors) sectors = mddev->dev_sectors; @@ -4303,8 +4303,8 @@ raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks) raid_disks = conf->previous_raid_disks; } - sectors &= ~((sector_t)mddev->chunk_size/512 - 1); - sectors &= ~((sector_t)mddev->new_chunk/512 - 1); + 
sectors &= ~((sector_t)mddev->chunk_sectors - 1); + sectors &= ~((sector_t)mddev->new_chunk_sectors - 1); return sectors * (raid_disks - conf->max_degraded); } @@ -4336,9 +4336,11 @@ static raid5_conf_t *setup_conf(mddev_t *mddev) return ERR_PTR(-EINVAL); } - if (!mddev->new_chunk || mddev->new_chunk % PAGE_SIZE) { + if (!mddev->new_chunk_sectors || + (mddev->new_chunk_sectors << 9) % PAGE_SIZE || + !is_power_of_2(mddev->new_chunk_sectors)) { printk(KERN_ERR "raid5: invalid chunk size %d for %s\n", - mddev->new_chunk, mdname(mddev)); + mddev->new_chunk_sectors << 9, mdname(mddev)); return ERR_PTR(-EINVAL); } @@ -4401,7 +4403,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev) conf->fullsync = 1; } - conf->chunk_size = mddev->new_chunk; + conf->chunk_sectors = mddev->new_chunk_sectors; conf->level = mddev->new_level; if (conf->level == 6) conf->max_degraded = 2; @@ -4411,7 +4413,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev) conf->max_nr_stripes = NR_STRIPES; conf->reshape_progress = mddev->reshape_position; if (conf->reshape_progress != MaxSector) { - conf->prev_chunk = mddev->chunk_size; + conf->prev_chunk_sectors = mddev->chunk_sectors; conf->prev_algo = mddev->layout; } @@ -4453,6 +4455,10 @@ static int run(mddev_t *mddev) int working_disks = 0; mdk_rdev_t *rdev; + if (mddev->recovery_cp != MaxSector) + printk(KERN_NOTICE "raid5: %s is not clean" + " -- starting background reconstruction\n", + mdname(mddev)); if (mddev->reshape_position != MaxSector) { /* Check that we can continue the reshape. * Currently only disks can change, it must @@ -4475,7 +4481,7 @@ static int run(mddev_t *mddev) * geometry. */ here_new = mddev->reshape_position; - if (sector_div(here_new, (mddev->new_chunk>>9)* + if (sector_div(here_new, mddev->new_chunk_sectors * (mddev->raid_disks - max_degraded))) { printk(KERN_ERR "raid5: reshape_position not " "on a stripe boundary\n"); @@ -4483,7 +4489,7 @@ static int run(mddev_t *mddev) } /* here_new is the stripe we will write to */ here_old = mddev->reshape_position; - sector_div(here_old, (mddev->chunk_size>>9)* + sector_div(here_old, mddev->chunk_sectors * (old_disks-max_degraded)); /* here_old is the first stripe that we might need to read * from */ @@ -4498,7 +4504,7 @@ static int run(mddev_t *mddev) } else { BUG_ON(mddev->level != mddev->new_level); BUG_ON(mddev->layout != mddev->new_layout); - BUG_ON(mddev->chunk_size != mddev->new_chunk); + BUG_ON(mddev->chunk_sectors != mddev->new_chunk_sectors); BUG_ON(mddev->delta_disks != 0); } @@ -4532,7 +4538,7 @@ static int run(mddev_t *mddev) } /* device size must be a multiple of chunk size */ - mddev->dev_sectors &= ~(mddev->chunk_size / 512 - 1); + mddev->dev_sectors &= ~(mddev->chunk_sectors - 1); mddev->resync_max_sectors = mddev->dev_sectors; if (mddev->degraded > 0 && @@ -4581,7 +4587,7 @@ static int run(mddev_t *mddev) { int data_disks = conf->previous_raid_disks - conf->max_degraded; int stripe = data_disks * - (mddev->chunk_size / PAGE_SIZE); + ((mddev->chunk_sectors << 9) / PAGE_SIZE); if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe) mddev->queue->backing_dev_info.ra_pages = 2 * stripe; } @@ -4678,7 +4684,8 @@ static void status(struct seq_file *seq, mddev_t *mddev) raid5_conf_t *conf = (raid5_conf_t *) mddev->private; int i; - seq_printf (seq, " level %d, %dk chunk, algorithm %d", mddev->level, mddev->chunk_size >> 10, mddev->layout); + seq_printf(seq, " level %d, %dk chunk, algorithm %d", mddev->level, + mddev->chunk_sectors / 2, mddev->layout); seq_printf (seq, " [%d/%d] [", 
conf->raid_disks, conf->raid_disks - mddev->degraded); for (i = 0; i < conf->raid_disks; i++) seq_printf (seq, "%s", @@ -4826,7 +4833,7 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors) * any io in the removed space completes, but it hardly seems * worth it. */ - sectors &= ~((sector_t)mddev->chunk_size/512 - 1); + sectors &= ~((sector_t)mddev->chunk_sectors - 1); md_set_array_sectors(mddev, raid5_size(mddev, sectors, mddev->raid_disks)); if (mddev->array_sectors > @@ -4843,14 +4850,37 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors) return 0; } -static int raid5_check_reshape(mddev_t *mddev) +static int check_stripe_cache(mddev_t *mddev) { - raid5_conf_t *conf = mddev_to_conf(mddev); + /* Can only proceed if there are plenty of stripe_heads. + * We need a minimum of one full stripe, and for sensible progress + * it is best to have about 4 times that. + * If we require 4 times, then the default 256 4K stripe_heads will + * allow for chunk sizes up to 256K, which is probably OK. + * If the chunk size is greater, user-space should request more + * stripe_heads first. + */ + raid5_conf_t *conf = mddev->private; + if (((mddev->chunk_sectors << 9) / STRIPE_SIZE) * 4 + > conf->max_nr_stripes || + ((mddev->new_chunk_sectors << 9) / STRIPE_SIZE) * 4 + > conf->max_nr_stripes) { + printk(KERN_WARNING "raid5: reshape: not enough stripes. Needed %lu\n", + ((max(mddev->chunk_sectors, mddev->new_chunk_sectors) << 9) + / STRIPE_SIZE)*4); + return 0; + } + return 1; +} + +static int check_reshape(mddev_t *mddev) +{ + raid5_conf_t *conf = mddev->private; if (mddev->delta_disks == 0 && mddev->new_layout == mddev->layout && - mddev->new_chunk == mddev->chunk_size) - return -EINVAL; /* nothing to do */ + mddev->new_chunk_sectors == mddev->chunk_sectors) + return 0; /* nothing to do */ if (mddev->bitmap) /* Cannot grow a bitmap yet */ return -EBUSY; @@ -4869,28 +4899,15 @@ static int check_reshape(mddev_t *mddev) return -EINVAL; } - /* Can only proceed if there are plenty of stripe_heads. - * We need a minimum of one full stripe,, and for sensible progress - * it is best to have about 4 times that. - * If we require 4 times, then the default 256 4K stripe_heads will - * allow for chunk sizes up to 256K, which is probably OK. - * If the chunk size is greater, user-space should request more - * stripe_heads first. - */ - if ((mddev->chunk_size / STRIPE_SIZE) * 4 > conf->max_nr_stripes || - (mddev->new_chunk / STRIPE_SIZE) * 4 > conf->max_nr_stripes) { - printk(KERN_WARNING "raid5: reshape: not enough stripes.
Needed %lu\n", - (max(mddev->chunk_size, mddev->new_chunk) - / STRIPE_SIZE)*4); + if (!check_stripe_cache(mddev)) return -ENOSPC; - } return resize_stripes(conf, conf->raid_disks + mddev->delta_disks); } static int raid5_start_reshape(mddev_t *mddev) { - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; mdk_rdev_t *rdev; int spares = 0; int added_devices = 0; @@ -4899,6 +4916,9 @@ static int raid5_start_reshape(mddev_t *mddev) if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) return -EBUSY; + if (!check_stripe_cache(mddev)) + return -ENOSPC; + list_for_each_entry(rdev, &mddev->disks, same_set) if (rdev->raid_disk < 0 && !test_bit(Faulty, &rdev->flags)) @@ -4925,8 +4945,8 @@ static int raid5_start_reshape(mddev_t *mddev) spin_lock_irq(&conf->device_lock); conf->previous_raid_disks = conf->raid_disks; conf->raid_disks += mddev->delta_disks; - conf->prev_chunk = conf->chunk_size; - conf->chunk_size = mddev->new_chunk; + conf->prev_chunk_sectors = conf->chunk_sectors; + conf->chunk_sectors = mddev->new_chunk_sectors; conf->prev_algo = conf->algorithm; conf->algorithm = mddev->new_layout; if (mddev->delta_disks < 0) @@ -5008,7 +5028,7 @@ static void end_reshape(raid5_conf_t *conf) */ { int data_disks = conf->raid_disks - conf->max_degraded; - int stripe = data_disks * (conf->chunk_size + int stripe = data_disks * ((conf->chunk_sectors << 9) / PAGE_SIZE); if (conf->mddev->queue->backing_dev_info.ra_pages < 2 * stripe) conf->mddev->queue->backing_dev_info.ra_pages = 2 * stripe; @@ -5022,7 +5042,7 @@ static void end_reshape(raid5_conf_t *conf) static void raid5_finish_reshape(mddev_t *mddev) { struct block_device *bdev; - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { @@ -5053,7 +5073,7 @@ static void raid5_finish_reshape(mddev_t *mddev) raid5_remove_disk(mddev, d); } mddev->layout = conf->algorithm; - mddev->chunk_size = conf->chunk_size; + mddev->chunk_sectors = conf->chunk_sectors; mddev->reshape_position = MaxSector; mddev->delta_disks = 0; } @@ -5061,7 +5081,7 @@ static void raid5_finish_reshape(mddev_t *mddev) static void raid5_quiesce(mddev_t *mddev, int state) { - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; switch(state) { case 2: /* resume for a suspend */ @@ -5111,7 +5131,7 @@ static void *raid5_takeover_raid1(mddev_t *mddev) mddev->new_level = 5; mddev->new_layout = ALGORITHM_LEFT_SYMMETRIC; - mddev->new_chunk = chunksect << 9; + mddev->new_chunk_sectors = chunksect; return setup_conf(mddev); } @@ -5150,24 +5170,24 @@ static void *raid5_takeover_raid6(mddev_t *mddev) } -static int raid5_reconfig(mddev_t *mddev, int new_layout, int new_chunk) +static int raid5_check_reshape(mddev_t *mddev) { /* For a 2-drive array, the layout and chunk size can be changed * immediately as not restriping is needed. * For larger arrays we record the new value - after validation * to be used by a reshape pass. 
*/ - raid5_conf_t *conf = mddev_to_conf(mddev); + raid5_conf_t *conf = mddev->private; + int new_chunk = mddev->new_chunk_sectors; - if (new_layout >= 0 && !algorithm_valid_raid5(new_layout)) + if (mddev->new_layout >= 0 && !algorithm_valid_raid5(mddev->new_layout)) return -EINVAL; if (new_chunk > 0) { - if (new_chunk & (new_chunk-1)) - /* not a power of 2 */ + if (!is_power_of_2(new_chunk)) return -EINVAL; - if (new_chunk < PAGE_SIZE) + if (new_chunk < (PAGE_SIZE>>9)) return -EINVAL; - if (mddev->array_sectors & ((new_chunk>>9)-1)) + if (mddev->array_sectors & (new_chunk-1)) /* not factor of array size */ return -EINVAL; } @@ -5175,49 +5195,39 @@ static int raid5_reconfig(mddev_t *mddev, int new_layout, int new_chunk) /* They look valid */ if (mddev->raid_disks == 2) { - - if (new_layout >= 0) { - conf->algorithm = new_layout; - mddev->layout = mddev->new_layout = new_layout; + /* can make the change immediately */ + if (mddev->new_layout >= 0) { + conf->algorithm = mddev->new_layout; + mddev->layout = mddev->new_layout; } if (new_chunk > 0) { - conf->chunk_size = new_chunk; - mddev->chunk_size = mddev->new_chunk = new_chunk; + conf->chunk_sectors = new_chunk; + mddev->chunk_sectors = new_chunk; } set_bit(MD_CHANGE_DEVS, &mddev->flags); md_wakeup_thread(mddev->thread); - } else { - if (new_layout >= 0) - mddev->new_layout = new_layout; - if (new_chunk > 0) - mddev->new_chunk = new_chunk; } - return 0; + return check_reshape(mddev); } -static int raid6_reconfig(mddev_t *mddev, int new_layout, int new_chunk) +static int raid6_check_reshape(mddev_t *mddev) { - if (new_layout >= 0 && !algorithm_valid_raid6(new_layout)) + int new_chunk = mddev->new_chunk_sectors; + + if (mddev->new_layout >= 0 && !algorithm_valid_raid6(mddev->new_layout)) return -EINVAL; if (new_chunk > 0) { - if (new_chunk & (new_chunk-1)) - /* not a power of 2 */ + if (!is_power_of_2(new_chunk)) return -EINVAL; - if (new_chunk < PAGE_SIZE) + if (new_chunk < (PAGE_SIZE >> 9)) return -EINVAL; - if (mddev->array_sectors & ((new_chunk>>9)-1)) + if (mddev->array_sectors & (new_chunk-1)) /* not factor of array size */ return -EINVAL; } /* They look valid */ - - if (new_layout >= 0) - mddev->new_layout = new_layout; - if (new_chunk > 0) - mddev->new_chunk = new_chunk; - - return 0; + return check_reshape(mddev); } static void *raid5_takeover(mddev_t *mddev) @@ -5227,8 +5237,6 @@ static void *raid5_takeover(mddev_t *mddev) * raid1 - if there are two drives. We need to know the chunk size * raid4 - trivial - just use a raid4 layout.
* raid6 - Providing it is a *_6 layout - * - * For now, just do raid1 */ if (mddev->level == 1) @@ -5310,12 +5318,11 @@ static struct mdk_personality raid6_personality = .sync_request = sync_request, .resize = raid5_resize, .size = raid5_size, - .check_reshape = raid5_check_reshape, + .check_reshape = raid6_check_reshape, .start_reshape = raid5_start_reshape, .finish_reshape = raid5_finish_reshape, .quiesce = raid5_quiesce, .takeover = raid6_takeover, - .reconfig = raid6_reconfig, }; static struct mdk_personality raid5_personality = { @@ -5338,7 +5345,6 @@ static struct mdk_personality raid5_personality = .finish_reshape = raid5_finish_reshape, .quiesce = raid5_quiesce, .takeover = raid5_takeover, - .reconfig = raid5_reconfig, }; static struct mdk_personality raid4_personality = diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 52ba99954de..9459689c4ea 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -334,7 +334,8 @@ struct raid5_private_data { struct hlist_head *stripe_hashtbl; mddev_t *mddev; struct disk_info *spare; - int chunk_size, level, algorithm; + int chunk_sectors; + int level, algorithm; int max_degraded; int raid_disks; int max_nr_stripes; @@ -350,7 +351,8 @@ struct raid5_private_data { */ sector_t reshape_safe; int previous_raid_disks; - int prev_chunk, prev_algo; + int prev_chunk_sectors; + int prev_algo; short generation; /* increments with every reshape */ unsigned long reshape_checkpoint; /* Time we last updated * metadata */ @@ -408,8 +410,6 @@ struct raid5_private_data { typedef struct raid5_private_data raid5_conf_t; -#define mddev_to_conf(mddev) ((raid5_conf_t *) mddev->private) - /* * Our supported algorithms */ diff --git a/drivers/misc/sgi-gru/Makefile b/drivers/misc/sgi-gru/Makefile index bcd8136d2f9..7c4c306dfa8 100644 --- a/drivers/misc/sgi-gru/Makefile +++ b/drivers/misc/sgi-gru/Makefile @@ -3,5 +3,5 @@ ifdef CONFIG_SGI_GRU_DEBUG endif obj-$(CONFIG_SGI_GRU) := gru.o -gru-y := grufile.o grumain.o grufault.o grutlbpurge.o gruprocfs.o grukservices.o gruhandles.o +gru-y := grufile.o grumain.o grufault.o grutlbpurge.o gruprocfs.o grukservices.o gruhandles.o grukdump.o diff --git a/drivers/misc/sgi-gru/gru_instructions.h b/drivers/misc/sgi-gru/gru_instructions.h index 3fde33c1e8f..3c9c06618e6 100644 --- a/drivers/misc/sgi-gru/gru_instructions.h +++ b/drivers/misc/sgi-gru/gru_instructions.h @@ -81,6 +81,8 @@ struct control_block_extended_exc_detail { int exopc; long exceptdet0; int exceptdet1; + int cbrstate; + int cbrexecstatus; }; /* @@ -107,7 +109,8 @@ struct gru_instruction_bits { unsigned char reserved2: 2; unsigned char istatus: 2; unsigned char isubstatus:4; - unsigned char reserved3: 2; + unsigned char reserved3: 1; + unsigned char tlb_fault_color: 1; /* DW 1 */ unsigned long idef4; /* 42 bits: TRi1, BufSize */ /* DW 2-6 */ @@ -250,17 +253,37 @@ struct gru_instruction { #define CBE_CAUSE_HA_RESPONSE_FATAL (1 << 13) #define CBE_CAUSE_HA_RESPONSE_NON_FATAL (1 << 14) #define CBE_CAUSE_ADDRESS_SPACE_DECODE_ERROR (1 << 15) -#define CBE_CAUSE_RESPONSE_DATA_ERROR (1 << 16) -#define CBE_CAUSE_PROTOCOL_STATE_DATA_ERROR (1 << 17) +#define CBE_CAUSE_PROTOCOL_STATE_DATA_ERROR (1 << 16) +#define CBE_CAUSE_RA_RESPONSE_DATA_ERROR (1 << 17) +#define CBE_CAUSE_HA_RESPONSE_DATA_ERROR (1 << 18) + +/* CBE cbrexecstatus bits */ +#define CBR_EXS_ABORT_OCC_BIT 0 +#define CBR_EXS_INT_OCC_BIT 1 +#define CBR_EXS_PENDING_BIT 2 +#define CBR_EXS_QUEUED_BIT 3 +#define CBR_EXS_TLB_INVAL_BIT 4 +#define CBR_EXS_EXCEPTION_BIT 5 + +#define CBR_EXS_ABORT_OCC (1 << 
CBR_EXS_ABORT_OCC_BIT) +#define CBR_EXS_INT_OCC (1 << CBR_EXS_INT_OCC_BIT) +#define CBR_EXS_PENDING (1 << CBR_EXS_PENDING_BIT) +#define CBR_EXS_QUEUED (1 << CBR_EXS_QUEUED_BIT) +#define CBR_TLB_INVAL (1 << CBR_EXS_TLB_INVAL_BIT) +#define CBR_EXS_EXCEPTION (1 << CBR_EXS_EXCEPTION_BIT) /* * Exceptions are retried for the following cases. If any OTHER bits are set * in ecause, the exception is not retryable. */ -#define EXCEPTION_RETRY_BITS (CBE_CAUSE_RESPONSE_DATA_ERROR | \ - CBE_CAUSE_RA_REQUEST_TIMEOUT | \ +#define EXCEPTION_RETRY_BITS (CBE_CAUSE_EXECUTION_HW_ERROR | \ CBE_CAUSE_TLBHW_ERROR | \ - CBE_CAUSE_HA_REQUEST_TIMEOUT) + CBE_CAUSE_RA_REQUEST_TIMEOUT | \ + CBE_CAUSE_RA_RESPONSE_NON_FATAL | \ + CBE_CAUSE_HA_RESPONSE_NON_FATAL | \ + CBE_CAUSE_RA_RESPONSE_DATA_ERROR | \ + CBE_CAUSE_HA_RESPONSE_DATA_ERROR \ + ) /* Message queue head structure */ union gru_mesqhead { @@ -600,9 +623,11 @@ static inline int gru_get_cb_substatus(void *cb) return cbs->isubstatus; } -/* Check the status of a CB. If the CB is in UPM mode, call the - * OS to handle the UPM status. - * Returns the CB status field value (0 for normal completion) +/* + * User interface to check an instruction status. UPM and exceptions + * are handled automatically. However, this function does NOT wait + * for an active instruction to complete. + * + */ static inline int gru_check_status(void *cb) { @@ -610,34 +635,31 @@ static inline int gru_check_status(void *cb) int ret; ret = cbs->istatus; - if (ret == CBS_CALL_OS) + if (ret != CBS_ACTIVE) ret = gru_check_status_proc(cb); return ret; } -/* Wait for CB to complete. - * Returns the CB status field value (0 for normal completion) +/* + * User interface (via inline function) to wait for an instruction + * to complete. Completion status (IDLE or EXCEPTION) is returned + * to the user. Exceptions due to hardware errors are automatically + * retried before returning an exception. + * + */ static inline int gru_wait(void *cb) { - struct gru_control_block_status *cbs = (void *)cb; - int ret = cbs->istatus; - - if (ret != CBS_IDLE) - ret = gru_wait_proc(cb); - return ret; + return gru_wait_proc(cb); } -/* Wait for CB to complete. Aborts program if error. (Note: error does NOT +/* + * Wait for CB to complete. Aborts program if error. (Note: error does NOT * mean TLB mis - only fatal errors such as memory parity error or user * bugs will cause termination. */ static inline void gru_wait_abort(void *cb) { - struct gru_control_block_status *cbs = (void *)cb; - - if (cbs->istatus != CBS_IDLE) - gru_wait_abort_proc(cb); + gru_wait_abort_proc(cb); } diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c index ab118558552..679e0177828 100644 --- a/drivers/misc/sgi-gru/grufault.c +++ b/drivers/misc/sgi-gru/grufault.c @@ -166,7 +166,8 @@ static inline struct gru_state *irq_to_gru(int irq) * the GRU, atomic operations must be used to clear bits. */ static void get_clear_fault_map(struct gru_state *gru, - struct gru_tlb_fault_map *map) + struct gru_tlb_fault_map *imap, + struct gru_tlb_fault_map *dmap) { unsigned long i, k; struct gru_tlb_fault_map *tfm; @@ -177,7 +178,11 @@ static void get_clear_fault_map(struct gru_state *gru, k = tfm->fault_bits[i]; if (k) k = xchg(&tfm->fault_bits[i], 0UL); - map->fault_bits[i] = k; + imap->fault_bits[i] = k; + k = tfm->done_bits[i]; + if (k) + k = xchg(&tfm->done_bits[i], 0UL); + dmap->fault_bits[i] = k; } /* @@ -334,6 +339,12 @@ static int gru_try_dropin(struct gru_thread_state *gts, * Might be a hardware race OR a stupid user.
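The retry policy encoded by EXCEPTION_RETRY_BITS and the new CBR_EXS_ABORT_OCC test can be read as a two-part predicate. A sketch (illustrative helper, not part of the patch) mirroring the test that gru_retry_exception() in grukservices.c applies further down:

    static int cbe_exception_retryable(struct control_block_extended_exc_detail *d)
    {
            if (d->ecause & ~EXCEPTION_RETRY_BITS)
                    return 0;       /* a non-retryable cause bit is set */
            if (d->cbrexecstatus & CBR_EXS_ABORT_OCC)
                    return 0;       /* instruction was aborted; do not retry */
            return 1;
    }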
Ignore FMM because FMM * is a transient state. */ + if (tfh->status != TFHSTATUS_EXCEPTION) { + gru_flush_cache(tfh); + if (tfh->status != TFHSTATUS_EXCEPTION) + goto failnoexception; + STAT(tfh_stale_on_fault); + } if (tfh->state == TFHSTATE_IDLE) goto failidle; if (tfh->state == TFHSTATE_MISS_FMM && cb) @@ -401,8 +412,17 @@ failfmm: gru_dbg(grudev, "FAILED fmm tfh: 0x%p, state %d\n", tfh, tfh->state); return 0; +failnoexception: + /* TFH status did not show exception pending */ + gru_flush_cache(tfh); + if (cb) + gru_flush_cache(cb); + STAT(tlb_dropin_fail_no_exception); + gru_dbg(grudev, "FAILED non-exception tfh: 0x%p, status %d, state %d\n", tfh, tfh->status, tfh->state); + return 0; + failidle: - /* TFH was idle - no miss pending */ + /* TFH state was idle - no miss pending */ gru_flush_cache(tfh); if (cb) gru_flush_cache(cb); @@ -438,7 +458,7 @@ failactive: irqreturn_t gru_intr(int irq, void *dev_id) { struct gru_state *gru; - struct gru_tlb_fault_map map; + struct gru_tlb_fault_map imap, dmap; struct gru_thread_state *gts; struct gru_tlb_fault_handle *tfh = NULL; int cbrnum, ctxnum; @@ -451,11 +471,15 @@ irqreturn_t gru_intr(int irq, void *dev_id) raw_smp_processor_id(), irq); return IRQ_NONE; } - get_clear_fault_map(gru, &map); - gru_dbg(grudev, "irq %d, gru %x, map 0x%lx\n", irq, gru->gs_gid, - map.fault_bits[0]); + get_clear_fault_map(gru, &imap, &dmap); + + for_each_cbr_in_tfm(cbrnum, dmap.fault_bits) { + complete(gru->gs_blade->bs_async_wq); + gru_dbg(grudev, "gid %d, cbr_done %d, done %d\n", + gru->gs_gid, cbrnum, gru->gs_blade->bs_async_wq->done); + } - for_each_cbr_in_tfm(cbrnum, map.fault_bits) { + for_each_cbr_in_tfm(cbrnum, imap.fault_bits) { tfh = get_tfh_by_index(gru, cbrnum); prefetchw(tfh); /* Helps on hdw, required for emulator */ @@ -472,7 +496,9 @@ irqreturn_t gru_intr(int irq, void *dev_id) * This is running in interrupt context. Trylock the mmap_sem. * If it fails, retry the fault in user context. */ - if (down_read_trylock(&gts->ts_mm->mmap_sem)) { + if (!gts->ts_force_cch_reload && + down_read_trylock(&gts->ts_mm->mmap_sem)) { + gts->ustats.fmm_tlbdropin++; gru_try_dropin(gts, tfh, NULL); up_read(&gts->ts_mm->mmap_sem); } else { @@ -491,6 +517,7 @@ static int gru_user_dropin(struct gru_thread_state *gts, struct gru_mm_struct *gms = gts->ts_gms; int ret; + gts->ustats.upm_tlbdropin++; while (1) { wait_event(gms->ms_wait_queue, atomic_read(&gms->ms_range_active) == 0); @@ -546,8 +573,8 @@ int gru_handle_user_call_os(unsigned long cb) * CCH may contain stale data if ts_force_cch_reload is set.
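The reworked get_clear_fault_map() above drains both the fault and done bitmaps with the same idiom. A sketch of that pattern in isolation (hypothetical helper name; the logic matches the hunk):

    /* Atomically claim all pending bits in one bitmap word. The plain
     * read first avoids issuing an atomic xchg on words that are zero,
     * which is the common case on a lightly loaded chiplet. */
    static unsigned long claim_pending_bits(unsigned long *word)
    {
            unsigned long k = *word;

            if (k)
                    k = xchg(word, 0UL);
            return k;
    }

The done bits feed the new async path: for each done bit, gru_intr() calls complete() on the blade's bs_async_wq so that gru_wait_async_cbr() wakes.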
*/ if (gts->ts_gru && gts->ts_force_cch_reload) { - gru_update_cch(gts, 0); gts->ts_force_cch_reload = 0; + gts->ts_force_cch_reload = 0; + gru_update_cch(gts, 0); } ret = -EAGAIN; @@ -589,20 +616,26 @@ int gru_get_exception_detail(unsigned long arg) } else if (gts->ts_gru) { cbrnum = thread_cbr_number(gts, ucbnum); cbe = get_cbe_by_index(gts->ts_gru, cbrnum); - prefetchw(cbe);/* Harmless on hardware, required for emulator */ + gru_flush_cache(cbe); /* CBE not coherent */ excdet.opc = cbe->opccpy; excdet.exopc = cbe->exopccpy; excdet.ecause = cbe->ecause; excdet.exceptdet0 = cbe->idef1upd; excdet.exceptdet1 = cbe->idef3upd; + excdet.cbrstate = cbe->cbrstate; + excdet.cbrexecstatus = cbe->cbrexecstatus; + gru_flush_cache(cbe); ret = 0; } else { ret = -EAGAIN; } gru_unlock_gts(gts); - gru_dbg(grudev, "address 0x%lx, ecause 0x%x\n", excdet.cb, - excdet.ecause); + gru_dbg(grudev, + "cb 0x%lx, op %d, exopc %d, cbrstate %d, cbrexecstatus 0x%x, ecause 0x%x, " + "exdet0 0x%lx, exdet1 0x%x\n", + excdet.cb, excdet.opc, excdet.exopc, excdet.cbrstate, excdet.cbrexecstatus, + excdet.ecause, excdet.exceptdet0, excdet.exceptdet1); if (!ret && copy_to_user((void __user *)arg, &excdet, sizeof(excdet))) ret = -EFAULT; return ret; @@ -627,7 +660,7 @@ static int gru_unload_all_contexts(void) if (gts && mutex_trylock(&gts->ts_ctxlock)) { spin_unlock(&gru->gs_lock); gru_unload_context(gts, 1); - gru_unlock_gts(gts); + mutex_unlock(&gts->ts_ctxlock); spin_lock(&gru->gs_lock); } } @@ -669,6 +702,7 @@ int gru_user_flush_tlb(unsigned long arg) { struct gru_thread_state *gts; struct gru_flush_tlb_req req; + struct gru_mm_struct *gms; STAT(user_flush_tlb); if (copy_from_user(&req, (void __user *)arg, sizeof(req))) @@ -681,8 +715,34 @@ int gru_user_flush_tlb(unsigned long arg) if (!gts) return -EINVAL; - gru_flush_tlb_range(gts->ts_gms, req.vaddr, req.len); + gms = gts->ts_gms; gru_unlock_gts(gts); + gru_flush_tlb_range(gms, req.vaddr, req.len); + + return 0; +} + +/* + * Fetch GSEG statistics + */ +long gru_get_gseg_statistics(unsigned long arg) +{ + struct gru_thread_state *gts; + struct gru_get_gseg_statistics_req req; + + if (copy_from_user(&req, (void __user *)arg, sizeof(req))) + return -EFAULT; + + gts = gru_find_lock_gts(req.gseg); + if (gts) { + memcpy(&req.stats, &gts->ustats, sizeof(gts->ustats)); + gru_unlock_gts(gts); + } else { + memset(&req.stats, 0, sizeof(gts->ustats)); + } + + if (copy_to_user((void __user *)arg, &req, sizeof(req))) + return -EFAULT; return 0; } @@ -691,18 +751,34 @@ int gru_user_flush_tlb(unsigned long arg) * Register the current task as the user of the GSEG slice. * Needed for TLB fault interrupt targeting.
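The gru_user_flush_tlb() hunk above is a lock-scope change, not just a rename: the flush now runs after gru_unlock_gts(). Schematically (names as in the patch; this assumes, as the patch evidently does, that the gms lifetime does not depend on holding the gts lock):

    /* Before: a potentially long TLB range flush ran under the gts lock. */
    gru_flush_tlb_range(gts->ts_gms, req.vaddr, req.len);
    gru_unlock_gts(gts);

    /* After: capture gms, drop the lock, then flush. */
    gms = gts->ts_gms;
    gru_unlock_gts(gts);
    gru_flush_tlb_range(gms, req.vaddr, req.len);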
*/ -int gru_set_task_slice(long address) +int gru_set_context_option(unsigned long arg) { struct gru_thread_state *gts; + struct gru_set_context_option_req req; + int ret = 0; + + STAT(set_context_option); + if (copy_from_user(&req, (void __user *)arg, sizeof(req))) + return -EFAULT; + gru_dbg(grudev, "op %d, gseg 0x%lx, value1 0x%lx\n", req.op, req.gseg, req.val1); - STAT(set_task_slice); - gru_dbg(grudev, "address 0x%lx\n", address); - gts = gru_alloc_locked_gts(address); + gts = gru_alloc_locked_gts(req.gseg); if (!gts) return -EINVAL; - gts->ts_tgid_owner = current->tgid; + switch (req.op) { + case sco_gseg_owner: + /* Register the current task as the GSEG owner */ + gts->ts_tgid_owner = current->tgid; + break; + case sco_cch_req_slice: + /* Set the CCH slice option */ + gts->ts_cch_req_slice = req.val1 & 3; + break; + default: + ret = -EINVAL; + } gru_unlock_gts(gts); - return 0; + return ret; } diff --git a/drivers/misc/sgi-gru/grufile.c b/drivers/misc/sgi-gru/grufile.c index 3ce2920e2bf..fa2d93a9fb8 100644 --- a/drivers/misc/sgi-gru/grufile.c +++ b/drivers/misc/sgi-gru/grufile.c @@ -46,6 +46,7 @@ struct gru_blade_state *gru_base[GRU_MAX_BLADES] __read_mostly; unsigned long gru_start_paddr __read_mostly; +void *gru_start_vaddr __read_mostly; unsigned long gru_end_paddr __read_mostly; unsigned int gru_max_gids __read_mostly; struct gru_stats_s gru_stats; @@ -135,11 +136,9 @@ static int gru_create_new_context(unsigned long arg) if (copy_from_user(&req, (void __user *)arg, sizeof(req))) return -EFAULT; - if (req.data_segment_bytes == 0 || - req.data_segment_bytes > max_user_dsr_bytes) + if (req.data_segment_bytes > max_user_dsr_bytes) return -EINVAL; - if (!req.control_blocks || !req.maximum_thread_count || - req.control_blocks > max_user_cbrs) + if (req.control_blocks > max_user_cbrs || !req.maximum_thread_count) return -EINVAL; if (!(req.options & GRU_OPT_MISS_MASK)) @@ -184,41 +183,6 @@ static long gru_get_config_info(unsigned long arg) } /* - * Get GRU chiplet status - */ -static long gru_get_chiplet_status(unsigned long arg) -{ - struct gru_state *gru; - struct gru_chiplet_info info; - - if (copy_from_user(&info, (void __user *)arg, sizeof(info))) - return -EFAULT; - - if (info.node == -1) - info.node = numa_node_id(); - if (info.node >= num_possible_nodes() || - info.chiplet >= GRU_CHIPLETS_PER_HUB || - info.node < 0 || info.chiplet < 0) - return -EINVAL; - - info.blade = uv_node_to_blade_id(info.node); - gru = get_gru(info.blade, info.chiplet); - - info.total_dsr_bytes = GRU_NUM_DSR_BYTES; - info.total_cbr = GRU_NUM_CB; - info.total_user_dsr_bytes = GRU_NUM_DSR_BYTES - - gru->gs_reserved_dsr_bytes; - info.total_user_cbr = GRU_NUM_CB - gru->gs_reserved_cbrs; - info.free_user_dsr_bytes = hweight64(gru->gs_dsr_map) * - GRU_DSR_AU_BYTES; - info.free_user_cbr = hweight64(gru->gs_cbr_map) * GRU_CBR_AU_SIZE; - - if (copy_to_user((void __user *)arg, &info, sizeof(info))) - return -EFAULT; - return 0; -} - -/* * gru_file_unlocked_ioctl * * Called to update file attributes via IOCTL calls. 
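For orientation, a hypothetical user-space invocation of the new option interface. The field names (op, gseg, val1) are taken from the kernel side above; the request struct and the GRU_SET_CONTEXT_OPTION ioctl encoding live in grulib.h, which this excerpt does not show:

    /* Usage sketch only -- struct layout and ioctl number assumed. */
    struct gru_set_context_option_req req;

    req.gseg = (unsigned long)gseg_vaddr;   /* GSEG to operate on */
    req.op   = sco_gseg_owner;              /* register caller as owner */
    req.val1 = 0;
    if (ioctl(gru_fd, GRU_SET_CONTEXT_OPTION, &req) < 0)
            perror("GRU_SET_CONTEXT_OPTION");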
@@ -234,8 +198,8 @@ static long gru_file_unlocked_ioctl(struct file *file, unsigned int req, case GRU_CREATE_CONTEXT: err = gru_create_new_context(arg); break; - case GRU_SET_TASK_SLICE: - err = gru_set_task_slice(arg); + case GRU_SET_CONTEXT_OPTION: + err = gru_set_context_option(arg); break; case GRU_USER_GET_EXCEPTION_DETAIL: err = gru_get_exception_detail(arg); @@ -243,18 +207,24 @@ static long gru_file_unlocked_ioctl(struct file *file, unsigned int req, case GRU_USER_UNLOAD_CONTEXT: err = gru_user_unload_context(arg); break; - case GRU_GET_CHIPLET_STATUS: - err = gru_get_chiplet_status(arg); - break; case GRU_USER_FLUSH_TLB: err = gru_user_flush_tlb(arg); break; case GRU_USER_CALL_OS: err = gru_handle_user_call_os(arg); break; + case GRU_GET_GSEG_STATISTICS: + err = gru_get_gseg_statistics(arg); + break; + case GRU_KTEST: + err = gru_ktest(arg); + break; case GRU_GET_CONFIG_INFO: err = gru_get_config_info(arg); break; + case GRU_DUMP_CHIPLET_STATE: + err = gru_dump_chiplet_request(arg); + break; } return err; } @@ -282,7 +252,6 @@ static void gru_init_chiplet(struct gru_state *gru, unsigned long paddr, gru_dbg(grudev, "bid %d, nid %d, gid %d, vaddr %p (0x%lx)\n", bid, nid, gru->gs_gid, gru->gs_gru_base_vaddr, gru->gs_gru_base_paddr); - gru_kservices_init(gru); } static int gru_init_tables(unsigned long gru_base_paddr, void *gru_base_vaddr) @@ -309,6 +278,7 @@ static int gru_init_tables(unsigned long gru_base_paddr, void *gru_base_vaddr) memset(gru_base[bid], 0, sizeof(struct gru_blade_state)); gru_base[bid]->bs_lru_gru = &gru_base[bid]->bs_grus[0]; spin_lock_init(&gru_base[bid]->bs_lock); + init_rwsem(&gru_base[bid]->bs_kgts_sema); dsrbytes = 0; cbrs = 0; @@ -372,7 +342,6 @@ static int __init gru_init(void) { int ret, irq, chip; char id[10]; - void *gru_start_vaddr; if (!is_uv_system()) return 0; @@ -422,6 +391,7 @@ static int __init gru_init(void) printk(KERN_ERR "%s: init tables failed\n", GRU_DRIVER_ID_STR); goto exit3; } + gru_kservices_init(); printk(KERN_INFO "%s: v%s\n", GRU_DRIVER_ID_STR, GRU_DRIVER_VERSION_STR); @@ -440,7 +410,7 @@ exit1: static void __exit gru_exit(void) { - int i, bid, gid; + int i, bid; int order = get_order(sizeof(struct gru_state) * GRU_CHIPLETS_PER_BLADE); @@ -449,10 +419,7 @@ static void __exit gru_exit(void) for (i = 0; i < GRU_CHIPLETS_PER_BLADE; i++) free_irq(IRQ_GRU + i, NULL); - - foreach_gid(gid) - gru_kservices_exit(GID_TO_GRU(gid)); - + gru_kservices_exit(); for (bid = 0; bid < GRU_MAX_BLADES; bid++) free_pages((unsigned long)gru_base[bid], order); diff --git a/drivers/misc/sgi-gru/gruhandles.c b/drivers/misc/sgi-gru/gruhandles.c index 9b7ccb32869..37e7cfc53b9 100644 --- a/drivers/misc/sgi-gru/gruhandles.c +++ b/drivers/misc/sgi-gru/gruhandles.c @@ -57,7 +57,7 @@ static void start_instruction(void *h) static int wait_instruction_complete(void *h, enum mcs_op opc) { int status; - cycles_t start_time = get_cycles(); + unsigned long start_time = get_cycles(); while (1) { cpu_relax(); @@ -65,25 +65,16 @@ static int wait_instruction_complete(void *h, enum mcs_op opc) if (status != CCHSTATUS_ACTIVE) break; if (GRU_OPERATION_TIMEOUT < (get_cycles() - start_time)) - panic("GRU %p is malfunctioning\n", h); + panic("GRU %p is malfunctioning: start %ld, end %ld\n", + h, start_time, (unsigned long)get_cycles()); } if (gru_options & OPT_STATS) update_mcs_stats(opc, get_cycles() - start_time); return status; } -int cch_allocate(struct gru_context_configuration_handle *cch, - int asidval, int sizeavail, unsigned long cbrmap, - unsigned long dsrmap) +int 
cch_allocate(struct gru_context_configuration_handle *cch) { - int i; - - for (i = 0; i < 8; i++) { - cch->asid[i] = (asidval++); - cch->sizeavail[i] = sizeavail; - } - cch->dsr_allocation_map = dsrmap; - cch->cbr_allocation_map = cbrmap; cch->opc = CCHOP_ALLOCATE; start_instruction(cch); return wait_instruction_complete(cch, cchop_allocate); diff --git a/drivers/misc/sgi-gru/gruhandles.h b/drivers/misc/sgi-gru/gruhandles.h index 1ed74d7508c..f44112242d0 100644 --- a/drivers/misc/sgi-gru/gruhandles.h +++ b/drivers/misc/sgi-gru/gruhandles.h @@ -39,7 +39,6 @@ #define GRU_NUM_CBE 128 #define GRU_NUM_TFH 128 #define GRU_NUM_CCH 16 -#define GRU_NUM_GSH 1 /* Maximum resource counts that can be reserved by user programs */ #define GRU_NUM_USER_CBR GRU_NUM_CBE @@ -56,7 +55,6 @@ #define GRU_CBE_BASE (GRU_MCS_BASE + 0x10000) #define GRU_TFH_BASE (GRU_MCS_BASE + 0x18000) #define GRU_CCH_BASE (GRU_MCS_BASE + 0x20000) -#define GRU_GSH_BASE (GRU_MCS_BASE + 0x30000) /* User gseg constants */ #define GRU_GSEG_STRIDE (4 * 1024 * 1024) @@ -251,15 +249,15 @@ struct gru_tlb_fault_handle { unsigned int fill1:9; unsigned int status:2; - unsigned int fill2:1; - unsigned int color:1; + unsigned int fill2:2; unsigned int state:3; unsigned int fill3:1; - unsigned int cause:7; /* DW 0 - high 32 */ + unsigned int cause:6; + unsigned int cb_int:1; unsigned int fill4:1; - unsigned int indexway:12; + unsigned int indexway:12; /* DW 0 - high 32 */ unsigned int fill5:4; unsigned int ctxnum:4; @@ -457,21 +455,7 @@ enum gru_cbr_state { CBRSTATE_BUSY_INTERRUPT, }; -/* CBE cbrexecstatus bits */ -#define CBR_EXS_ABORT_OCC_BIT 0 -#define CBR_EXS_INT_OCC_BIT 1 -#define CBR_EXS_PENDING_BIT 2 -#define CBR_EXS_QUEUED_BIT 3 -#define CBR_EXS_TLBHW_BIT 4 -#define CBR_EXS_EXCEPTION_BIT 5 - -#define CBR_EXS_ABORT_OCC (1 << CBR_EXS_ABORT_OCC_BIT) -#define CBR_EXS_INT_OCC (1 << CBR_EXS_INT_OCC_BIT) -#define CBR_EXS_PENDING (1 << CBR_EXS_PENDING_BIT) -#define CBR_EXS_QUEUED (1 << CBR_EXS_QUEUED_BIT) -#define CBR_EXS_TLBHW (1 << CBR_EXS_TLBHW_BIT) -#define CBR_EXS_EXCEPTION (1 << CBR_EXS_EXCEPTION_BIT) - +/* CBE cbrexecstatus bits - defined in gru_instructions.h*/ /* CBE ecause bits - defined in gru_instructions.h */ /* @@ -495,9 +479,7 @@ enum gru_cbr_state { /* minimum TLB purge count to ensure a full purge */ #define GRUMAXINVAL 1024UL -int cch_allocate(struct gru_context_configuration_handle *cch, - int asidval, int sizeavail, unsigned long cbrmap, unsigned long dsrmap); - +int cch_allocate(struct gru_context_configuration_handle *cch); int cch_start(struct gru_context_configuration_handle *cch); int cch_interrupt(struct gru_context_configuration_handle *cch); int cch_deallocate(struct gru_context_configuration_handle *cch); diff --git a/drivers/misc/sgi-gru/grukdump.c b/drivers/misc/sgi-gru/grukdump.c new file mode 100644 index 00000000000..55eabfa8558 --- /dev/null +++ b/drivers/misc/sgi-gru/grukdump.c @@ -0,0 +1,232 @@ +/* + * SN Platform GRU Driver + * + * Dump GRU State + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/spinlock.h> +#include <linux/uaccess.h> +#include <linux/delay.h> +#include <linux/bitops.h> +#include <asm/uv/uv_hub.h> +#include "gru.h" +#include "grutables.h" +#include "gruhandles.h" +#include "grulib.h" + +#define CCH_LOCK_ATTEMPTS 10 + +static int gru_user_copy_handle(void __user **dp, void *s) +{ + if (copy_to_user(*dp, s, GRU_HANDLE_BYTES)) + return -1; + *dp += GRU_HANDLE_BYTES; + return 0; +} + +static int gru_dump_context_data(void *grubase, + struct gru_context_configuration_handle *cch, + void __user *ubuf, int ctxnum, int dsrcnt) +{ + void *cb, *cbe, *tfh, *gseg; + int i, scr; + + gseg = grubase + ctxnum * GRU_GSEG_STRIDE; + cb = gseg + GRU_CB_BASE; + cbe = grubase + GRU_CBE_BASE; + tfh = grubase + GRU_TFH_BASE; + + for_each_cbr_in_allocation_map(i, &cch->cbr_allocation_map, scr) { + if (gru_user_copy_handle(&ubuf, cb)) + goto fail; + if (gru_user_copy_handle(&ubuf, tfh + i * GRU_HANDLE_STRIDE)) + goto fail; + if (gru_user_copy_handle(&ubuf, cbe + i * GRU_HANDLE_STRIDE)) + goto fail; + cb += GRU_HANDLE_STRIDE; + } + if (dsrcnt) + memcpy(ubuf, gseg + GRU_DS_BASE, dsrcnt * GRU_HANDLE_STRIDE); + return 0; + +fail: + return -EFAULT; +} + +static int gru_dump_tfm(struct gru_state *gru, + void __user *ubuf, void __user *ubufend) +{ + struct gru_tlb_fault_map *tfm; + int i, ret, bytes; + + bytes = GRU_NUM_TFM * GRU_CACHE_LINE_BYTES; + if (bytes > ubufend - ubuf) + ret = -EFBIG; + + for (i = 0; i < GRU_NUM_TFM; i++) { + tfm = get_tfm(gru->gs_gru_base_vaddr, i); + if (gru_user_copy_handle(&ubuf, tfm)) + goto fail; + } + return GRU_NUM_TFM * GRU_CACHE_LINE_BYTES; + +fail: + return -EFAULT; +} + +static int gru_dump_tgh(struct gru_state *gru, + void __user *ubuf, void __user *ubufend) +{ + struct gru_tlb_global_handle *tgh; + int i, ret, bytes; + + bytes = GRU_NUM_TGH * GRU_CACHE_LINE_BYTES; + if (bytes > ubufend - ubuf) + ret = -EFBIG; + + for (i = 0; i < GRU_NUM_TGH; i++) { + tgh = get_tgh(gru->gs_gru_base_vaddr, i); + if (gru_user_copy_handle(&ubuf, tgh)) + goto fail; + } + return GRU_NUM_TGH * GRU_CACHE_LINE_BYTES; + +fail: + return -EFAULT; +} + +static int gru_dump_context(struct gru_state *gru, int ctxnum, + void __user *ubuf, void __user *ubufend, char data_opt, + char lock_cch) +{ + struct gru_dump_context_header hdr; + struct gru_dump_context_header __user *uhdr = ubuf; + struct gru_context_configuration_handle *cch, *ubufcch; + struct gru_thread_state *gts; + int try, cch_locked, cbrcnt = 0, dsrcnt = 0, bytes = 0, ret = 0; + void *grubase; + + memset(&hdr, 0, sizeof(hdr)); + grubase = gru->gs_gru_base_vaddr; + cch = get_cch(grubase, ctxnum); + for (try = 0; try < CCH_LOCK_ATTEMPTS; try++) { + cch_locked = trylock_cch_handle(cch); + if (cch_locked) + break; + msleep(1); + } + + ubuf += sizeof(hdr); + ubufcch = ubuf; + if (gru_user_copy_handle(&ubuf, cch)) + goto fail; + if (cch_locked) + ubufcch->delresp = 0; + bytes = sizeof(hdr) + GRU_CACHE_LINE_BYTES; + + if (cch_locked || !lock_cch) { + gts = gru->gs_gts[ctxnum]; + if (gts && gts->ts_vma) { + hdr.pid = gts->ts_tgid_owner; + hdr.vaddr = gts->ts_vma->vm_start; + } + if (cch->state != CCHSTATE_INACTIVE) { + cbrcnt = hweight64(cch->cbr_allocation_map) * + GRU_CBR_AU_SIZE; + dsrcnt = 
data_opt ? hweight32(cch->dsr_allocation_map) * + GRU_DSR_AU_CL : 0; + } + bytes += (3 * cbrcnt + dsrcnt) * GRU_CACHE_LINE_BYTES; + if (bytes > ubufend - ubuf) + ret = -EFBIG; + else + ret = gru_dump_context_data(grubase, cch, ubuf, ctxnum, + dsrcnt); + + } + if (cch_locked) + unlock_cch_handle(cch); + if (ret) + return ret; + + hdr.magic = GRU_DUMP_MAGIC; + hdr.gid = gru->gs_gid; + hdr.ctxnum = ctxnum; + hdr.cbrcnt = cbrcnt; + hdr.dsrcnt = dsrcnt; + hdr.cch_locked = cch_locked; + if (!ret && copy_to_user((void __user *)uhdr, &hdr, sizeof(hdr))) + ret = -EFAULT; + + return ret ? ret : bytes; + +fail: + unlock_cch_handle(cch); + return -EFAULT; +} + +int gru_dump_chiplet_request(unsigned long arg) +{ + struct gru_state *gru; + struct gru_dump_chiplet_state_req req; + void __user *ubuf; + void __user *ubufend; + int ctxnum, ret, cnt = 0; + + if (copy_from_user(&req, (void __user *)arg, sizeof(req))) + return -EFAULT; + + /* Currently, only dump by gid is implemented */ + if (req.gid >= gru_max_gids || req.gid < 0) + return -EINVAL; + + gru = GID_TO_GRU(req.gid); + ubuf = req.buf; + ubufend = req.buf + req.buflen; + + ret = gru_dump_tfm(gru, ubuf, ubufend); + if (ret < 0) + goto fail; + ubuf += ret; + + ret = gru_dump_tgh(gru, ubuf, ubufend); + if (ret < 0) + goto fail; + ubuf += ret; + + for (ctxnum = 0; ctxnum < GRU_NUM_CCH; ctxnum++) { + if (req.ctxnum == ctxnum || req.ctxnum < 0) { + ret = gru_dump_context(gru, ctxnum, ubuf, ubufend, + req.data_opt, req.lock_cch); + if (ret < 0) + goto fail; + ubuf += ret; + cnt++; + } + } + + if (copy_to_user((void __user *)arg, &req, sizeof(req))) + return -EFAULT; + return cnt; + +fail: + return ret; +} diff --git a/drivers/misc/sgi-gru/grukservices.c b/drivers/misc/sgi-gru/grukservices.c index d8bd7d84a7c..eedbf9c3276 100644 --- a/drivers/misc/sgi-gru/grukservices.c +++ b/drivers/misc/sgi-gru/grukservices.c @@ -31,6 +31,7 @@ #include <linux/proc_fs.h> #include <linux/interrupt.h> #include <linux/uaccess.h> +#include <linux/delay.h> #include "gru.h" #include "grulib.h" #include "grutables.h" @@ -45,18 +46,66 @@ * resources. This will likely be replaced when we better understand the * kernel/user requirements. * - * At boot time, the kernel permanently reserves a fixed number of - * CBRs/DSRs for each cpu to use. The resources are all taken from - * the GRU chiplet 1 on the blade. This leaves the full set of resources - * of chiplet 0 available to be allocated to a single user. + * Blade percpu resources reserved for kernel use. These resources are + * reserved whenever the kernel context for the blade is loaded. Note + * that the kernel context is not guaranteed to be always available. It is + * loaded on demand & can be stolen by a user if the user demand exceeds the + * kernel demand. The kernel can always reload the kernel context but + * a SLEEP may be required!!! + * + * Async Overview: + * + * Each blade has one "kernel context" that owns GRU kernel resources + * located on the blade. Kernel drivers use GRU resources in this context + * for sending messages, zeroing memory, etc. + * + * The kernel context is dynamically loaded on demand. If it is not in + * use by the kernel, the kernel context can be unloaded & given to a user. + * The kernel context will be reloaded when needed. This may require that + * a context be stolen from a user. + * NOTE: frequent unloading/reloading of the kernel context is + * expensive.
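Returning briefly to the new grukdump.c above: gru_dump_chiplet_request() emits the TFM block, the TGH block, then one record per dumped context. A hedged sketch of how a user-space consumer might walk the per-context records; the arithmetic is inferred from gru_dump_context()'s byte accounting (header, one cache line for the CCH, three handles per CBR, then the DSR lines) and is not an authoritative format description:

    /* Hypothetical walker; p points past the TFM/TGH blocks, cnt is the
     * count returned by the GRU_DUMP_CHIPLET_STATE ioctl. */
    static void walk_context_records(void *p, int cnt)
    {
            struct gru_dump_context_header *hdr = p;

            while (cnt-- > 0) {
                    if (hdr->magic != GRU_DUMP_MAGIC)
                            break;          /* corrupt or truncated record */
                    printf("gid %d ctx %d pid %d cbrcnt %d dsrcnt %d%s\n",
                           hdr->gid, hdr->ctxnum, hdr->pid,
                           hdr->cbrcnt, hdr->dsrcnt,
                           hdr->cch_locked ? "" : " (cch not locked)");
                    /* header + CCH line + 3 handles per CBR + DSR lines */
                    p += sizeof(*hdr) + (1 + 3 * hdr->cbrcnt + hdr->dsrcnt)
                            * GRU_CACHE_LINE_BYTES;
                    hdr = p;
            }
    }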
We are depending on batch schedulers, cpusets, sane + * drivers or some other mechanism to prevent the need for frequent + * stealing/reloading. + * + * The kernel context consists of two parts: + * - 1 CB & a few DSRs that are reserved for each cpu on the blade. + * Each cpu has its own private resources & does not share them + * with other cpus. These resources are used serially, i.e., + * locked, used & unlocked on each call to a function in + * grukservices. + * (Now that we have dynamic loading of kernel contexts, I + * may rethink this & allow sharing between cpus....) + * + * - Additional resources can be reserved long term & used directly + * by UV drivers located in the kernel. Drivers using these GRU + * resources can use asynchronous GRU instructions that send + * interrupts on completion. + * - these resources must be explicitly locked/unlocked + * - locked resources prevent (obviously) the kernel + * context from being unloaded. + * - drivers using these resources directly issue their own + * GRU instruction and must wait/check completion. + * + * When these resources are reserved, the caller can optionally + * associate a wait_queue with the resources and use asynchronous + * GRU instructions. When an async GRU instruction completes, the + * driver will do a wakeup on the event. + * */ -/* Blade percpu resources PERMANENTLY reserved for kernel use */ + +#define ASYNC_HAN_TO_BID(h) ((h) - 1) +#define ASYNC_BID_TO_HAN(b) ((b) + 1) +#define ASYNC_HAN_TO_BS(h) gru_base[ASYNC_HAN_TO_BID(h)] +#define KCB_TO_GID(cb) ((cb - gru_start_vaddr) / \ + (GRU_SIZE * GRU_CHIPLETS_PER_BLADE)) +#define KCB_TO_BS(cb) gru_base[KCB_TO_GID(cb)] + #define GRU_NUM_KERNEL_CBR 1 #define GRU_NUM_KERNEL_DSR_BYTES 256 #define GRU_NUM_KERNEL_DSR_CL (GRU_NUM_KERNEL_DSR_BYTES / \ GRU_CACHE_LINE_BYTES) -#define KERNEL_CTXNUM 15 /* GRU instruction attributes for all instructions */ #define IMA IMA_CB_DELAY @@ -98,6 +147,108 @@ struct message_header { #define HSTATUS(mq, h) ((mq) + offsetof(struct message_queue, hstatus[h])) +/* + * Reload the blade's kernel context into a GRU chiplet. Called holding + * the bs_kgts_sema for READ. Will steal user contexts if necessary. + */ +static void gru_load_kernel_context(struct gru_blade_state *bs, int blade_id) +{ + struct gru_state *gru; + struct gru_thread_state *kgts; + void *vaddr; + int ctxnum, ncpus; + + up_read(&bs->bs_kgts_sema); + down_write(&bs->bs_kgts_sema); + + if (!bs->bs_kgts) + bs->bs_kgts = gru_alloc_gts(NULL, 0, 0, 0, 0); + kgts = bs->bs_kgts; + + if (!kgts->ts_gru) { + STAT(load_kernel_context); + ncpus = uv_blade_nr_possible_cpus(blade_id); + kgts->ts_cbr_au_count = GRU_CB_COUNT_TO_AU( + GRU_NUM_KERNEL_CBR * ncpus + bs->bs_async_cbrs); + kgts->ts_dsr_au_count = GRU_DS_BYTES_TO_AU( + GRU_NUM_KERNEL_DSR_BYTES * ncpus + + bs->bs_async_dsr_bytes); + while (!gru_assign_gru_context(kgts, blade_id)) { + msleep(1); + gru_steal_context(kgts, blade_id); + } + gru_load_context(kgts); + gru = bs->bs_kgts->ts_gru; + vaddr = gru->gs_gru_base_vaddr; + ctxnum = kgts->ts_ctxnum; + bs->kernel_cb = get_gseg_base_address_cb(vaddr, ctxnum, 0); + bs->kernel_dsr = get_gseg_base_address_ds(vaddr, ctxnum, 0); + } + downgrade_write(&bs->bs_kgts_sema); +} + +/* + * Free all kernel contexts that are not currently in use. + * Returns 0 if all freed, else number of in-use contexts.
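A note on gru_load_kernel_context() just above: it is entered holding bs_kgts_sema for read and must upgrade to write. rwsems cannot be upgraded atomically, so the lock is dropped and retaken, and every condition is re-tested under the write lock. A sketch of the idiom (names as in the patch):

    up_read(&bs->bs_kgts_sema);
    down_write(&bs->bs_kgts_sema);
    /* Another thread may have allocated or loaded the context in the
     * window between up_read() and down_write(), so re-check both. */
    if (!bs->bs_kgts)
            bs->bs_kgts = gru_alloc_gts(NULL, 0, 0, 0, 0);
    if (!bs->bs_kgts->ts_gru) {
            /* ... size, assign (stealing if necessary) and load ... */
    }
    downgrade_write(&bs->bs_kgts_sema);     /* return holding it for read */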
+ */ +static int gru_free_kernel_contexts(void) +{ + struct gru_blade_state *bs; + struct gru_thread_state *kgts; + int bid, ret = 0; + + for (bid = 0; bid < GRU_MAX_BLADES; bid++) { + bs = gru_base[bid]; + if (!bs) + continue; + if (down_write_trylock(&bs->bs_kgts_sema)) { + kgts = bs->bs_kgts; + if (kgts && kgts->ts_gru) + gru_unload_context(kgts, 0); + kfree(kgts); + bs->bs_kgts = NULL; + up_write(&bs->bs_kgts_sema); + } else { + ret++; + } + } + return ret; +} + +/* + * Lock & load the kernel context for the specified blade. + */ +static struct gru_blade_state *gru_lock_kernel_context(int blade_id) +{ + struct gru_blade_state *bs; + + STAT(lock_kernel_context); + bs = gru_base[blade_id]; + + down_read(&bs->bs_kgts_sema); + if (!bs->bs_kgts || !bs->bs_kgts->ts_gru) + gru_load_kernel_context(bs, blade_id); + return bs; + +} + +/* + * Unlock the kernel context for the specified blade. Context is not + * unloaded but may be stolen before next use. + */ +static void gru_unlock_kernel_context(int blade_id) +{ + struct gru_blade_state *bs; + + bs = gru_base[blade_id]; + up_read(&bs->bs_kgts_sema); + STAT(unlock_kernel_context); +} + +/* + * Reserve & get pointers to the DSR/CBRs reserved for the current cpu. + * - returns with preemption disabled + */ static int gru_get_cpu_resources(int dsr_bytes, void **cb, void **dsr) { struct gru_blade_state *bs; @@ -105,30 +256,148 @@ static int gru_get_cpu_resources(int dsr_bytes, void **cb, void **dsr) BUG_ON(dsr_bytes > GRU_NUM_KERNEL_DSR_BYTES); preempt_disable(); - bs = gru_base[uv_numa_blade_id()]; + bs = gru_lock_kernel_context(uv_numa_blade_id()); lcpu = uv_blade_processor_id(); *cb = bs->kernel_cb + lcpu * GRU_HANDLE_STRIDE; *dsr = bs->kernel_dsr + lcpu * GRU_NUM_KERNEL_DSR_BYTES; return 0; } +/* + * Free the current cpus reserved DSR/CBR resources. + */ static void gru_free_cpu_resources(void *cb, void *dsr) { + gru_unlock_kernel_context(uv_numa_blade_id()); preempt_enable(); } +/* + * Reserve GRU resources to be used asynchronously. + * Note: currently supports only 1 reservation per blade. + * + * input: + * blade_id - blade on which resources should be reserved + * cbrs - number of CBRs + * dsr_bytes - number of DSR bytes needed + * output: + * handle to identify resource + * (0 = async resources already reserved) + */ +unsigned long gru_reserve_async_resources(int blade_id, int cbrs, int dsr_bytes, + struct completion *cmp) +{ + struct gru_blade_state *bs; + struct gru_thread_state *kgts; + int ret = 0; + + bs = gru_base[blade_id]; + + down_write(&bs->bs_kgts_sema); + + /* Verify no resources already reserved */ + if (bs->bs_async_dsr_bytes + bs->bs_async_cbrs) + goto done; + bs->bs_async_dsr_bytes = dsr_bytes; + bs->bs_async_cbrs = cbrs; + bs->bs_async_wq = cmp; + kgts = bs->bs_kgts; + + /* Resources changed. Unload context if already loaded */ + if (kgts && kgts->ts_gru) + gru_unload_context(kgts, 0); + ret = ASYNC_BID_TO_HAN(blade_id); + +done: + up_write(&bs->bs_kgts_sema); + return ret; +} + +/* + * Release async resources previously reserved. + * + * input: + * han - handle to identify resources + */ +void gru_release_async_resources(unsigned long han) +{ + struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han); + + down_write(&bs->bs_kgts_sema); + bs->bs_async_dsr_bytes = 0; + bs->bs_async_cbrs = 0; + bs->bs_async_wq = NULL; + up_write(&bs->bs_kgts_sema); +} + +/* + * Wait for async GRU instructions to complete. 
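Taken together, the async entry points form a small reserve/lock/wait/unlock/release lifecycle; quicktest2() below exercises exactly this sequence. A condensed usage sketch (blade_id and gpa are placeholders; error handling elided):

    static DECLARE_COMPLETION(cmp);
    unsigned long han;
    void *cb;

    han = gru_reserve_async_resources(blade_id, 4, 0, &cmp); /* 4 CBRs */
    if (!han)
            return -EBUSY;          /* 0 means already reserved */
    gru_lock_async_resource(han, &cb, NULL); /* pins the kernel context */
    gru_vset(cb, gpa, 0, XTYPE_DW, 4, 1, IMA_INTERRUPT);
    gru_wait_async_cbr(han);        /* completed via done bits in gru_intr() */
    gru_unlock_async_resource(han);
    gru_release_async_resources(han);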
+ * + * input: + * han - handle to identify resources + */ +void gru_wait_async_cbr(unsigned long han) +{ + struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han); + + wait_for_completion(bs->bs_async_wq); + mb(); +} + +/* + * Lock previously reserved async GRU resources + * + * input: + * han - handle to identify resources + * output: + * cb - pointer to first CBR + * dsr - pointer to first DSR + */ +void gru_lock_async_resource(unsigned long han, void **cb, void **dsr) +{ + struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han); + int blade_id = ASYNC_HAN_TO_BID(han); + int ncpus; + + gru_lock_kernel_context(blade_id); + ncpus = uv_blade_nr_possible_cpus(blade_id); + if (cb) + *cb = bs->kernel_cb + ncpus * GRU_HANDLE_STRIDE; + if (dsr) + *dsr = bs->kernel_dsr + ncpus * GRU_NUM_KERNEL_DSR_BYTES; +} + +/* + * Unlock previously reserved async GRU resources + * + * input: + * han - handle to identify resources + */ +void gru_unlock_async_resource(unsigned long han) +{ + int blade_id = ASYNC_HAN_TO_BID(han); + + gru_unlock_kernel_context(blade_id); +} + +/*----------------------------------------------------------------------*/ int gru_get_cb_exception_detail(void *cb, struct control_block_extended_exc_detail *excdet) { struct gru_control_block_extended *cbe; + struct gru_blade_state *bs; + int cbrnum; - cbe = get_cbe(GRUBASE(cb), get_cb_number(cb)); - prefetchw(cbe); /* Harmless on hardware, required for emulator */ + bs = KCB_TO_BS(cb); + cbrnum = thread_cbr_number(bs->bs_kgts, get_cb_number(cb)); + cbe = get_cbe(GRUBASE(cb), cbrnum); + gru_flush_cache(cbe); /* CBE not coherent */ excdet->opc = cbe->opccpy; excdet->exopc = cbe->exopccpy; excdet->ecause = cbe->ecause; excdet->exceptdet0 = cbe->idef1upd; excdet->exceptdet1 = cbe->idef3upd; + gru_flush_cache(cbe); return 0; } @@ -167,13 +436,13 @@ static int gru_retry_exception(void *cb) int retry = EXCEPTION_RETRY_LIMIT; while (1) { - if (gru_get_cb_message_queue_substatus(cb)) - break; if (gru_wait_idle_or_exception(gen) == CBS_IDLE) return CBS_IDLE; - + if (gru_get_cb_message_queue_substatus(cb)) + return CBS_EXCEPTION; gru_get_cb_exception_detail(cb, &excdet); - if (excdet.ecause & ~EXCEPTION_RETRY_BITS) + if ((excdet.ecause & ~EXCEPTION_RETRY_BITS) || + (excdet.cbrexecstatus & CBR_EXS_ABORT_OCC)) break; if (retry-- == 0) break; @@ -416,6 +685,29 @@ static void send_message_queue_interrupt(struct gru_message_queue_desc *mqd) mqd->interrupt_vector); } +/* + * Handle a PUT failure. Note: if message was a 2-line message, one of the + * lines might have successfully been written. Before sending the + * message, "present" must be cleared in BOTH lines to prevent the receiver + * from prematurely seeing the full message. + */ +static int send_message_put_nacked(void *cb, struct gru_message_queue_desc *mqd, + void *mesg, int lines) +{ + unsigned long m; + + m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6); + if (lines == 2) { + gru_vset(cb, m, 0, XTYPE_CL, lines, 1, IMA); + if (gru_wait(cb) != CBS_IDLE) + return MQE_UNEXPECTED_CB_ERR; + } + gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, lines, 1, IMA); + if (gru_wait(cb) != CBS_IDLE) + return MQE_UNEXPECTED_CB_ERR; + send_message_queue_interrupt(mqd); + return MQE_OK; +} /* * Handle a gru_mesq failure.
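send_message_put_nacked() above is the recovery path for one specific substatus; from the caller's side, the send loop that quicktest1() below uses treats MQE_CONGESTION as retry-now and anything else as a final result:

    /* Caller-side pattern, as exercised by quicktest1(). */
    do {
            ret = gru_send_message_gpa(&mqd, mes, sizeof(mes));
    } while (ret == MQE_CONGESTION);    /* transient; retry immediately */
    if (ret == MQE_QUEUE_FULL)
            ;       /* back off until the receiver drains the queue */
    else if (ret != MQE_OK)
            ;       /* hard failure, e.g. MQE_UNEXPECTED_CB_ERR */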
Some of these failures are software recoverable @@ -425,7 +717,6 @@ static int send_message_failure(void *cb, struct gru_message_queue_desc *mqd, void *mesg, int lines) { int substatus, ret = 0; - unsigned long m; substatus = gru_get_cb_message_queue_substatus(cb); switch (substatus) { @@ -447,14 +738,7 @@ static int send_message_failure(void *cb, struct gru_message_queue_desc *mqd, break; case CBSS_PUT_NACKED: STAT(mesq_send_put_nacked); - m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6); - gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, lines, 1, IMA); - if (gru_wait(cb) == CBS_IDLE) { - ret = MQE_OK; - send_message_queue_interrupt(mqd); - } else { - ret = MQE_UNEXPECTED_CB_ERR; - } + ret = send_message_put_nacked(cb, mqd, mesg, lines); break; default: BUG(); @@ -597,115 +881,177 @@ EXPORT_SYMBOL_GPL(gru_copy_gpa); /* ------------------- KERNEL QUICKTESTS RUN AT STARTUP ----------------*/ /* Temp - will delete after we gain confidence in the GRU */ -static __cacheline_aligned unsigned long word0; -static __cacheline_aligned unsigned long word1; -static int quicktest(struct gru_state *gru) +static int quicktest0(unsigned long arg) { + unsigned long word0; + unsigned long word1; void *cb; - void *ds; + void *dsr; unsigned long *p; + int ret = -EIO; - cb = get_gseg_base_address_cb(gru->gs_gru_base_vaddr, KERNEL_CTXNUM, 0); - ds = get_gseg_base_address_ds(gru->gs_gru_base_vaddr, KERNEL_CTXNUM, 0); - p = ds; + if (gru_get_cpu_resources(GRU_CACHE_LINE_BYTES, &cb, &dsr)) + return MQE_BUG_NO_RESOURCES; + p = dsr; word0 = MAGIC; + word1 = 0; - gru_vload(cb, uv_gpa(&word0), 0, XTYPE_DW, 1, 1, IMA); - if (gru_wait(cb) != CBS_IDLE) - BUG(); + gru_vload(cb, uv_gpa(&word0), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA); + if (gru_wait(cb) != CBS_IDLE) { + printk(KERN_DEBUG "GRU quicktest0: CBR failure 1\n"); + goto done; + } - if (*(unsigned long *)ds != MAGIC) - BUG(); - gru_vstore(cb, uv_gpa(&word1), 0, XTYPE_DW, 1, 1, IMA); - if (gru_wait(cb) != CBS_IDLE) - BUG(); + if (*p != MAGIC) { + printk(KERN_DEBUG "GRU: quicktest0 bad magic 0x%lx\n", *p); + goto done; + } + gru_vstore(cb, uv_gpa(&word1), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA); + if (gru_wait(cb) != CBS_IDLE) { + printk(KERN_DEBUG "GRU quicktest0: CBR failure 2\n"); + goto done; + } - if (word0 != word1 || word0 != MAGIC) { - printk - ("GRU quicktest err: gid %d, found 0x%lx, expected 0x%lx\n", - gru->gs_gid, word1, MAGIC); - BUG(); /* ZZZ should not be fatal */ + if (word0 != word1 || word1 != MAGIC) { + printk(KERN_DEBUG + "GRU quicktest0 err: found 0x%lx, expected 0x%lx\n", + word1, MAGIC); + goto done; } + ret = 0; - return 0; +done: + gru_free_cpu_resources(cb, dsr); + return ret; } +#define ALIGNUP(p, q) ((void *)(((unsigned long)(p) + (q) - 1) & ~(q - 1))) -int gru_kservices_init(struct gru_state *gru) +static int quicktest1(unsigned long arg) { - struct gru_blade_state *bs; - struct gru_context_configuration_handle *cch; - unsigned long cbr_map, dsr_map; - int err, num, cpus_possible; - - /* - * Currently, resources are reserved ONLY on the second chiplet - * on each blade. This leaves ALL resources on chiplet 0 available - * for user code. 
- */ - bs = gru->gs_blade; - if (gru != &bs->bs_grus[1]) - return 0; - - cpus_possible = uv_blade_nr_possible_cpus(gru->gs_blade_id); - - num = GRU_NUM_KERNEL_CBR * cpus_possible; - cbr_map = gru_reserve_cb_resources(gru, GRU_CB_COUNT_TO_AU(num), NULL); - gru->gs_reserved_cbrs += num; - - num = GRU_NUM_KERNEL_DSR_BYTES * cpus_possible; - dsr_map = gru_reserve_ds_resources(gru, GRU_DS_BYTES_TO_AU(num), NULL); - gru->gs_reserved_dsr_bytes += num; - - gru->gs_active_contexts++; - __set_bit(KERNEL_CTXNUM, &gru->gs_context_map); - cch = get_cch(gru->gs_gru_base_vaddr, KERNEL_CTXNUM); - - bs->kernel_cb = get_gseg_base_address_cb(gru->gs_gru_base_vaddr, - KERNEL_CTXNUM, 0); - bs->kernel_dsr = get_gseg_base_address_ds(gru->gs_gru_base_vaddr, - KERNEL_CTXNUM, 0); - - lock_cch_handle(cch); - cch->tfm_fault_bit_enable = 0; - cch->tlb_int_enable = 0; - cch->tfm_done_bit_enable = 0; - cch->unmap_enable = 1; - err = cch_allocate(cch, 0, 0, cbr_map, dsr_map); - if (err) { - gru_dbg(grudev, - "Unable to allocate kernel CCH: gid %d, err %d\n", - gru->gs_gid, err); - BUG(); + struct gru_message_queue_desc mqd; + void *p, *mq; + unsigned long *dw; + int i, ret = -EIO; + char mes[GRU_CACHE_LINE_BYTES], *m; + + /* Need 1K cacheline aligned that does not cross page boundary */ + p = kmalloc(4096, 0); + mq = ALIGNUP(p, 1024); + memset(mes, 0xee, sizeof(mes)); + dw = mq; + + gru_create_message_queue(&mqd, mq, 8 * GRU_CACHE_LINE_BYTES, 0, 0, 0); + for (i = 0; i < 6; i++) { + mes[8] = i; + do { + ret = gru_send_message_gpa(&mqd, mes, sizeof(mes)); + } while (ret == MQE_CONGESTION); + if (ret) + break; } - if (cch_start(cch)) { - gru_dbg(grudev, "Unable to start kernel CCH: gid %d, err %d\n", - gru->gs_gid, err); - BUG(); + if (ret != MQE_QUEUE_FULL || i != 4) + goto done; + + for (i = 0; i < 6; i++) { + m = gru_get_next_message(&mqd); + if (!m || m[8] != i) + break; + gru_free_message(&mqd, m); } - unlock_cch_handle(cch); + ret = (i == 4) ? 0 : -EIO; - if (gru_options & GRU_QUICKLOOK) - quicktest(gru); - return 0; +done: + kfree(p); + return ret; } -void gru_kservices_exit(struct gru_state *gru) +static int quicktest2(unsigned long arg) { - struct gru_context_configuration_handle *cch; - struct gru_blade_state *bs; + static DECLARE_COMPLETION(cmp); + unsigned long han; + int blade_id = 0; + int numcb = 4; + int ret = 0; + unsigned long *buf; + void *cb0, *cb; + int i, k, istatus, bytes; + + bytes = numcb * 4 * 8; + buf = kmalloc(bytes, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + ret = -EBUSY; + han = gru_reserve_async_resources(blade_id, numcb, 0, &cmp); + if (!han) + goto done; + + gru_lock_async_resource(han, &cb0, NULL); + memset(buf, 0xee, bytes); + for (i = 0; i < numcb; i++) + gru_vset(cb0 + i * GRU_HANDLE_STRIDE, uv_gpa(&buf[i * 4]), 0, + XTYPE_DW, 4, 1, IMA_INTERRUPT); + + ret = 0; + for (k = 0; k < numcb; k++) { + gru_wait_async_cbr(han); + for (i = 0; i < numcb; i++) { + cb = cb0 + i * GRU_HANDLE_STRIDE; + istatus = gru_check_status(cb); + if (istatus == CBS_ACTIVE) + continue; + if (istatus == CBS_EXCEPTION) + ret = -EFAULT; + else if (buf[i] || buf[i + 1] || buf[i + 2] || + buf[i + 3]) + ret = -EIO; + } + } + BUG_ON(cmp.done); - bs = gru->gs_blade; - if (gru != &bs->bs_grus[1]) - return; + gru_unlock_async_resource(han); + gru_release_async_resources(han); +done: + kfree(buf); + return ret; +} - cch = get_cch(gru->gs_gru_base_vaddr, KERNEL_CTXNUM); - lock_cch_handle(cch); - if (cch_interrupt_sync(cch)) - BUG(); - if (cch_deallocate(cch)) +/* + * Debugging only. 
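
quicktest1() above needs a 1 KB aligned message queue that does not cross a page boundary, so it allocates 4 KB and rounds the pointer up with ALIGNUP(). The macro's arithmetic in isolation, as ordinary user-space C with made-up addresses:

	#include <stdio.h>

	#define ALIGNUP(p, q) ((void *)(((unsigned long)(p) + (q) - 1) & ~(q - 1)))

	int main(void)
	{
		/* (0x2345 + 0x3ff) & ~0x3ff == 0x2400: the next 1 KB boundary */
		printf("%p\n", ALIGNUP(0x2345, 0x400UL));
		/* an already aligned address is returned unchanged */
		printf("%p\n", ALIGNUP(0x2400, 0x400UL));
		return 0;
	}

Rounding up consumes at most q - 1 bytes, so the 4 KB allocation always contains a 1 KB aligned region large enough for the 1 KB queue.
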
User hook for various kernel tests
+ * of driver & gru.
+ */
+int gru_ktest(unsigned long arg)
+{
+	int ret = -EINVAL;
+
+	switch (arg & 0xff) {
+	case 0:
+		ret = quicktest0(arg);
+		break;
+	case 1:
+		ret = quicktest1(arg);
+		break;
+	case 2:
+		ret = quicktest2(arg);
+		break;
+	case 99:
+		ret = gru_free_kernel_contexts();
+		break;
+	}
+	return ret;
+
+}
+
+int gru_kservices_init(void)
+{
+	return 0;
+}
+
+void gru_kservices_exit(void)
+{
+	if (gru_free_kernel_contexts())
 		BUG();
-	unlock_cch_handle(cch);
 }
diff --git a/drivers/misc/sgi-gru/grukservices.h b/drivers/misc/sgi-gru/grukservices.h
index 747ed315d56..d60d34bca44 100644
--- a/drivers/misc/sgi-gru/grukservices.h
+++ b/drivers/misc/sgi-gru/grukservices.h
@@ -146,4 +146,55 @@ extern void *gru_get_next_message(struct gru_message_queue_desc *mqd);
 extern int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa,
 							unsigned int bytes);
 
+/*
+ * Reserve GRU resources to be used asynchronously.
+ *
+ * input:
+ *	blade_id  - blade on which resources should be reserved
+ *	cbrs	  - number of CBRs
+ *	dsr_bytes - number of DSR bytes needed
+ *	cmp	  - completion structure for waiting for
+ *		    async completions
+ * output:
+ *	handle to identify resource
+ *	(0 = no resources)
+ */
+extern unsigned long gru_reserve_async_resources(int blade_id, int cbrs, int dsr_bytes,
+				struct completion *cmp);
+
+/*
+ * Release async resources previously reserved.
+ *
+ * input:
+ *	han - handle to identify resources
+ */
+extern void gru_release_async_resources(unsigned long han);
+
+/*
+ * Wait for async GRU instructions to complete.
+ *
+ * input:
+ *	han - handle to identify resources
+ */
+extern void gru_wait_async_cbr(unsigned long han);
+
+/*
+ * Lock previously reserved async GRU resources
+ *
+ * input:
+ *	han - handle to identify resources
+ * output:
+ *	cb  - pointer to first CBR
+ *	dsr - pointer to first DSR
+ */
+extern void gru_lock_async_resource(unsigned long han,  void **cb, void **dsr);
+
+/*
+ * Unlock previously reserved async GRU resources
+ *
+ * input:
+ *	han - handle to identify resources
+ */
+extern void gru_unlock_async_resource(unsigned long han);
+
 #endif 		/* __GRU_KSERVICES_H_ */
diff --git a/drivers/misc/sgi-gru/grulib.h b/drivers/misc/sgi-gru/grulib.h
index e56e196a699..889bc442a3e 100644
--- a/drivers/misc/sgi-gru/grulib.h
+++ b/drivers/misc/sgi-gru/grulib.h
@@ -32,8 +32,8 @@
 /* Set Number of Request Blocks */
 #define GRU_CREATE_CONTEXT		_IOWR(GRU_IOCTL_NUM, 1, void *)
 
-/* Register task as using the slice */
-#define GRU_SET_TASK_SLICE		_IOWR(GRU_IOCTL_NUM, 5, void *)
+/* Set Context Options */
+#define GRU_SET_CONTEXT_OPTION		_IOWR(GRU_IOCTL_NUM, 4, void *)
 
 /* Fetch exception detail */
 #define GRU_USER_GET_EXCEPTION_DETAIL	_IOWR(GRU_IOCTL_NUM, 6, void *)
@@ -44,8 +44,11 @@
 /* For user unload context */
 #define GRU_USER_UNLOAD_CONTEXT		_IOWR(GRU_IOCTL_NUM, 9, void *)
 
-/* For fetching GRU chiplet status */
-#define GRU_GET_CHIPLET_STATUS		_IOWR(GRU_IOCTL_NUM, 10, void *)
+/* For dumping GRU chiplet state */
+#define GRU_DUMP_CHIPLET_STATE		_IOWR(GRU_IOCTL_NUM, 11, void *)
+
+/* For getting gseg statistics */
+#define GRU_GET_GSEG_STATISTICS		_IOWR(GRU_IOCTL_NUM, 12, void *)
 
 /* For user TLB flushing (primarily for tests) */
 #define GRU_USER_FLUSH_TLB		_IOWR(GRU_IOCTL_NUM, 50, void *)
@@ -53,8 +56,26 @@
 /* Get some config options (primarily for tests & emulator) */
 #define GRU_GET_CONFIG_INFO		_IOWR(GRU_IOCTL_NUM, 51, void *)
 
+/* Various kernel self-tests */
+#define GRU_KTEST			_IOWR(GRU_IOCTL_NUM, 52, void *)
+
 #define CONTEXT_WINDOW_BYTES(th)	(GRU_GSEG_PAGESIZE * (th))
 #define THREAD_POINTER(p, th)		(p + GRU_GSEG_PAGESIZE * (th))
+#define GSEG_START(cb)			((void *)((unsigned long)(cb) & ~(GRU_GSEG_PAGESIZE - 1)))
+
+/*
+ * Statistics kept on a per-GTS basis.
+ */
+struct gts_statistics {
+	unsigned long	fmm_tlbdropin;
+	unsigned long	upm_tlbdropin;
+	unsigned long	context_stolen;
+};
+
+struct gru_get_gseg_statistics_req {
+	unsigned long		gseg;
+	struct gts_statistics	stats;
+};
 
 /*
  * Structure used to pass TLB flush parameters to the driver
@@ -75,6 +96,16 @@ struct gru_unload_context_req {
 };
 
 /*
+ * Structure used to set context options
+ */
+enum {sco_gseg_owner, sco_cch_req_slice};
+struct gru_set_context_option_req {
+	unsigned long	gseg;
+	int		op;
+	unsigned long	val1;
+};
+
+/*
  * Structure used to pass TLB flush parameters to the driver
  */
 struct gru_flush_tlb_req {
@@ -84,6 +115,36 @@ struct gru_flush_tlb_req {
 };
 
 /*
+ * Structure used to pass parameters for dumping GRU chiplet state
+ */
+enum {dcs_pid, dcs_gid};
+struct gru_dump_chiplet_state_req {
+	unsigned int	op;
+	unsigned int	gid;
+	int		ctxnum;
+	char		data_opt;
+	char		lock_cch;
+	pid_t		pid;
+	void		*buf;
+	size_t		buflen;
+	/* ---- output --- */
+	unsigned int	num_contexts;
+};
+
+#define GRU_DUMP_MAGIC	0x3474ab6c
+struct gru_dump_context_header {
+	unsigned int	magic;
+	unsigned int	gid;
+	unsigned char	ctxnum;
+	unsigned char	cbrcnt;
+	unsigned char	dsrcnt;
+	pid_t		pid;
+	unsigned long	vaddr;
+	int		cch_locked;
+	unsigned long	data[0];
+};
+
+/*
  * GRU configuration info (temp - for testing)
  */
 struct gru_config_info {
diff --git a/drivers/misc/sgi-gru/grumain.c b/drivers/misc/sgi-gru/grumain.c
index ec3f7a17d22..3bc643dad60 100644
--- a/drivers/misc/sgi-gru/grumain.c
+++ b/drivers/misc/sgi-gru/grumain.c
@@ -3,11 +3,21 @@
  *
  *			DRIVER TABLE MANAGER + GRU CONTEXT LOAD/UNLOAD
  *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
+ * Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
  *
- * Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 */
 
 #include <linux/kernel.h>
@@ -96,7 +106,7 @@ static int gru_reset_asid_limit(struct gru_state *gru, int asid)
 	gid = gru->gs_gid;
again:
 	for (i = 0; i < GRU_NUM_CCH; i++) {
-		if (!gru->gs_gts[i])
+		if (!gru->gs_gts[i] || is_kernel_context(gru->gs_gts[i]))
 			continue;
 		inuse_asid = gru->gs_gts[i]->ts_gms->ms_asids[gid].mt_asid;
 		gru_dbg(grudev, "gid %d, gts %p, gms %p, inuse 0x%x, cxt %d\n",
@@ -150,7 +160,7 @@ static unsigned long reserve_resources(unsigned long *p, int n, int mmax,
 	unsigned long bits = 0;
 	int i;
 
-	do {
+	while (n--) {
 		i = find_first_bit(p, mmax);
 		if (i == mmax)
 			BUG();
@@ -158,7 +168,7 @@ static unsigned long reserve_resources(unsigned long *p, int n, int mmax,
 		__set_bit(i, &bits);
 		if (idx)
 			*idx++ = i;
-	} while (--n);
+	}
 	return bits;
 }
 
@@ -299,38 +309,39 @@ static struct gru_thread_state *gru_find_current_gts_nolock(struct gru_vma_data
 /*
  * Allocate a thread state structure.
  */
-static struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma,
-					struct gru_vma_data *vdata,
-					int tsid)
+struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma,
+		int cbr_au_count, int dsr_au_count, int options, int tsid)
 {
 	struct gru_thread_state *gts;
 	int bytes;
 
-	bytes = DSR_BYTES(vdata->vd_dsr_au_count) +
-				CBR_BYTES(vdata->vd_cbr_au_count);
+	bytes = DSR_BYTES(dsr_au_count) + CBR_BYTES(cbr_au_count);
 	bytes += sizeof(struct gru_thread_state);
-	gts = kzalloc(bytes, GFP_KERNEL);
+	gts = kmalloc(bytes, GFP_KERNEL);
 	if (!gts)
 		return NULL;
 
 	STAT(gts_alloc);
+	memset(gts, 0, sizeof(struct gru_thread_state)); /* zero out header */
 	atomic_set(&gts->ts_refcnt, 1);
 	mutex_init(&gts->ts_ctxlock);
-	gts->ts_cbr_au_count = vdata->vd_cbr_au_count;
-	gts->ts_dsr_au_count = vdata->vd_dsr_au_count;
-	gts->ts_user_options = vdata->vd_user_options;
+	gts->ts_cbr_au_count = cbr_au_count;
+	gts->ts_dsr_au_count = dsr_au_count;
+	gts->ts_user_options = options;
 	gts->ts_tsid = tsid;
-	gts->ts_user_options = vdata->vd_user_options;
 	gts->ts_ctxnum = NULLCTX;
-	gts->ts_mm = current->mm;
-	gts->ts_vma = vma;
 	gts->ts_tlb_int_select = -1;
-	gts->ts_gms = gru_register_mmu_notifier();
+	gts->ts_cch_req_slice = -1;
 	gts->ts_sizeavail = GRU_SIZEAVAIL(PAGE_SHIFT);
-	if (!gts->ts_gms)
-		goto err;
+	if (vma) {
+		gts->ts_mm = current->mm;
+		gts->ts_vma = vma;
+		gts->ts_gms = gru_register_mmu_notifier();
+		if (!gts->ts_gms)
+			goto err;
+	}
 
-	gru_dbg(grudev, "alloc vdata %p, new gts %p\n", vdata, gts);
+	gru_dbg(grudev, "alloc gts %p\n", gts);
 	return gts;
 
err:
@@ -381,7 +392,8 @@ struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct *vma,
 	struct gru_vma_data *vdata = vma->vm_private_data;
 	struct gru_thread_state *gts, *ngts;
 
-	gts = gru_alloc_gts(vma, vdata, tsid);
+	gts = gru_alloc_gts(vma, vdata->vd_cbr_au_count, vdata->vd_dsr_au_count,
+			    vdata->vd_user_options, tsid);
 	if (!gts)
 		return NULL;
 
@@ -458,7 +470,8 @@ static void gru_prefetch_context(void *gseg, void *cb, void *cbe,
 }
 
 static void gru_load_context_data(void *save, void *grubase, int ctxnum,
-				unsigned long cbrmap, unsigned long dsrmap)
+				  unsigned long cbrmap, unsigned long dsrmap,
+				  int data_valid)
 {
 	void *gseg, *cb, *cbe;
 	unsigned long length;
@@ -471,12 +484,22 @@ static void gru_load_context_data(void *save, void *grubase, int ctxnum,
 	gru_prefetch_context(gseg, cb, cbe, cbrmap, length);
 	for_each_cbr_in_allocation_map(i,
&cbrmap, scr) { - save += gru_copy_handle(cb, save); - save += gru_copy_handle(cbe + i * GRU_HANDLE_STRIDE, save); + if (data_valid) { + save += gru_copy_handle(cb, save); + save += gru_copy_handle(cbe + i * GRU_HANDLE_STRIDE, + save); + } else { + memset(cb, 0, GRU_CACHE_LINE_BYTES); + memset(cbe + i * GRU_HANDLE_STRIDE, 0, + GRU_CACHE_LINE_BYTES); + } cb += GRU_HANDLE_STRIDE; } - memcpy(gseg + GRU_DS_BASE, save, length); + if (data_valid) + memcpy(gseg + GRU_DS_BASE, save, length); + else + memset(gseg + GRU_DS_BASE, 0, length); } static void gru_unload_context_data(void *save, void *grubase, int ctxnum, @@ -506,7 +529,8 @@ void gru_unload_context(struct gru_thread_state *gts, int savestate) struct gru_context_configuration_handle *cch; int ctxnum = gts->ts_ctxnum; - zap_vma_ptes(gts->ts_vma, UGRUADDR(gts), GRU_GSEG_PAGESIZE); + if (!is_kernel_context(gts)) + zap_vma_ptes(gts->ts_vma, UGRUADDR(gts), GRU_GSEG_PAGESIZE); cch = get_cch(gru->gs_gru_base_vaddr, ctxnum); gru_dbg(grudev, "gts %p\n", gts); @@ -514,11 +538,14 @@ void gru_unload_context(struct gru_thread_state *gts, int savestate) if (cch_interrupt_sync(cch)) BUG(); - gru_unload_mm_tracker(gru, gts); - if (savestate) + if (!is_kernel_context(gts)) + gru_unload_mm_tracker(gru, gts); + if (savestate) { gru_unload_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr, ctxnum, gts->ts_cbr_map, gts->ts_dsr_map); + gts->ts_data_valid = 1; + } if (cch_deallocate(cch)) BUG(); @@ -526,24 +553,22 @@ void gru_unload_context(struct gru_thread_state *gts, int savestate) unlock_cch_handle(cch); gru_free_gru_context(gts); - STAT(unload_context); } /* * Load a GRU context by copying it from the thread data structure in memory * to the GRU. */ -static void gru_load_context(struct gru_thread_state *gts) +void gru_load_context(struct gru_thread_state *gts) { struct gru_state *gru = gts->ts_gru; struct gru_context_configuration_handle *cch; - int err, asid, ctxnum = gts->ts_ctxnum; + int i, err, asid, ctxnum = gts->ts_ctxnum; gru_dbg(grudev, "gts %p\n", gts); cch = get_cch(gru->gs_gru_base_vaddr, ctxnum); lock_cch_handle(cch); - asid = gru_load_mm_tracker(gru, gts); cch->tfm_fault_bit_enable = (gts->ts_user_options == GRU_OPT_MISS_FMM_POLL || gts->ts_user_options == GRU_OPT_MISS_FMM_INTR); @@ -552,9 +577,32 @@ static void gru_load_context(struct gru_thread_state *gts) gts->ts_tlb_int_select = gru_cpu_fault_map_id(); cch->tlb_int_select = gts->ts_tlb_int_select; } + if (gts->ts_cch_req_slice >= 0) { + cch->req_slice_set_enable = 1; + cch->req_slice = gts->ts_cch_req_slice; + } else { + cch->req_slice_set_enable =0; + } cch->tfm_done_bit_enable = 0; - err = cch_allocate(cch, asid, gts->ts_sizeavail, gts->ts_cbr_map, - gts->ts_dsr_map); + cch->dsr_allocation_map = gts->ts_dsr_map; + cch->cbr_allocation_map = gts->ts_cbr_map; + + if (is_kernel_context(gts)) { + cch->unmap_enable = 1; + cch->tfm_done_bit_enable = 1; + cch->cb_int_enable = 1; + } else { + cch->unmap_enable = 0; + cch->tfm_done_bit_enable = 0; + cch->cb_int_enable = 0; + asid = gru_load_mm_tracker(gru, gts); + for (i = 0; i < 8; i++) { + cch->asid[i] = asid + i; + cch->sizeavail[i] = gts->ts_sizeavail; + } + } + + err = cch_allocate(cch); if (err) { gru_dbg(grudev, "err %d: cch %p, gts %p, cbr 0x%lx, dsr 0x%lx\n", @@ -563,13 +611,11 @@ static void gru_load_context(struct gru_thread_state *gts) } gru_load_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr, ctxnum, - gts->ts_cbr_map, gts->ts_dsr_map); + gts->ts_cbr_map, gts->ts_dsr_map, gts->ts_data_valid); if (cch_start(cch)) BUG(); 
 	unlock_cch_handle(cch);
-
-	STAT(load_context);
 }
 
 /*
@@ -599,6 +645,9 @@ int gru_update_cch(struct gru_thread_state *gts, int force_unload)
 			cch->sizeavail[i] = gts->ts_sizeavail;
 		gts->ts_tlb_int_select = gru_cpu_fault_map_id();
 		cch->tlb_int_select = gru_cpu_fault_map_id();
+		cch->tfm_fault_bit_enable =
+			(gts->ts_user_options == GRU_OPT_MISS_FMM_POLL
+			|| gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
 	} else {
 		for (i = 0; i < 8; i++)
 			cch->asid[i] = 0;
@@ -642,7 +691,28 @@ static int gru_retarget_intr(struct gru_thread_state *gts)
 #define next_gru(b, g) (((g) < &(b)->bs_grus[GRU_CHIPLETS_PER_BLADE - 1]) ?  \
 				 ((g)+1) : &(b)->bs_grus[0])
 
-static void gru_steal_context(struct gru_thread_state *gts)
+static int is_gts_stealable(struct gru_thread_state *gts,
+		struct gru_blade_state *bs)
+{
+	if (is_kernel_context(gts))
+		return down_write_trylock(&bs->bs_kgts_sema);
+	else
+		return mutex_trylock(&gts->ts_ctxlock);
+}
+
+static void gts_stolen(struct gru_thread_state *gts,
+		struct gru_blade_state *bs)
+{
+	if (is_kernel_context(gts)) {
+		up_write(&bs->bs_kgts_sema);
+		STAT(steal_kernel_context);
+	} else {
+		mutex_unlock(&gts->ts_ctxlock);
+		STAT(steal_user_context);
+	}
+}
+
+void gru_steal_context(struct gru_thread_state *gts, int blade_id)
 {
 	struct gru_blade_state *blade;
 	struct gru_state *gru, *gru0;
@@ -652,8 +722,7 @@ static void gru_steal_context(struct gru_thread_state *gts)
 	cbr = gts->ts_cbr_au_count;
 	dsr = gts->ts_dsr_au_count;
 
-	preempt_disable();
-	blade = gru_base[uv_numa_blade_id()];
+	blade = gru_base[blade_id];
 	spin_lock(&blade->bs_lock);
 
 	ctxnum = next_ctxnum(blade->bs_lru_ctxnum);
@@ -676,7 +745,7 @@ static void gru_steal_context(struct gru_thread_state *gts)
 		 * success are high. If trylock fails, try to steal a
 		 * different GSEG.
 		 */
-		if (ngts && mutex_trylock(&ngts->ts_ctxlock))
+		if (ngts && is_gts_stealable(ngts, blade))
 			break;
 		ngts = NULL;
 		flag = 1;
@@ -690,13 +759,12 @@ static void gru_steal_context(struct gru_thread_state *gts)
 	blade->bs_lru_gru = gru;
 	blade->bs_lru_ctxnum = ctxnum;
 	spin_unlock(&blade->bs_lock);
-	preempt_enable();
 
 	if (ngts) {
-		STAT(steal_context);
+		gts->ustats.context_stolen++;
 		ngts->ts_steal_jiffies = jiffies;
-		gru_unload_context(ngts, 1);
-		mutex_unlock(&ngts->ts_ctxlock);
+		gru_unload_context(ngts, is_kernel_context(ngts) ? 0 : 1);
+		gts_stolen(ngts, blade);
 	} else {
 		STAT(steal_context_failed);
 	}
@@ -710,17 +778,17 @@
 
 /*
  * Scan the GRUs on the local blade & assign a GRU context.
  */
-static struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts)
+struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts,
+		int blade)
 {
 	struct gru_state *gru, *grux;
 	int i, max_active_contexts;
 
-	preempt_disable();
again:
 	gru = NULL;
 	max_active_contexts = GRU_NUM_CCH;
-	for_each_gru_on_blade(grux, uv_numa_blade_id(), i) {
+	for_each_gru_on_blade(grux, blade, i) {
 		if (check_gru_resources(grux, gts->ts_cbr_au_count,
 					gts->ts_dsr_au_count,
 					max_active_contexts)) {
@@ -760,7 +828,6 @@ again:
 		STAT(assign_context_failed);
 	}
 
-	preempt_enable();
 	return gru;
 }
 
@@ -775,6 +842,7 @@ int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	struct gru_thread_state *gts;
 	unsigned long paddr, vaddr;
+	int blade_id;
 
 	vaddr = (unsigned long)vmf->virtual_address;
 	gru_dbg(grudev, "vma %p, vaddr 0x%lx (0x%lx)\n",
@@ -789,8 +857,10 @@ int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 
again:
 	mutex_lock(&gts->ts_ctxlock);
 	preempt_disable();
+	blade_id = uv_numa_blade_id();
+
 	if (gts->ts_gru) {
-		if (gts->ts_gru->gs_blade_id != uv_numa_blade_id()) {
+		if (gts->ts_gru->gs_blade_id != blade_id) {
 			STAT(migrated_nopfn_unload);
 			gru_unload_context(gts, 1);
 		} else {
@@ -800,12 +870,15 @@ again:
 	}
 
 	if (!gts->ts_gru) {
-		if (!gru_assign_gru_context(gts)) {
-			mutex_unlock(&gts->ts_ctxlock);
+		STAT(load_user_context);
+		if (!gru_assign_gru_context(gts, blade_id)) {
 			preempt_enable();
+			mutex_unlock(&gts->ts_ctxlock);
+			set_current_state(TASK_INTERRUPTIBLE);
 			schedule_timeout(GRU_ASSIGN_DELAY);  /* true hack ZZZ */
+			blade_id = uv_numa_blade_id();
 			if (gts->ts_steal_jiffies + GRU_STEAL_DELAY < jiffies)
-				gru_steal_context(gts);
+				gru_steal_context(gts, blade_id);
 			goto again;
 		}
 		gru_load_context(gts);
@@ -815,8 +888,8 @@ again:
 				vma->vm_page_prot);
 	}
 
-	mutex_unlock(&gts->ts_ctxlock);
-	preempt_enable();
+	preempt_enable();
+	mutex_unlock(&gts->ts_ctxlock);
 
 	return VM_FAULT_NOPAGE;
 }
diff --git a/drivers/misc/sgi-gru/gruprocfs.c b/drivers/misc/sgi-gru/gruprocfs.c
index ee74821b171..9cbf95bedce 100644
--- a/drivers/misc/sgi-gru/gruprocfs.c
+++ b/drivers/misc/sgi-gru/gruprocfs.c
@@ -51,9 +51,12 @@ static int statistics_show(struct seq_file *s, void *p)
 	printstat(s, assign_context);
 	printstat(s, assign_context_failed);
 	printstat(s, free_context);
-	printstat(s, load_context);
-	printstat(s, unload_context);
-	printstat(s, steal_context);
+	printstat(s, load_user_context);
+	printstat(s, load_kernel_context);
+	printstat(s, lock_kernel_context);
+	printstat(s, unlock_kernel_context);
+	printstat(s, steal_user_context);
+	printstat(s, steal_kernel_context);
 	printstat(s, steal_context_failed);
 	printstat(s, nopfn);
 	printstat(s, break_cow);
@@ -70,7 +73,7 @@ static int statistics_show(struct seq_file *s, void *p)
 	printstat(s, user_flush_tlb);
 	printstat(s, user_unload_context);
 	printstat(s, user_exception);
-	printstat(s, set_task_slice);
+	printstat(s, set_context_option);
 	printstat(s, migrate_check);
 	printstat(s, migrated_retarget);
 	printstat(s, migrated_unload);
@@ -84,6 +87,9 @@ static int statistics_show(struct seq_file *s, void *p)
 	printstat(s, tlb_dropin_fail_range_active);
 	printstat(s, tlb_dropin_fail_idle);
 	printstat(s, tlb_dropin_fail_fmm);
+	printstat(s, tlb_dropin_fail_no_exception);
+	printstat(s, tlb_dropin_fail_no_exception_war);
+	printstat(s, tfh_stale_on_fault);
 	printstat(s, mmu_invalidate_range);
 	printstat(s, mmu_invalidate_page);
 	printstat(s, mmu_clear_flush_young);
@@ -158,8 +164,7 @@ static ssize_t options_write(struct file *file, const char __user *userbuf,
 	unsigned long val;
 	char buf[80];
 
-	if
(copy_from_user - (buf, userbuf, count < sizeof(buf) ? count : sizeof(buf))) + if (strncpy_from_user(buf, userbuf, sizeof(buf) - 1) < 0) return -EFAULT; buf[count - 1] = '\0'; if (!strict_strtoul(buf, 10, &val)) diff --git a/drivers/misc/sgi-gru/grutables.h b/drivers/misc/sgi-gru/grutables.h index bf1eeb7553e..34ab3d45391 100644 --- a/drivers/misc/sgi-gru/grutables.h +++ b/drivers/misc/sgi-gru/grutables.h @@ -148,11 +148,13 @@ #include <linux/wait.h> #include <linux/mmu_notifier.h> #include "gru.h" +#include "grulib.h" #include "gruhandles.h" extern struct gru_stats_s gru_stats; extern struct gru_blade_state *gru_base[]; extern unsigned long gru_start_paddr, gru_end_paddr; +extern void *gru_start_vaddr; extern unsigned int gru_max_gids; #define GRU_MAX_BLADES MAX_NUMNODES @@ -174,9 +176,12 @@ struct gru_stats_s { atomic_long_t assign_context; atomic_long_t assign_context_failed; atomic_long_t free_context; - atomic_long_t load_context; - atomic_long_t unload_context; - atomic_long_t steal_context; + atomic_long_t load_user_context; + atomic_long_t load_kernel_context; + atomic_long_t lock_kernel_context; + atomic_long_t unlock_kernel_context; + atomic_long_t steal_user_context; + atomic_long_t steal_kernel_context; atomic_long_t steal_context_failed; atomic_long_t nopfn; atomic_long_t break_cow; @@ -193,7 +198,7 @@ struct gru_stats_s { atomic_long_t user_flush_tlb; atomic_long_t user_unload_context; atomic_long_t user_exception; - atomic_long_t set_task_slice; + atomic_long_t set_context_option; atomic_long_t migrate_check; atomic_long_t migrated_retarget; atomic_long_t migrated_unload; @@ -207,6 +212,9 @@ struct gru_stats_s { atomic_long_t tlb_dropin_fail_range_active; atomic_long_t tlb_dropin_fail_idle; atomic_long_t tlb_dropin_fail_fmm; + atomic_long_t tlb_dropin_fail_no_exception; + atomic_long_t tlb_dropin_fail_no_exception_war; + atomic_long_t tfh_stale_on_fault; atomic_long_t mmu_invalidate_range; atomic_long_t mmu_invalidate_page; atomic_long_t mmu_clear_flush_young; @@ -253,7 +261,6 @@ extern struct mcs_op_statistic mcs_op_statistics[mcsop_last]; #define OPT_DPRINT 1 #define OPT_STATS 2 -#define GRU_QUICKLOOK 4 #define IRQ_GRU 110 /* Starting IRQ number for interrupts */ @@ -373,6 +380,7 @@ struct gru_thread_state { required for contest */ unsigned char ts_cbr_au_count;/* Number of CBR resources required for contest */ + char ts_cch_req_slice;/* CCH packet slice */ char ts_blade; /* If >= 0, migrate context if ref from diferent blade */ char ts_force_cch_reload; @@ -380,6 +388,9 @@ struct gru_thread_state { after migration */ char ts_cbr_idx[GRU_CBR_AU];/* CBR numbers of each allocated CB */ + int ts_data_valid; /* Indicates if ts_gdata has + valid data */ + struct gts_statistics ustats; /* User statistics */ unsigned long ts_gdata[0]; /* save area for GRU data (CB, DS, CBE) */ }; @@ -452,6 +463,14 @@ struct gru_blade_state { reserved cb */ void *kernel_dsr; /* First kernel reserved DSR */ + struct rw_semaphore bs_kgts_sema; /* lock for kgts */ + struct gru_thread_state *bs_kgts; /* GTS for kernel use */ + + /* ---- the following are used for managing kernel async GRU CBRs --- */ + int bs_async_dsr_bytes; /* DSRs for async */ + int bs_async_cbrs; /* CBRs AU for async */ + struct completion *bs_async_wq; + /* ---- the following are protected by the bs_lock spinlock ---- */ spinlock_t bs_lock; /* lock used for stealing contexts */ @@ -552,6 +571,12 @@ struct gru_blade_state { /* Lock hierarchy checking enabled only in emulator */ +/* 0 = lock failed, 1 = locked */ +static inline int 
__trylock_handle(void *h) +{ + return !test_and_set_bit(1, h); +} + static inline void __lock_handle(void *h) { while (test_and_set_bit(1, h)) @@ -563,6 +588,11 @@ static inline void __unlock_handle(void *h) clear_bit(1, h); } +static inline int trylock_cch_handle(struct gru_context_configuration_handle *cch) +{ + return __trylock_handle(cch); +} + static inline void lock_cch_handle(struct gru_context_configuration_handle *cch) { __lock_handle(cch); @@ -584,6 +614,11 @@ static inline void unlock_tgh_handle(struct gru_tlb_global_handle *tgh) __unlock_handle(tgh); } +static inline int is_kernel_context(struct gru_thread_state *gts) +{ + return !gts->ts_mm; +} + /*----------------------------------------------------------------------------- * Function prototypes & externs */ @@ -598,24 +633,32 @@ extern struct gru_thread_state *gru_find_thread_state(struct vm_area_struct *vma, int tsid); extern struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct *vma, int tsid); +extern struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts, + int blade); +extern void gru_load_context(struct gru_thread_state *gts); +extern void gru_steal_context(struct gru_thread_state *gts, int blade_id); extern void gru_unload_context(struct gru_thread_state *gts, int savestate); extern int gru_update_cch(struct gru_thread_state *gts, int force_unload); extern void gts_drop(struct gru_thread_state *gts); extern void gru_tgh_flush_init(struct gru_state *gru); -extern int gru_kservices_init(struct gru_state *gru); -extern void gru_kservices_exit(struct gru_state *gru); +extern int gru_kservices_init(void); +extern void gru_kservices_exit(void); +extern int gru_dump_chiplet_request(unsigned long arg); +extern long gru_get_gseg_statistics(unsigned long arg); extern irqreturn_t gru_intr(int irq, void *dev_id); extern int gru_handle_user_call_os(unsigned long address); extern int gru_user_flush_tlb(unsigned long arg); extern int gru_user_unload_context(unsigned long arg); extern int gru_get_exception_detail(unsigned long arg); -extern int gru_set_task_slice(long address); +extern int gru_set_context_option(unsigned long address); extern int gru_cpu_fault_map_id(void); extern struct vm_area_struct *gru_find_vma(unsigned long vaddr); extern void gru_flush_all_tlb(struct gru_state *gru); extern int gru_proc_init(void); extern void gru_proc_exit(void); +extern struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma, + int cbr_au_count, int dsr_au_count, int options, int tsid); extern unsigned long gru_reserve_cb_resources(struct gru_state *gru, int cbr_au_count, char *cbmap); extern unsigned long gru_reserve_ds_resources(struct gru_state *gru, @@ -624,6 +667,7 @@ extern int gru_fault(struct vm_area_struct *, struct vm_fault *vmf); extern struct gru_mm_struct *gru_register_mmu_notifier(void); extern void gru_drop_mmu_notifier(struct gru_mm_struct *gms); +extern int gru_ktest(unsigned long arg); extern void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start, unsigned long len); diff --git a/drivers/pps/Kconfig b/drivers/pps/Kconfig new file mode 100644 index 00000000000..cc2eb8edb51 --- /dev/null +++ b/drivers/pps/Kconfig @@ -0,0 +1,33 @@ +# +# PPS support configuration +# + +menu "PPS support" + +config PPS + tristate "PPS support" + depends on EXPERIMENTAL + ---help--- + PPS (Pulse Per Second) is a special pulse provided by some GPS + antennae. Userland can use it to get a high-precision time + reference. 
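
To make the "high-precision time reference" concrete: user space opens the source's character device (created by the core code added below in drivers/pps/pps.c) and blocks for the next pulse with the RFC 2783 style PPS_FETCH ioctl. A sketch under stated assumptions: the /dev/pps0 node name and the <linux/pps.h> header are assumed, and error handling is minimal.

	#include <stdio.h>
	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <linux/pps.h>

	int main(void)
	{
		struct pps_fdata fdata;
		int fd = open("/dev/pps0", O_RDONLY);

		if (fd < 0)
			return 1;

		/* block until the next event: no timeout */
		fdata.timeout.flags = PPS_TIME_INVALID;
		if (ioctl(fd, PPS_FETCH, &fdata) < 0) {
			close(fd);
			return 1;
		}

		printf("assert #%u at %lld.%09d\n",
		       fdata.info.assert_sequence,
		       (long long) fdata.info.assert_tu.sec,
		       fdata.info.assert_tu.nsec);
		close(fd);
		return 0;
	}
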
+
+	  Some antennae's PPS signals are connected to the CD (Carrier
+	  Detect) pin of the serial line they use to communicate with the
+	  host. In this case use the SERIAL_LINE client support.
+
+	  Some antennae's PPS signals are connected to some special host
+	  inputs, so you have to enable the corresponding client support.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called pps_core.ko.
+
+config PPS_DEBUG
+	bool "PPS debugging messages"
+	depends on PPS
+	help
+	  Say Y here if you want the PPS support to produce a bunch of debug
+	  messages to the system log.  Select this if you are having a
+	  problem with PPS support and want to see more of what is going on.
+
+endmenu
diff --git a/drivers/pps/Makefile b/drivers/pps/Makefile
new file mode 100644
index 00000000000..19ea582f431
--- /dev/null
+++ b/drivers/pps/Makefile
@@ -0,0 +1,8 @@
+#
+# Makefile for the PPS core.
+#
+
+pps_core-y			:= pps.o kapi.o sysfs.o
+obj-$(CONFIG_PPS)		:= pps_core.o
+
+ccflags-$(CONFIG_PPS_DEBUG)	:= -DDEBUG
diff --git a/drivers/pps/kapi.c b/drivers/pps/kapi.c
new file mode 100644
index 00000000000..35a0b192d76
--- /dev/null
+++ b/drivers/pps/kapi.c
@@ -0,0 +1,329 @@
+/*
+ * kernel API
+ *
+ *
+ * Copyright (C) 2005-2009   Rodolfo Giometti <giometti@linux.it>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/time.h>
+#include <linux/spinlock.h>
+#include <linux/idr.h>
+#include <linux/fs.h>
+#include <linux/pps_kernel.h>
+
+/*
+ * Global variables
+ */
+
+DEFINE_SPINLOCK(pps_idr_lock);
+DEFINE_IDR(pps_idr);
+
+/*
+ * Local functions
+ */
+
+static void pps_add_offset(struct pps_ktime *ts, struct pps_ktime *offset)
+{
+	ts->nsec += offset->nsec;
+	while (ts->nsec >= NSEC_PER_SEC) {
+		ts->nsec -= NSEC_PER_SEC;
+		ts->sec++;
+	}
+	while (ts->nsec < 0) {
+		ts->nsec += NSEC_PER_SEC;
+		ts->sec--;
+	}
+	ts->sec += offset->sec;
+}
+
+/*
+ * Exported functions
+ */
+
+/* pps_get_source - find a PPS source
+ * @source: the PPS source ID.
+ *
+ * This function is used to find an already registered PPS source in the
+ * system.
+ *
+ * The function returns NULL if nothing is found; otherwise it returns a
+ * pointer to the PPS source data struct (the refcounter is incremented by 1).
+ */
+
+struct pps_device *pps_get_source(int source)
+{
+	struct pps_device *pps;
+	unsigned long flags;
+
+	spin_lock_irqsave(&pps_idr_lock, flags);
+
+	pps = idr_find(&pps_idr, source);
+	if (pps != NULL)
+		atomic_inc(&pps->usage);
+
+	spin_unlock_irqrestore(&pps_idr_lock, flags);
+
+	return pps;
+}
+
+/* pps_put_source - free the PPS source data
+ * @pps: a pointer to the PPS source.
+ *
+ * This function is used to free a PPS data struct if its refcount is 0.
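
pps_add_offset() above adds a signed, user-configured offset and then normalizes the nanosecond field back into the range [0, NSEC_PER_SEC). A standalone check of the carry logic, with struct pps_ktime reduced to the two fields the function touches (the field types here are an assumption made for illustration):

	#include <stdio.h>

	#define NSEC_PER_SEC 1000000000L

	struct pps_ktime { long long sec; long nsec; };

	static void pps_add_offset(struct pps_ktime *ts, struct pps_ktime *offset)
	{
		ts->nsec += offset->nsec;
		while (ts->nsec >= NSEC_PER_SEC) {
			ts->nsec -= NSEC_PER_SEC;
			ts->sec++;
		}
		while (ts->nsec < 0) {
			ts->nsec += NSEC_PER_SEC;
			ts->sec--;
		}
		ts->sec += offset->sec;
	}

	int main(void)
	{
		struct pps_ktime ts  = { 5, 900000000 };	/* 5.9 s */
		struct pps_ktime off = { 0, 200000000 };	/* +0.2 s */

		pps_add_offset(&ts, &off);
		printf("%lld.%09ld\n", ts.sec, ts.nsec);	/* 6.100000000 */

		off.nsec = -300000000;				/* -0.3 s */
		pps_add_offset(&ts, &off);
		printf("%lld.%09ld\n", ts.sec, ts.nsec);	/* 5.800000000 */
		return 0;
	}
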
+ */
+
+void pps_put_source(struct pps_device *pps)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&pps_idr_lock, flags);
+	BUG_ON(atomic_read(&pps->usage) == 0);
+
+	if (!atomic_dec_and_test(&pps->usage)) {
+		pps = NULL;
+		goto exit;
+	}
+
+	/* No more reference to the PPS source. We can safely remove the
+	 * PPS data struct.
+	 */
+	idr_remove(&pps_idr, pps->id);
+
+exit:
+	spin_unlock_irqrestore(&pps_idr_lock, flags);
+	kfree(pps);
+}
+
+/* pps_register_source - add a PPS source in the system
+ * @info: the PPS info struct
+ * @default_params: the default PPS parameters of the new source
+ *
+ * This function is used to add a new PPS source in the system. The new
+ * source is described by info's fields and it will have, as default PPS
+ * parameters, the ones specified in default_params.
+ *
+ * The function returns, in case of success, the PPS source ID.
+ */
+
+int pps_register_source(struct pps_source_info *info, int default_params)
+{
+	struct pps_device *pps;
+	int id;
+	int err;
+
+	/* Sanity checks */
+	if ((info->mode & default_params) != default_params) {
+		printk(KERN_ERR "pps: %s: unsupported default parameters\n",
+					info->name);
+		err = -EINVAL;
+		goto pps_register_source_exit;
+	}
+	if ((info->mode & (PPS_ECHOASSERT | PPS_ECHOCLEAR)) != 0 &&
+			info->echo == NULL) {
+		printk(KERN_ERR "pps: %s: echo function is not defined\n",
+					info->name);
+		err = -EINVAL;
+		goto pps_register_source_exit;
+	}
+	if ((info->mode & (PPS_TSFMT_TSPEC | PPS_TSFMT_NTPFP)) == 0) {
+		printk(KERN_ERR "pps: %s: unspecified time format\n",
+					info->name);
+		err = -EINVAL;
+		goto pps_register_source_exit;
+	}
+
+	/* Allocate memory for the new PPS source struct */
+	pps = kzalloc(sizeof(struct pps_device), GFP_KERNEL);
+	if (pps == NULL) {
+		err = -ENOMEM;
+		goto pps_register_source_exit;
+	}
+
+	/* These initializations must be done before calling idr_get_new()
+	 * in order to avoid races in pps_event().
+	 */
+	pps->params.api_version = PPS_API_VERS;
+	pps->params.mode = default_params;
+	pps->info = *info;
+
+	init_waitqueue_head(&pps->queue);
+	spin_lock_init(&pps->lock);
+	atomic_set(&pps->usage, 1);
+
+	/* Get new ID for the new PPS source */
+	if (idr_pre_get(&pps_idr, GFP_KERNEL) == 0) {
+		err = -ENOMEM;
+		goto kfree_pps;
+	}
+
+	spin_lock_irq(&pps_idr_lock);
+
+	/* Now really allocate the PPS source.
+	 * Once idr_get_new() has been called, the new source is visible to
+	 * the rest of the kernel.
+ */ + err = idr_get_new(&pps_idr, pps, &id); + if (err < 0) { + spin_unlock_irq(&pps_idr_lock); + goto kfree_pps; + } + + id = id & MAX_ID_MASK; + if (id >= PPS_MAX_SOURCES) { + spin_unlock_irq(&pps_idr_lock); + + printk(KERN_ERR "pps: %s: too many PPS sources in the system\n", + info->name); + err = -EBUSY; + goto free_idr; + } + pps->id = id; + + spin_unlock_irq(&pps_idr_lock); + + /* Create the char device */ + err = pps_register_cdev(pps); + if (err < 0) { + printk(KERN_ERR "pps: %s: unable to create char device\n", + info->name); + goto free_idr; + } + + pr_info("new PPS source %s at ID %d\n", info->name, id); + + return id; + +free_idr: + spin_lock_irq(&pps_idr_lock); + idr_remove(&pps_idr, id); + spin_unlock_irq(&pps_idr_lock); + +kfree_pps: + kfree(pps); + +pps_register_source_exit: + printk(KERN_ERR "pps: %s: unable to register source\n", info->name); + + return err; +} +EXPORT_SYMBOL(pps_register_source); + +/* pps_unregister_source - remove a PPS source from the system + * @source: the PPS source ID + * + * This function is used to remove a previously registered PPS source from + * the system. + */ + +void pps_unregister_source(int source) +{ + struct pps_device *pps; + + spin_lock_irq(&pps_idr_lock); + pps = idr_find(&pps_idr, source); + + if (!pps) { + BUG(); + spin_unlock_irq(&pps_idr_lock); + return; + } + spin_unlock_irq(&pps_idr_lock); + + pps_unregister_cdev(pps); + pps_put_source(pps); +} +EXPORT_SYMBOL(pps_unregister_source); + +/* pps_event - register a PPS event into the system + * @source: the PPS source ID + * @ts: the event timestamp + * @event: the event type + * @data: userdef pointer + * + * This function is used by each PPS client in order to register a new + * PPS event into the system (it's usually called inside an IRQ handler). + * + * If an echo function is associated with the PPS source it will be called + * as: + * pps->info.echo(source, event, data); + */ + +void pps_event(int source, struct pps_ktime *ts, int event, void *data) +{ + struct pps_device *pps; + unsigned long flags; + + if ((event & (PPS_CAPTUREASSERT | PPS_CAPTURECLEAR)) == 0) { + printk(KERN_ERR "pps: unknown event (%x) for source %d\n", + event, source); + return; + } + + pps = pps_get_source(source); + if (!pps) + return; + + pr_debug("PPS event on source %d at %llu.%06u\n", + pps->id, (unsigned long long) ts->sec, ts->nsec); + + spin_lock_irqsave(&pps->lock, flags); + + /* Must call the echo function? */ + if ((pps->params.mode & (PPS_ECHOASSERT | PPS_ECHOCLEAR))) + pps->info.echo(source, event, data); + + /* Check the event */ + pps->current_mode = pps->params.mode; + if (event & PPS_CAPTUREASSERT) { + /* We have to add an offset? */ + if (pps->params.mode & PPS_OFFSETASSERT) + pps_add_offset(ts, &pps->params.assert_off_tu); + + /* Save the time stamp */ + pps->assert_tu = *ts; + pps->assert_sequence++; + pr_debug("capture assert seq #%u for source %d\n", + pps->assert_sequence, source); + } + if (event & PPS_CAPTURECLEAR) { + /* We have to add an offset? 
 */
+		if (pps->params.mode & PPS_OFFSETCLEAR)
+			pps_add_offset(ts, &pps->params.clear_off_tu);
+
+		/* Save the time stamp */
+		pps->clear_tu = *ts;
+		pps->clear_sequence++;
+		pr_debug("capture clear seq #%u for source %d\n",
+			pps->clear_sequence, source);
+	}
+
+	pps->go = ~0;
+	wake_up_interruptible(&pps->queue);
+
+	kill_fasync(&pps->async_queue, SIGIO, POLL_IN);
+
+	spin_unlock_irqrestore(&pps->lock, flags);
+
+	/* Now we can release the PPS source for (possible) deregistration */
+	pps_put_source(pps);
+}
+EXPORT_SYMBOL(pps_event);
diff --git a/drivers/pps/pps.c b/drivers/pps/pps.c
new file mode 100644
index 00000000000..ac8cc8cea1e
--- /dev/null
+++ b/drivers/pps/pps.c
@@ -0,0 +1,312 @@
+/*
+ * PPS core file
+ *
+ *
+ * Copyright (C) 2005-2009   Rodolfo Giometti <giometti@linux.it>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/uaccess.h>
+#include <linux/idr.h>
+#include <linux/cdev.h>
+#include <linux/poll.h>
+#include <linux/pps_kernel.h>
+
+/*
+ * Local variables
+ */
+
+static dev_t pps_devt;
+static struct class *pps_class;
+
+/*
+ * Char device methods
+ */
+
+static unsigned int pps_cdev_poll(struct file *file, poll_table *wait)
+{
+	struct pps_device *pps = file->private_data;
+
+	poll_wait(file, &pps->queue, wait);
+
+	return POLLIN | POLLRDNORM;
+}
+
+static int pps_cdev_fasync(int fd, struct file *file, int on)
+{
+	struct pps_device *pps = file->private_data;
+	return fasync_helper(fd, file, on, &pps->async_queue);
+}
+
+static long pps_cdev_ioctl(struct file *file,
+		unsigned int cmd, unsigned long arg)
+{
+	struct pps_device *pps = file->private_data;
+	struct pps_kparams params;
+	struct pps_fdata fdata;
+	unsigned long ticks;
+	void __user *uarg = (void __user *) arg;
+	int __user *iuarg = (int __user *) arg;
+	int err;
+
+	switch (cmd) {
+	case PPS_GETPARAMS:
+		pr_debug("PPS_GETPARAMS: source %d\n", pps->id);
+
+		/* Return current parameters */
+		err = copy_to_user(uarg, &pps->params,
+						sizeof(struct pps_kparams));
+		if (err)
+			return -EFAULT;
+
+		break;
+
+	case PPS_SETPARAMS:
+		pr_debug("PPS_SETPARAMS: source %d\n", pps->id);
+
+		/* Check the capabilities */
+		if (!capable(CAP_SYS_TIME))
+			return -EPERM;
+
+		err = copy_from_user(&params, uarg, sizeof(struct pps_kparams));
+		if (err)
+			return -EFAULT;
+		if (!(params.mode & (PPS_CAPTUREASSERT | PPS_CAPTURECLEAR))) {
+			pr_debug("capture mode unspecified (%x)\n",
+								params.mode);
+			return -EINVAL;
+		}
+
+		/* Check for supported capabilities */
+		if ((params.mode & ~pps->info.mode) != 0) {
+			pr_debug("unsupported capabilities (%x)\n",
+								params.mode);
+			return -EINVAL;
+		}
+
+		spin_lock_irq(&pps->lock);
+
+		/* Save the new parameters */
+		pps->params = params;
+
+		/* Restore the read only parameters */
+		if ((params.mode & (PPS_TSFMT_TSPEC |
PPS_TSFMT_NTPFP)) == 0) { + /* section 3.3 of RFC 2783 interpreted */ + pr_debug("time format unspecified (%x)\n", + params.mode); + pps->params.mode |= PPS_TSFMT_TSPEC; + } + if (pps->info.mode & PPS_CANWAIT) + pps->params.mode |= PPS_CANWAIT; + pps->params.api_version = PPS_API_VERS; + + spin_unlock_irq(&pps->lock); + + break; + + case PPS_GETCAP: + pr_debug("PPS_GETCAP: source %d\n", pps->id); + + err = put_user(pps->info.mode, iuarg); + if (err) + return -EFAULT; + + break; + + case PPS_FETCH: + pr_debug("PPS_FETCH: source %d\n", pps->id); + + err = copy_from_user(&fdata, uarg, sizeof(struct pps_fdata)); + if (err) + return -EFAULT; + + pps->go = 0; + + /* Manage the timeout */ + if (fdata.timeout.flags & PPS_TIME_INVALID) + err = wait_event_interruptible(pps->queue, pps->go); + else { + pr_debug("timeout %lld.%09d\n", + (long long) fdata.timeout.sec, + fdata.timeout.nsec); + ticks = fdata.timeout.sec * HZ; + ticks += fdata.timeout.nsec / (NSEC_PER_SEC / HZ); + + if (ticks != 0) { + err = wait_event_interruptible_timeout( + pps->queue, pps->go, ticks); + if (err == 0) + return -ETIMEDOUT; + } + } + + /* Check for pending signals */ + if (err == -ERESTARTSYS) { + pr_debug("pending signal caught\n"); + return -EINTR; + } + + /* Return the fetched timestamp */ + spin_lock_irq(&pps->lock); + + fdata.info.assert_sequence = pps->assert_sequence; + fdata.info.clear_sequence = pps->clear_sequence; + fdata.info.assert_tu = pps->assert_tu; + fdata.info.clear_tu = pps->clear_tu; + fdata.info.current_mode = pps->current_mode; + + spin_unlock_irq(&pps->lock); + + err = copy_to_user(uarg, &fdata, sizeof(struct pps_fdata)); + if (err) + return -EFAULT; + + break; + + default: + return -ENOTTY; + break; + } + + return 0; +} + +static int pps_cdev_open(struct inode *inode, struct file *file) +{ + struct pps_device *pps = container_of(inode->i_cdev, + struct pps_device, cdev); + int found; + + found = pps_get_source(pps->id) != 0; + if (!found) + return -ENODEV; + + file->private_data = pps; + + return 0; +} + +static int pps_cdev_release(struct inode *inode, struct file *file) +{ + struct pps_device *pps = file->private_data; + + /* Free the PPS source and wake up (possible) deregistration */ + pps_put_source(pps); + + return 0; +} + +/* + * Char device stuff + */ + +static const struct file_operations pps_cdev_fops = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .poll = pps_cdev_poll, + .fasync = pps_cdev_fasync, + .unlocked_ioctl = pps_cdev_ioctl, + .open = pps_cdev_open, + .release = pps_cdev_release, +}; + +int pps_register_cdev(struct pps_device *pps) +{ + int err; + + pps->devno = MKDEV(MAJOR(pps_devt), pps->id); + cdev_init(&pps->cdev, &pps_cdev_fops); + pps->cdev.owner = pps->info.owner; + + err = cdev_add(&pps->cdev, pps->devno, 1); + if (err) { + printk(KERN_ERR "pps: %s: failed to add char device %d:%d\n", + pps->info.name, MAJOR(pps_devt), pps->id); + return err; + } + pps->dev = device_create(pps_class, pps->info.dev, pps->devno, NULL, + "pps%d", pps->id); + if (err) + goto del_cdev; + dev_set_drvdata(pps->dev, pps); + + pr_debug("source %s got cdev (%d:%d)\n", pps->info.name, + MAJOR(pps_devt), pps->id); + + return 0; + +del_cdev: + cdev_del(&pps->cdev); + + return err; +} + +void pps_unregister_cdev(struct pps_device *pps) +{ + device_destroy(pps_class, pps->devno); + cdev_del(&pps->cdev); +} + +/* + * Module stuff + */ + +static void __exit pps_exit(void) +{ + class_destroy(pps_class); + unregister_chrdev_region(pps_devt, PPS_MAX_SOURCES); +} + +static int __init pps_init(void) +{ 
+ int err; + + pps_class = class_create(THIS_MODULE, "pps"); + if (!pps_class) { + printk(KERN_ERR "pps: failed to allocate class\n"); + return -ENOMEM; + } + pps_class->dev_attrs = pps_attrs; + + err = alloc_chrdev_region(&pps_devt, 0, PPS_MAX_SOURCES, "pps"); + if (err < 0) { + printk(KERN_ERR "pps: failed to allocate char device region\n"); + goto remove_class; + } + + pr_info("LinuxPPS API ver. %d registered\n", PPS_API_VERS); + pr_info("Software ver. %s - Copyright 2005-2007 Rodolfo Giometti " + "<giometti@linux.it>\n", PPS_VERSION); + + return 0; + +remove_class: + class_destroy(pps_class); + + return err; +} + +subsys_initcall(pps_init); +module_exit(pps_exit); + +MODULE_AUTHOR("Rodolfo Giometti <giometti@linux.it>"); +MODULE_DESCRIPTION("LinuxPPS support (RFC 2783) - ver. " PPS_VERSION); +MODULE_LICENSE("GPL"); diff --git a/drivers/pps/sysfs.c b/drivers/pps/sysfs.c new file mode 100644 index 00000000000..ef0978c71ee --- /dev/null +++ b/drivers/pps/sysfs.c @@ -0,0 +1,98 @@ +/* + * PPS sysfs support + * + * + * Copyright (C) 2007-2009 Rodolfo Giometti <giometti@linux.it> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
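
A client driver feeds this core through the kernel API in kapi.c: register a source once, then report each pulse edge from an interrupt handler. A hedged sketch; every my_* name is hypothetical, and timestamping with getnstimeofday() is only one plausible way a client might capture the edge time:

	#include <linux/module.h>
	#include <linux/init.h>
	#include <linux/time.h>
	#include <linux/pps_kernel.h>

	static int my_source;			/* ID returned at registration */

	static struct pps_source_info my_info = {
		.name	= "my-pps",
		.path	= "",
		.mode	= PPS_CAPTUREASSERT | PPS_OFFSETASSERT |
			  PPS_CANWAIT | PPS_TSFMT_TSPEC,
		.owner	= THIS_MODULE,
	};

	/* wired into the client's IRQ handler, once per assert edge */
	static void my_report_edge(void)
	{
		struct timespec now;
		struct pps_ktime ts;

		getnstimeofday(&now);
		ts.sec  = now.tv_sec;
		ts.nsec = now.tv_nsec;
		pps_event(my_source, &ts, PPS_CAPTUREASSERT, NULL);
	}

	static int __init my_init(void)
	{
		my_source = pps_register_source(&my_info, PPS_CAPTUREASSERT);
		return my_source < 0 ? my_source : 0;
	}

	static void __exit my_exit(void)
	{
		pps_unregister_source(my_source);
	}

	module_init(my_init);
	module_exit(my_exit);
	MODULE_LICENSE("GPL");
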
+ */ + + +#include <linux/device.h> +#include <linux/module.h> +#include <linux/string.h> +#include <linux/pps_kernel.h> + +/* + * Attribute functions + */ + +static ssize_t pps_show_assert(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pps_device *pps = dev_get_drvdata(dev); + + if (!(pps->info.mode & PPS_CAPTUREASSERT)) + return 0; + + return sprintf(buf, "%lld.%09d#%d\n", + (long long) pps->assert_tu.sec, pps->assert_tu.nsec, + pps->assert_sequence); +} + +static ssize_t pps_show_clear(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pps_device *pps = dev_get_drvdata(dev); + + if (!(pps->info.mode & PPS_CAPTURECLEAR)) + return 0; + + return sprintf(buf, "%lld.%09d#%d\n", + (long long) pps->clear_tu.sec, pps->clear_tu.nsec, + pps->clear_sequence); +} + +static ssize_t pps_show_mode(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pps_device *pps = dev_get_drvdata(dev); + + return sprintf(buf, "%4x\n", pps->info.mode); +} + +static ssize_t pps_show_echo(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pps_device *pps = dev_get_drvdata(dev); + + return sprintf(buf, "%d\n", !!pps->info.echo); +} + +static ssize_t pps_show_name(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pps_device *pps = dev_get_drvdata(dev); + + return sprintf(buf, "%s\n", pps->info.name); +} + +static ssize_t pps_show_path(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pps_device *pps = dev_get_drvdata(dev); + + return sprintf(buf, "%s\n", pps->info.path); +} + +struct device_attribute pps_attrs[] = { + __ATTR(assert, S_IRUGO, pps_show_assert, NULL), + __ATTR(clear, S_IRUGO, pps_show_clear, NULL), + __ATTR(mode, S_IRUGO, pps_show_mode, NULL), + __ATTR(echo, S_IRUGO, pps_show_echo, NULL), + __ATTR(name, S_IRUGO, pps_show_name, NULL), + __ATTR(path, S_IRUGO, pps_show_path, NULL), + __ATTR_NULL, +}; diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index 277d35d232f..81adbdbd504 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -296,6 +296,15 @@ config RTC_DRV_RX8581 This driver can also be built as a module. If so the module will be called rtc-rx8581. +config RTC_DRV_RX8025 + tristate "Epson RX-8025SA/NB" + help + If you say yes here you get support for the Epson + RX-8025SA/NB RTC chips. + + This driver can also be built as a module. If so, the module + will be called rtc-rx8025. + endif # I2C comment "SPI RTC drivers" diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile index 6c0639a14f0..3c0f2b2ac92 100644 --- a/drivers/rtc/Makefile +++ b/drivers/rtc/Makefile @@ -62,6 +62,7 @@ obj-$(CONFIG_RTC_DRV_R9701) += rtc-r9701.o obj-$(CONFIG_RTC_DRV_RS5C313) += rtc-rs5c313.o obj-$(CONFIG_RTC_DRV_RS5C348) += rtc-rs5c348.o obj-$(CONFIG_RTC_DRV_RS5C372) += rtc-rs5c372.o +obj-$(CONFIG_RTC_DRV_RX8025) += rtc-rx8025.o obj-$(CONFIG_RTC_DRV_RX8581) += rtc-rx8581.o obj-$(CONFIG_RTC_DRV_S35390A) += rtc-s35390a.o obj-$(CONFIG_RTC_DRV_S3C) += rtc-s3c.o diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c index 2c4a65302a9..8a6f9a9f9cb 100644 --- a/drivers/rtc/rtc-ds1307.c +++ b/drivers/rtc/rtc-ds1307.c @@ -31,6 +31,8 @@ enum ds_type { ds_1338, ds_1339, ds_1340, + ds_1388, + ds_3231, m41t00, rx_8025, // rs5c372 too? different address... 
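
The rtc-ds1307 hunks that follow add a per-chip register offset: most supported chips keep the seconds register at address 0, but on the DS1388 the clock registers start at 1, so every 7-byte block transfer of the timekeeping registers is shifted by ds1307->offset. A sketch of the resulting access pattern (hypothetical helper, modelled on ds1307_get_time() below):

	/* read the seven time registers, wherever this chip places them */
	static int ds1307_read_time_regs(struct ds1307 *ds1307)
	{
		int tmp = ds1307->read_block_data(ds1307->client,
						  ds1307->offset, 7,
						  ds1307->regs);

		return (tmp == 7) ? 0 : -EIO;	/* short reads are errors */
	}
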
@@ -66,6 +68,7 @@ enum ds_type { #define DS1337_REG_CONTROL 0x0e # define DS1337_BIT_nEOSC 0x80 # define DS1339_BIT_BBSQI 0x20 +# define DS3231_BIT_BBSQW 0x40 /* same as BBSQI */ # define DS1337_BIT_RS2 0x10 # define DS1337_BIT_RS1 0x08 # define DS1337_BIT_INTCN 0x04 @@ -94,6 +97,7 @@ enum ds_type { struct ds1307 { + u8 offset; /* register's offset */ u8 regs[11]; enum ds_type type; unsigned long flags; @@ -128,6 +132,9 @@ static const struct chip_desc chips[] = { }, [ds_1340] = { }, +[ds_3231] = { + .alarm = 1, +}, [m41t00] = { }, [rx_8025] = { @@ -138,7 +145,9 @@ static const struct i2c_device_id ds1307_id[] = { { "ds1337", ds_1337 }, { "ds1338", ds_1338 }, { "ds1339", ds_1339 }, + { "ds1388", ds_1388 }, { "ds1340", ds_1340 }, + { "ds3231", ds_3231 }, { "m41t00", m41t00 }, { "rx8025", rx_8025 }, { } @@ -291,7 +300,7 @@ static int ds1307_get_time(struct device *dev, struct rtc_time *t) /* read the RTC date and time registers all at once */ tmp = ds1307->read_block_data(ds1307->client, - DS1307_REG_SECS, 7, ds1307->regs); + ds1307->offset, 7, ds1307->regs); if (tmp != 7) { dev_err(dev, "%s error %d\n", "read", tmp); return -EIO; @@ -353,6 +362,7 @@ static int ds1307_set_time(struct device *dev, struct rtc_time *t) switch (ds1307->type) { case ds_1337: case ds_1339: + case ds_3231: buf[DS1307_REG_MONTH] |= DS1337_BIT_CENTURY; break; case ds_1340: @@ -367,7 +377,8 @@ static int ds1307_set_time(struct device *dev, struct rtc_time *t) "write", buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6]); - result = ds1307->write_block_data(ds1307->client, 0, 7, buf); + result = ds1307->write_block_data(ds1307->client, + ds1307->offset, 7, buf); if (result < 0) { dev_err(dev, "%s error %d\n", "write", result); return result; @@ -624,6 +635,11 @@ static int __devinit ds1307_probe(struct i2c_client *client, struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent); int want_irq = false; unsigned char *buf; + static const int bbsqi_bitpos[] = { + [ds_1337] = 0, + [ds_1339] = DS1339_BIT_BBSQI, + [ds_3231] = DS3231_BIT_BBSQW, + }; if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA) && !i2c_check_functionality(adapter, I2C_FUNC_SMBUS_I2C_BLOCK)) @@ -632,9 +648,12 @@ static int __devinit ds1307_probe(struct i2c_client *client, if (!(ds1307 = kzalloc(sizeof(struct ds1307), GFP_KERNEL))) return -ENOMEM; - ds1307->client = client; i2c_set_clientdata(client, ds1307); - ds1307->type = id->driver_data; + + ds1307->client = client; + ds1307->type = id->driver_data; + ds1307->offset = 0; + buf = ds1307->regs; if (i2c_check_functionality(adapter, I2C_FUNC_SMBUS_I2C_BLOCK)) { ds1307->read_block_data = i2c_smbus_read_i2c_block_data; @@ -647,6 +666,7 @@ static int __devinit ds1307_probe(struct i2c_client *client, switch (ds1307->type) { case ds_1337: case ds_1339: + case ds_3231: /* has IRQ? */ if (ds1307->client->irq > 0 && chip->alarm) { INIT_WORK(&ds1307->work, ds1307_work); @@ -666,12 +686,12 @@ static int __devinit ds1307_probe(struct i2c_client *client, ds1307->regs[0] &= ~DS1337_BIT_nEOSC; /* Using IRQ? Disable the square wave and both alarms. 
- * For ds1339, be sure alarms can trigger when we're - * running on Vbackup (BBSQI); we assume ds1337 will - * ignore that bit + * For some variants, be sure alarms can trigger when we're + * running on Vbackup (BBSQI/BBSQW) */ if (want_irq) { - ds1307->regs[0] |= DS1337_BIT_INTCN | DS1339_BIT_BBSQI; + ds1307->regs[0] |= DS1337_BIT_INTCN + | bbsqi_bitpos[ds1307->type]; ds1307->regs[0] &= ~(DS1337_BIT_A2IE | DS1337_BIT_A1IE); } @@ -751,6 +771,9 @@ static int __devinit ds1307_probe(struct i2c_client *client, hour); } break; + case ds_1388: + ds1307->offset = 1; /* Seconds starts at 1 */ + break; default: break; } @@ -814,6 +837,8 @@ read_rtc: case rx_8025: case ds_1337: case ds_1339: + case ds_1388: + case ds_3231: break; } diff --git a/drivers/rtc/rtc-ds1553.c b/drivers/rtc/rtc-ds1553.c index 38d472b6340..717288527c6 100644 --- a/drivers/rtc/rtc-ds1553.c +++ b/drivers/rtc/rtc-ds1553.c @@ -329,8 +329,7 @@ static int __devinit ds1553_rtc_probe(struct platform_device *pdev) if (pdata->irq > 0) { writeb(0, ioaddr + RTC_INTERRUPTS); if (request_irq(pdata->irq, ds1553_rtc_interrupt, - IRQF_DISABLED | IRQF_SHARED, - pdev->name, pdev) < 0) { + IRQF_DISABLED, pdev->name, pdev) < 0) { dev_warn(&pdev->dev, "interrupt not available.\n"); pdata->irq = 0; } diff --git a/drivers/rtc/rtc-ds1742.c b/drivers/rtc/rtc-ds1742.c index 8bc8501bffc..09249459e9a 100644 --- a/drivers/rtc/rtc-ds1742.c +++ b/drivers/rtc/rtc-ds1742.c @@ -57,6 +57,7 @@ struct rtc_plat_data { size_t size; resource_size_t baseaddr; unsigned long last_jiffies; + struct bin_attribute nvram_attr; }; static int ds1742_rtc_set_time(struct device *dev, struct rtc_time *tm) @@ -157,18 +158,6 @@ static ssize_t ds1742_nvram_write(struct kobject *kobj, return count; } -static struct bin_attribute ds1742_nvram_attr = { - .attr = { - .name = "nvram", - .mode = S_IRUGO | S_IWUSR, - }, - .read = ds1742_nvram_read, - .write = ds1742_nvram_write, - /* REVISIT: size in sysfs won't match actual size... if it's - * not a constant, each RTC should have its own attribute. 
- */ -}; - static int __devinit ds1742_rtc_probe(struct platform_device *pdev) { struct rtc_device *rtc; @@ -199,6 +188,12 @@ static int __devinit ds1742_rtc_probe(struct platform_device *pdev) pdata->size_nvram = pdata->size - RTC_SIZE; pdata->ioaddr_rtc = ioaddr + pdata->size_nvram; + pdata->nvram_attr.attr.name = "nvram"; + pdata->nvram_attr.attr.mode = S_IRUGO | S_IWUSR; + pdata->nvram_attr.read = ds1742_nvram_read; + pdata->nvram_attr.write = ds1742_nvram_write; + pdata->nvram_attr.size = pdata->size_nvram; + /* turn RTC on if it was not on */ ioaddr = pdata->ioaddr_rtc; sec = readb(ioaddr + RTC_SECONDS); @@ -221,11 +216,13 @@ static int __devinit ds1742_rtc_probe(struct platform_device *pdev) pdata->rtc = rtc; pdata->last_jiffies = jiffies; platform_set_drvdata(pdev, pdata); - ds1742_nvram_attr.size = max(ds1742_nvram_attr.size, - pdata->size_nvram); - ret = sysfs_create_bin_file(&pdev->dev.kobj, &ds1742_nvram_attr); - if (ret) + + ret = sysfs_create_bin_file(&pdev->dev.kobj, &pdata->nvram_attr); + if (ret) { + dev_err(&pdev->dev, "creating nvram file in sysfs failed\n"); goto out; + } + return 0; out: if (pdata->rtc) @@ -242,7 +239,7 @@ static int __devexit ds1742_rtc_remove(struct platform_device *pdev) { struct rtc_plat_data *pdata = platform_get_drvdata(pdev); - sysfs_remove_bin_file(&pdev->dev.kobj, &ds1742_nvram_attr); + sysfs_remove_bin_file(&pdev->dev.kobj, &pdata->nvram_attr); rtc_device_unregister(pdata->rtc); iounmap(pdata->ioaddr_nvram); release_mem_region(pdata->baseaddr, pdata->size); diff --git a/drivers/rtc/rtc-rx8025.c b/drivers/rtc/rtc-rx8025.c new file mode 100644 index 00000000000..b1a29bcfdf1 --- /dev/null +++ b/drivers/rtc/rtc-rx8025.c @@ -0,0 +1,688 @@ +/* + * Driver for Epson's RTC module RX-8025 SA/NB + * + * Copyright (C) 2009 Wolfgang Grandegger <wg@grandegger.com> + * + * Copyright (C) 2005 by Digi International Inc. + * All rights reserved. + * + * Modified by fengjh at rising.com.cn + * <http://lists.lm-sensors.org/mailman/listinfo/lm-sensors> + * 2006.11 + * + * Code cleanup by Sergei Poselenov, <sposelenov@emcraft.com> + * Converted to new style by Wolfgang Grandegger <wg@grandegger.com> + * Alarm and periodic interrupt added by Dmitry Rakhchev <rda@emcraft.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. 
+ */ +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/bcd.h> +#include <linux/i2c.h> +#include <linux/list.h> +#include <linux/rtc.h> + +/* Register definitions */ +#define RX8025_REG_SEC 0x00 +#define RX8025_REG_MIN 0x01 +#define RX8025_REG_HOUR 0x02 +#define RX8025_REG_WDAY 0x03 +#define RX8025_REG_MDAY 0x04 +#define RX8025_REG_MONTH 0x05 +#define RX8025_REG_YEAR 0x06 +#define RX8025_REG_DIGOFF 0x07 +#define RX8025_REG_ALWMIN 0x08 +#define RX8025_REG_ALWHOUR 0x09 +#define RX8025_REG_ALWWDAY 0x0a +#define RX8025_REG_ALDMIN 0x0b +#define RX8025_REG_ALDHOUR 0x0c +/* 0x0d is reserved */ +#define RX8025_REG_CTRL1 0x0e +#define RX8025_REG_CTRL2 0x0f + +#define RX8025_BIT_CTRL1_CT (7 << 0) +/* 1 Hz periodic level irq */ +#define RX8025_BIT_CTRL1_CT_1HZ 4 +#define RX8025_BIT_CTRL1_TEST (1 << 3) +#define RX8025_BIT_CTRL1_1224 (1 << 5) +#define RX8025_BIT_CTRL1_DALE (1 << 6) +#define RX8025_BIT_CTRL1_WALE (1 << 7) + +#define RX8025_BIT_CTRL2_DAFG (1 << 0) +#define RX8025_BIT_CTRL2_WAFG (1 << 1) +#define RX8025_BIT_CTRL2_CTFG (1 << 2) +#define RX8025_BIT_CTRL2_PON (1 << 4) +#define RX8025_BIT_CTRL2_XST (1 << 5) +#define RX8025_BIT_CTRL2_VDET (1 << 6) + +/* Clock precision adjustment */ +#define RX8025_ADJ_RESOLUTION 3050 /* in ppb */ +#define RX8025_ADJ_DATA_MAX 62 +#define RX8025_ADJ_DATA_MIN -62 + +static const struct i2c_device_id rx8025_id[] = { + { "rx8025", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, rx8025_id); + +struct rx8025_data { + struct i2c_client *client; + struct rtc_device *rtc; + struct work_struct work; + u8 ctrl1; + unsigned exiting:1; +}; + +static int rx8025_read_reg(struct i2c_client *client, int number, u8 *value) +{ + int ret = i2c_smbus_read_byte_data(client, (number << 4) | 0x08); + + if (ret < 0) { + dev_err(&client->dev, "Unable to read register #%d\n", number); + return ret; + } + + *value = ret; + return 0; +} + +static int rx8025_read_regs(struct i2c_client *client, + int number, u8 length, u8 *values) +{ + int ret = i2c_smbus_read_i2c_block_data(client, (number << 4) | 0x08, + length, values); + + if (ret != length) { + dev_err(&client->dev, "Unable to read registers #%d..#%d\n", + number, number + length - 1); + return ret < 0 ? 
ret : -EIO; + } + + return 0; +} + +static int rx8025_write_reg(struct i2c_client *client, int number, u8 value) +{ + int ret = i2c_smbus_write_byte_data(client, number << 4, value); + + if (ret) + dev_err(&client->dev, "Unable to write register #%d\n", + number); + + return ret; +} + +static int rx8025_write_regs(struct i2c_client *client, + int number, u8 length, u8 *values) +{ + int ret = i2c_smbus_write_i2c_block_data(client, (number << 4) | 0x08, + length, values); + + if (ret) + dev_err(&client->dev, "Unable to write registers #%d..#%d\n", + number, number + length - 1); + + return ret; +} + +static irqreturn_t rx8025_irq(int irq, void *dev_id) +{ + struct i2c_client *client = dev_id; + struct rx8025_data *rx8025 = i2c_get_clientdata(client); + + disable_irq_nosync(irq); + schedule_work(&rx8025->work); + return IRQ_HANDLED; +} + +static void rx8025_work(struct work_struct *work) +{ + struct rx8025_data *rx8025 = container_of(work, struct rx8025_data, + work); + struct i2c_client *client = rx8025->client; + struct mutex *lock = &rx8025->rtc->ops_lock; + u8 status; + + mutex_lock(lock); + + if (rx8025_read_reg(client, RX8025_REG_CTRL2, &status)) + goto out; + + if (!(status & RX8025_BIT_CTRL2_XST)) + dev_warn(&client->dev, "Oscillation stop was detected, " + "you may have to readjust the clock\n"); + + if (status & RX8025_BIT_CTRL2_CTFG) { + /* periodic */ + status &= ~RX8025_BIT_CTRL2_CTFG; + local_irq_disable(); + rtc_update_irq(rx8025->rtc, 1, RTC_PF | RTC_IRQF); + local_irq_enable(); + } + + if (status & RX8025_BIT_CTRL2_DAFG) { + /* alarm */ + status &= ~RX8025_BIT_CTRL2_DAFG; + if (rx8025_write_reg(client, RX8025_REG_CTRL1, + rx8025->ctrl1 & ~RX8025_BIT_CTRL1_DALE)) + goto out; + local_irq_disable(); + rtc_update_irq(rx8025->rtc, 1, RTC_AF | RTC_IRQF); + local_irq_enable(); + } + + /* acknowledge IRQ */ + rx8025_write_reg(client, RX8025_REG_CTRL2, + status | RX8025_BIT_CTRL2_XST); + +out: + if (!rx8025->exiting) + enable_irq(client->irq); + + mutex_unlock(lock); +} + +static int rx8025_get_time(struct device *dev, struct rtc_time *dt) +{ + struct rx8025_data *rx8025 = dev_get_drvdata(dev); + u8 date[7]; + int err; + + err = rx8025_read_regs(rx8025->client, RX8025_REG_SEC, 7, date); + if (err) + return err; + + dev_dbg(dev, "%s: read 0x%02x 0x%02x " + "0x%02x 0x%02x 0x%02x 0x%02x 0x%02x\n", __func__, + date[0], date[1], date[2], date[3], date[4], + date[5], date[6]); + + dt->tm_sec = bcd2bin(date[RX8025_REG_SEC] & 0x7f); + dt->tm_min = bcd2bin(date[RX8025_REG_MIN] & 0x7f); + if (rx8025->ctrl1 & RX8025_BIT_CTRL1_1224) + dt->tm_hour = bcd2bin(date[RX8025_REG_HOUR] & 0x3f); + else + dt->tm_hour = bcd2bin(date[RX8025_REG_HOUR] & 0x1f) % 12 + + (date[RX8025_REG_HOUR] & 0x20 ? 12 : 0); + + dt->tm_mday = bcd2bin(date[RX8025_REG_MDAY] & 0x3f); + dt->tm_mon = bcd2bin(date[RX8025_REG_MONTH] & 0x1f) - 1; + dt->tm_year = bcd2bin(date[RX8025_REG_YEAR]); + + if (dt->tm_year < 70) + dt->tm_year += 100; + + dev_dbg(dev, "%s: date %ds %dm %dh %dmd %dm %dy\n", __func__, + dt->tm_sec, dt->tm_min, dt->tm_hour, + dt->tm_mday, dt->tm_mon, dt->tm_year); + + return rtc_valid_tm(dt); +}
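Decoding the hour register is the subtle part of rx8025_get_time() above: in 12-hour mode the chip stores 1-12 in BCD with bit 5 as the PM flag, so BCD 12 AM must map to hour 0 and 12 PM to hour 12. A standalone sketch of the same conversion, with bcd2bin reimplemented locally so it compiles outside the kernel (register values are illustrative):

#include <assert.h>

static unsigned bcd2bin(unsigned char val)
{
	return (val & 0x0f) + (val >> 4) * 10;
}

/* decode an RX-8025 hour register in 12-hour mode; bit 5 is PM */
static int rx8025_hour12_to_24(unsigned char reg)
{
	return bcd2bin(reg & 0x1f) % 12 + ((reg & 0x20) ? 12 : 0);
}

int main(void)
{
	assert(rx8025_hour12_to_24(0x12) == 0);		/* 12 AM -> 00 */
	assert(rx8025_hour12_to_24(0x32) == 12);	/* 12 PM -> 12 */
	assert(rx8025_hour12_to_24(0x23) == 15);	/*  3 PM -> 15 */
	return 0;
}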
+ +static int rx8025_set_time(struct device *dev, struct rtc_time *dt) +{ + struct rx8025_data *rx8025 = dev_get_drvdata(dev); + u8 date[7]; + + /* + * BUG: The HW assumes that every year that is a multiple of 4 is a + * leap year. The next time this is wrong is 2100, which will not be + * a leap year. + */ + + /* + * Here the read-only bits are written as "0". I'm not sure if that + * is sound. + */ + date[RX8025_REG_SEC] = bin2bcd(dt->tm_sec); + date[RX8025_REG_MIN] = bin2bcd(dt->tm_min); + if (rx8025->ctrl1 & RX8025_BIT_CTRL1_1224) + date[RX8025_REG_HOUR] = bin2bcd(dt->tm_hour); + else + date[RX8025_REG_HOUR] = (dt->tm_hour >= 12 ? 0x20 : 0) + | bin2bcd((dt->tm_hour + 11) % 12 + 1); + + date[RX8025_REG_WDAY] = bin2bcd(dt->tm_wday); + date[RX8025_REG_MDAY] = bin2bcd(dt->tm_mday); + date[RX8025_REG_MONTH] = bin2bcd(dt->tm_mon + 1); + date[RX8025_REG_YEAR] = bin2bcd(dt->tm_year % 100); + + dev_dbg(dev, + "%s: write 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x\n", + __func__, + date[0], date[1], date[2], date[3], date[4], date[5], date[6]); + + return rx8025_write_regs(rx8025->client, RX8025_REG_SEC, 7, date); +} + +static int rx8025_init_client(struct i2c_client *client, int *need_reset) +{ + struct rx8025_data *rx8025 = i2c_get_clientdata(client); + u8 ctrl[2], ctrl2; + int need_clear = 0; + int err; + + err = rx8025_read_regs(rx8025->client, RX8025_REG_CTRL1, 2, ctrl); + if (err) + goto out; + + /* Keep the test bit zero! */ + rx8025->ctrl1 = ctrl[0] & ~RX8025_BIT_CTRL1_TEST; + + if (ctrl[1] & RX8025_BIT_CTRL2_PON) { + dev_warn(&client->dev, "power-on reset was detected, " + "you may have to readjust the clock\n"); + *need_reset = 1; + } + + if (ctrl[1] & RX8025_BIT_CTRL2_VDET) { + dev_warn(&client->dev, "a power voltage drop was detected, " + "you may have to readjust the clock\n"); + *need_reset = 1; + } + + if (!(ctrl[1] & RX8025_BIT_CTRL2_XST)) { + dev_warn(&client->dev, "Oscillation stop was detected, " + "you may have to readjust the clock\n"); + *need_reset = 1; + } + + if (ctrl[1] & (RX8025_BIT_CTRL2_DAFG | RX8025_BIT_CTRL2_WAFG)) { + dev_warn(&client->dev, "Alarm was detected\n"); + need_clear = 1; + } + + if (!(ctrl[1] & RX8025_BIT_CTRL2_CTFG)) + need_clear = 1; + + if (*need_reset || need_clear) { + ctrl2 = ctrl[1]; + ctrl2 &= ~(RX8025_BIT_CTRL2_PON | RX8025_BIT_CTRL2_VDET | + RX8025_BIT_CTRL2_CTFG | RX8025_BIT_CTRL2_WAFG | + RX8025_BIT_CTRL2_DAFG); + ctrl2 |= RX8025_BIT_CTRL2_XST; + + err = rx8025_write_reg(client, RX8025_REG_CTRL2, ctrl2); + } +out: + return err; +}
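As a worked check of the CTRL2 sanitizing above: if the chip powers up with PON and a stale DAFG set, every status flag must be cleared while XST is forced back on, so that oscillation-stop detection is re-armed. A standalone sketch using the same bit positions as the defines above:

#include <assert.h>

#define DAFG (1 << 0)
#define WAFG (1 << 1)
#define CTFG (1 << 2)
#define PON  (1 << 4)
#define XST  (1 << 5)
#define VDET (1 << 6)

int main(void)
{
	unsigned char ctrl2 = PON | DAFG;	/* as read from the chip */

	/* same masking as rx8025_init_client() */
	ctrl2 &= ~(PON | VDET | CTFG | WAFG | DAFG);
	ctrl2 |= XST;

	assert(ctrl2 == XST);	/* only XST remains set */
	return 0;
}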
+ +/* Alarm support */ +static int rx8025_read_alarm(struct device *dev, struct rtc_wkalrm *t) +{ + struct rx8025_data *rx8025 = dev_get_drvdata(dev); + struct i2c_client *client = rx8025->client; + u8 ctrl2, ald[2]; + int err; + + if (client->irq <= 0) + return -EINVAL; + + err = rx8025_read_regs(client, RX8025_REG_ALDMIN, 2, ald); + if (err) + return err; + + err = rx8025_read_reg(client, RX8025_REG_CTRL2, &ctrl2); + if (err) + return err; + + dev_dbg(dev, "%s: read alarm 0x%02x 0x%02x ctrl2 %02x\n", + __func__, ald[0], ald[1], ctrl2); + + /* Hardware alarm precision is 1 minute! */ + t->time.tm_sec = 0; + t->time.tm_min = bcd2bin(ald[0] & 0x7f); + if (rx8025->ctrl1 & RX8025_BIT_CTRL1_1224) + t->time.tm_hour = bcd2bin(ald[1] & 0x3f); + else + t->time.tm_hour = bcd2bin(ald[1] & 0x1f) % 12 + + (ald[1] & 0x20 ? 12 : 0); + + t->time.tm_wday = -1; + t->time.tm_mday = -1; + t->time.tm_mon = -1; + t->time.tm_year = -1; + + dev_dbg(dev, "%s: date: %ds %dm %dh %dmd %dm %dy\n", + __func__, + t->time.tm_sec, t->time.tm_min, t->time.tm_hour, + t->time.tm_mday, t->time.tm_mon, t->time.tm_year); + t->enabled = !!(rx8025->ctrl1 & RX8025_BIT_CTRL1_DALE); + t->pending = (ctrl2 & RX8025_BIT_CTRL2_DAFG) && t->enabled; + + return err; +} + +static int rx8025_set_alarm(struct device *dev, struct rtc_wkalrm *t) +{ + struct i2c_client *client = to_i2c_client(dev); + struct rx8025_data *rx8025 = dev_get_drvdata(dev); + u8 ald[2]; + int err; + + if (client->irq <= 0) + return -EINVAL; + + /* Hardware alarm precision is 1 minute! */ + ald[0] = bin2bcd(t->time.tm_min); + if (rx8025->ctrl1 & RX8025_BIT_CTRL1_1224) + ald[1] = bin2bcd(t->time.tm_hour); + else + ald[1] = (t->time.tm_hour >= 12 ? 0x20 : 0) + | bin2bcd((t->time.tm_hour + 11) % 12 + 1); + + dev_dbg(dev, "%s: write 0x%02x 0x%02x\n", __func__, ald[0], ald[1]); + + if (rx8025->ctrl1 & RX8025_BIT_CTRL1_DALE) { + rx8025->ctrl1 &= ~RX8025_BIT_CTRL1_DALE; + err = rx8025_write_reg(rx8025->client, RX8025_REG_CTRL1, + rx8025->ctrl1); + if (err) + return err; + } + err = rx8025_write_regs(rx8025->client, RX8025_REG_ALDMIN, 2, ald); + if (err) + return err; + + if (t->enabled) { + rx8025->ctrl1 |= RX8025_BIT_CTRL1_DALE; + err = rx8025_write_reg(rx8025->client, RX8025_REG_CTRL1, + rx8025->ctrl1); + if (err) + return err; + } + + return 0; +} + +static int rx8025_alarm_irq_enable(struct device *dev, unsigned int enabled) +{ + struct rx8025_data *rx8025 = dev_get_drvdata(dev); + u8 ctrl1; + int err; + + ctrl1 = rx8025->ctrl1; + if (enabled) + ctrl1 |= RX8025_BIT_CTRL1_DALE; + else + ctrl1 &= ~RX8025_BIT_CTRL1_DALE; + + if (ctrl1 != rx8025->ctrl1) { + rx8025->ctrl1 = ctrl1; + err = rx8025_write_reg(rx8025->client, RX8025_REG_CTRL1, + rx8025->ctrl1); + if (err) + return err; + } + return 0; +} + +static int rx8025_irq_set_state(struct device *dev, int enabled) +{ + struct i2c_client *client = to_i2c_client(dev); + struct rx8025_data *rx8025 = i2c_get_clientdata(client); + int ctrl1; + int err; + + if (client->irq <= 0) + return -ENXIO; + + ctrl1 = rx8025->ctrl1 & ~RX8025_BIT_CTRL1_CT; + if (enabled) + ctrl1 |= RX8025_BIT_CTRL1_CT_1HZ; + if (ctrl1 != rx8025->ctrl1) { + rx8025->ctrl1 = ctrl1; + err = rx8025_write_reg(rx8025->client, RX8025_REG_CTRL1, + rx8025->ctrl1); + if (err) + return err; + } + + return 0; +} + +static struct rtc_class_ops rx8025_rtc_ops = { + .read_time = rx8025_get_time, + .set_time = rx8025_set_time, + .read_alarm = rx8025_read_alarm, + .set_alarm = rx8025_set_alarm, + .alarm_irq_enable = rx8025_alarm_irq_enable, + .irq_set_state = rx8025_irq_set_state, +}; + +/* + * Clock precision adjustment support + * + * According to the RX8025 SA/NB application manual the frequency and + * temperature characteristics can be approximated using the following + * equation: + * + * df = a * (ut - t)**2 + * + * df: Frequency deviation in any temperature + * a : Coefficient = (-35 +-5) * 10**-9 + * ut: Ultimate temperature in degree = +25 +-5 degree + * t : Any temperature in degree + * + * Note that the clock adjustment is entered in ppb and is the negative + * value of the deviation. + */
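A worked round trip through this encoding: asking for +12200 ppb gives 12200 / -3050 = -4; negative values get 128 added, so 124 is written to DIGOFF. Reading back, 124 >= 64 decodes to 124 - 128 = -4, and -4 * -3050 = +12200 ppb again. A standalone sketch of both directions (clamping omitted; constants copied from the defines above):

#include <assert.h>

#define RESOLUTION 3050	/* ppb per DIGOFF step */

/* mirror of the driver's encode path */
static unsigned char ppb_to_digoff(int ppb)
{
	int adj = ppb / -RESOLUTION;

	if (adj > 0)
		adj++;
	else if (adj < 0)
		adj += 128;
	return adj;
}

/* mirror of the driver's decode path */
static int digoff_to_ppb(unsigned char digoff)
{
	int adj = digoff >= 64 ? digoff - 128 : digoff;

	if (adj > 0)
		adj--;
	return adj * -RESOLUTION;
}

int main(void)
{
	assert(ppb_to_digoff(12200) == 124);
	assert(digoff_to_ppb(124) == 12200);
	assert(ppb_to_digoff(-6100) == 3);
	assert(digoff_to_ppb(3) == -6100);
	return 0;
}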
+static int rx8025_get_clock_adjust(struct device *dev, int *adj) +{ + struct i2c_client *client = to_i2c_client(dev); + u8 digoff; + int err; + + err = rx8025_read_reg(client, RX8025_REG_DIGOFF, &digoff); + if (err) + return err; + + *adj = digoff >= 64 ? digoff - 128 : digoff; + if (*adj > 0) + (*adj)--; + *adj *= -RX8025_ADJ_RESOLUTION; + + return 0; +} + +static int rx8025_set_clock_adjust(struct device *dev, int adj) +{ + struct i2c_client *client = to_i2c_client(dev); + u8 digoff; + int err; + + adj /= -RX8025_ADJ_RESOLUTION; + if (adj > RX8025_ADJ_DATA_MAX) + adj = RX8025_ADJ_DATA_MAX; + else if (adj < RX8025_ADJ_DATA_MIN) + adj = RX8025_ADJ_DATA_MIN; + else if (adj > 0) + adj++; + else if (adj < 0) + adj += 128; + digoff = adj; + + err = rx8025_write_reg(client, RX8025_REG_DIGOFF, digoff); + if (err) + return err; + + dev_dbg(dev, "%s: write 0x%02x\n", __func__, digoff); + + return 0; +} + +static ssize_t rx8025_sysfs_show_clock_adjust(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + int err, adj; + + err = rx8025_get_clock_adjust(dev, &adj); + if (err) + return err; + + return sprintf(buf, "%d\n", adj); +} + +static ssize_t rx8025_sysfs_store_clock_adjust(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int adj, err; + + if (sscanf(buf, "%i", &adj) != 1) + return -EINVAL; + + err = rx8025_set_clock_adjust(dev, adj); + + return err ? err : count; +} + +static DEVICE_ATTR(clock_adjust_ppb, S_IRUGO | S_IWUSR, + rx8025_sysfs_show_clock_adjust, + rx8025_sysfs_store_clock_adjust); + +static int rx8025_sysfs_register(struct device *dev) +{ + return device_create_file(dev, &dev_attr_clock_adjust_ppb); +} + +static void rx8025_sysfs_unregister(struct device *dev) +{ + device_remove_file(dev, &dev_attr_clock_adjust_ppb); +} + +static int __devinit rx8025_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent); + struct rx8025_data *rx8025; + int err, need_reset = 0; + + if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA + | I2C_FUNC_SMBUS_I2C_BLOCK)) { + dev_err(&adapter->dev, + "doesn't support required functionality\n"); + err = -EIO; + goto errout; + } + + rx8025 = kzalloc(sizeof(*rx8025), GFP_KERNEL); + if (!rx8025) { + dev_err(&adapter->dev, "failed to alloc memory\n"); + err = -ENOMEM; + goto errout; + } + + rx8025->client = client; + i2c_set_clientdata(client, rx8025); + INIT_WORK(&rx8025->work, rx8025_work); + + err = rx8025_init_client(client, &need_reset); + if (err) + goto errout_free; + + if (need_reset) { + struct rtc_time tm; + dev_info(&client->dev, + "bad conditions detected, resetting date\n"); + rtc_time_to_tm(0, &tm); /* 1970/1/1 */ + rx8025_set_time(&client->dev, &tm); + } + + rx8025->rtc = rtc_device_register(client->name, &client->dev, + &rx8025_rtc_ops, THIS_MODULE); + if (IS_ERR(rx8025->rtc)) { + err = PTR_ERR(rx8025->rtc); + dev_err(&client->dev, "unable to register the class device\n"); + goto errout_free; + } + + if (client->irq > 0) { + dev_info(&client->dev, "IRQ %d supplied\n", client->irq); + err = request_irq(client->irq, rx8025_irq, + 0, "rx8025", client); + if (err) { + dev_err(&client->dev, "unable to request IRQ\n"); + goto errout_reg; + } + } + + rx8025->rtc->irq_freq = 1; + rx8025->rtc->max_user_freq = 1; + + err = rx8025_sysfs_register(&client->dev); + if (err) + goto errout_irq; + + return 0; + +errout_irq: + if (client->irq > 0) + free_irq(client->irq, client); + +errout_reg: + rtc_device_unregister(rx8025->rtc); + +errout_free: + i2c_set_clientdata(client, NULL); + kfree(rx8025); + +errout: + dev_err(&adapter->dev, "probing for rx8025 failed\n"); + return err; +} + +static int __devexit rx8025_remove(struct i2c_client *client) +{ + struct
rx8025_data *rx8025 = i2c_get_clientdata(client); + struct mutex *lock = &rx8025->rtc->ops_lock; + + if (client->irq > 0) { + mutex_lock(lock); + rx8025->exiting = 1; + mutex_unlock(lock); + + free_irq(client->irq, client); + flush_scheduled_work(); + } + + rx8025_sysfs_unregister(&client->dev); + rtc_device_unregister(rx8025->rtc); + i2c_set_clientdata(client, NULL); + kfree(rx8025); + return 0; +} + +static struct i2c_driver rx8025_driver = { + .driver = { + .name = "rtc-rx8025", + .owner = THIS_MODULE, + }, + .probe = rx8025_probe, + .remove = __devexit_p(rx8025_remove), + .id_table = rx8025_id, +}; + +static int __init rx8025_init(void) +{ + return i2c_add_driver(&rx8025_driver); +} + +static void __exit rx8025_exit(void) +{ + i2c_del_driver(&rx8025_driver); +} + +MODULE_AUTHOR("Wolfgang Grandegger <wg@grandegger.com>"); +MODULE_DESCRIPTION("RX-8025 SA/NB RTC driver"); +MODULE_LICENSE("GPL"); + +module_init(rx8025_init); +module_exit(rx8025_exit); diff --git a/drivers/rtc/rtc-tx4939.c b/drivers/rtc/rtc-tx4939.c index 4ee4857ff20..4a6ed1104fb 100644 --- a/drivers/rtc/rtc-tx4939.c +++ b/drivers/rtc/rtc-tx4939.c @@ -261,10 +261,8 @@ static int __init tx4939_rtc_probe(struct platform_device *pdev) tx4939_rtc_cmd(pdata->rtcreg, TX4939_RTCCTL_COMMAND_NOP); if (devm_request_irq(&pdev->dev, irq, tx4939_rtc_interrupt, - IRQF_DISABLED | IRQF_SHARED, - pdev->name, &pdev->dev) < 0) { + IRQF_DISABLED, pdev->name, &pdev->dev) < 0) return -EBUSY; - } rtc = rtc_device_register(pdev->name, &pdev->dev, &tx4939_rtc_ops, THIS_MODULE); if (IS_ERR(rtc)) diff --git a/drivers/spi/atmel_spi.c b/drivers/spi/atmel_spi.c index 12e443cc4ac..f5b3fdbb1e2 100644 --- a/drivers/spi/atmel_spi.c +++ b/drivers/spi/atmel_spi.c @@ -530,9 +530,6 @@ atmel_spi_interrupt(int irq, void *dev_id) return ret; } -/* the spi->mode bits understood by this driver: */ -#define MODEBITS (SPI_CPOL | SPI_CPHA | SPI_CS_HIGH) - static int atmel_spi_setup(struct spi_device *spi) { struct atmel_spi *as; @@ -555,8 +552,6 @@ static int atmel_spi_setup(struct spi_device *spi) return -EINVAL; } - if (bits == 0) - bits = 8; if (bits < 8 || bits > 16) { dev_dbg(&spi->dev, "setup: invalid bits_per_word %u (8 to 16)\n", @@ -564,12 +559,6 @@ static int atmel_spi_setup(struct spi_device *spi) return -EINVAL; } - if (spi->mode & ~MODEBITS) { - dev_dbg(&spi->dev, "setup: unsupported mode bits %x\n", - spi->mode & ~MODEBITS); - return -EINVAL; - } - /* see notes above re chipselect */ if (!atmel_spi_is_v2() && spi->chip_select == 0 @@ -775,6 +764,9 @@ static int __init atmel_spi_probe(struct platform_device *pdev) if (!master) goto out_free; + /* the spi->mode bits understood by this driver: */ + master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH; + master->bus_num = pdev->id; master->num_chipselect = 4; master->setup = atmel_spi_setup; diff --git a/drivers/spi/au1550_spi.c b/drivers/spi/au1550_spi.c index b02f25c702f..76cbc1a6659 100644 --- a/drivers/spi/au1550_spi.c +++ b/drivers/spi/au1550_spi.c @@ -284,27 +284,16 @@ static int au1550_spi_setupxfer(struct spi_device *spi, struct spi_transfer *t) return 0; } -/* the spi->mode bits understood by this driver: */ -#define MODEBITS (SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_LSB_FIRST) - static int au1550_spi_setup(struct spi_device *spi) { struct au1550_spi *hw = spi_master_get_devdata(spi->master); - if (spi->bits_per_word == 0) - spi->bits_per_word = 8; if (spi->bits_per_word < 4 || spi->bits_per_word > 24) { dev_err(&spi->dev, "setup: invalid bits_per_word=%d\n", spi->bits_per_word); return -EINVAL; } - 
if (spi->mode & ~MODEBITS) { - dev_dbg(&spi->dev, "setup: unsupported mode bits %x\n", - spi->mode & ~MODEBITS); - return -EINVAL; - } - if (spi->max_speed_hz == 0) spi->max_speed_hz = hw->freq_max; if (spi->max_speed_hz > hw->freq_max @@ -781,6 +770,9 @@ static int __init au1550_spi_probe(struct platform_device *pdev) goto err_nomem; } + /* the spi->mode bits understood by this driver: */ + master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_LSB_FIRST; + hw = spi_master_get_devdata(master); hw->master = spi_master_get(master); diff --git a/drivers/spi/mpc52xx_psc_spi.c b/drivers/spi/mpc52xx_psc_spi.c index 68c77a91159..1b74d5ca03f 100644 --- a/drivers/spi/mpc52xx_psc_spi.c +++ b/drivers/spi/mpc52xx_psc_spi.c @@ -13,6 +13,7 @@ #include <linux/module.h> #include <linux/init.h> +#include <linux/types.h> #include <linux/errno.h> #include <linux/interrupt.h> #include <linux/of_platform.h> @@ -30,8 +31,7 @@ struct mpc52xx_psc_spi { /* fsl_spi_platform data */ - void (*activate_cs)(u8, u8); - void (*deactivate_cs)(u8, u8); + void (*cs_control)(struct spi_device *spi, bool on); u32 sysclk; /* driver internal data */ @@ -111,18 +111,16 @@ static void mpc52xx_psc_spi_activate_cs(struct spi_device *spi) out_be16((u16 __iomem *)&psc->ccr, ccr); mps->bits_per_word = cs->bits_per_word; - if (mps->activate_cs) - mps->activate_cs(spi->chip_select, - (spi->mode & SPI_CS_HIGH) ? 1 : 0); + if (mps->cs_control) + mps->cs_control(spi, (spi->mode & SPI_CS_HIGH) ? 1 : 0); } static void mpc52xx_psc_spi_deactivate_cs(struct spi_device *spi) { struct mpc52xx_psc_spi *mps = spi_master_get_devdata(spi->master); - if (mps->deactivate_cs) - mps->deactivate_cs(spi->chip_select, - (spi->mode & SPI_CS_HIGH) ? 1 : 0); + if (mps->cs_control) + mps->cs_control(spi, (spi->mode & SPI_CS_HIGH) ? 
0 : 1); } #define MPC52xx_PSC_BUFSIZE (MPC52xx_PSC_RFNUM_MASK + 1) @@ -261,9 +259,6 @@ static void mpc52xx_psc_spi_work(struct work_struct *work) spin_unlock_irq(&mps->lock); } -/* the spi->mode bits understood by this driver: */ -#define MODEBITS (SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_LSB_FIRST) - static int mpc52xx_psc_spi_setup(struct spi_device *spi) { struct mpc52xx_psc_spi *mps = spi_master_get_devdata(spi->master); @@ -273,12 +268,6 @@ static int mpc52xx_psc_spi_setup(struct spi_device *spi) if (spi->bits_per_word%8) return -EINVAL; - if (spi->mode & ~MODEBITS) { - dev_dbg(&spi->dev, "setup: unsupported mode bits %x\n", - spi->mode & ~MODEBITS); - return -EINVAL; - } - if (!cs) { cs = kzalloc(sizeof *cs, GFP_KERNEL); if (!cs) @@ -385,18 +374,19 @@ static int __init mpc52xx_psc_spi_do_probe(struct device *dev, u32 regaddr, dev_set_drvdata(dev, master); mps = spi_master_get_devdata(master); + /* the spi->mode bits understood by this driver: */ + master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_LSB_FIRST; + mps->irq = irq; if (pdata == NULL) { dev_warn(dev, "probe called without platform data, no " - "(de)activate_cs function will be called\n"); - mps->activate_cs = NULL; - mps->deactivate_cs = NULL; + "cs_control function will be called\n"); + mps->cs_control = NULL; mps->sysclk = 0; master->bus_num = bus_num; master->num_chipselect = 255; } else { - mps->activate_cs = pdata->activate_cs; - mps->deactivate_cs = pdata->deactivate_cs; + mps->cs_control = pdata->cs_control; mps->sysclk = pdata->sysclk; master->bus_num = pdata->bus_num; master->num_chipselect = pdata->max_chipselect; diff --git a/drivers/spi/omap2_mcspi.c b/drivers/spi/omap2_mcspi.c index d6d0c5d241c..eee4b6e0af2 100644 --- a/drivers/spi/omap2_mcspi.c +++ b/drivers/spi/omap2_mcspi.c @@ -603,9 +603,6 @@ static int omap2_mcspi_request_dma(struct spi_device *spi) return 0; } -/* the spi->mode bits understood by this driver: */ -#define MODEBITS (SPI_CPOL | SPI_CPHA | SPI_CS_HIGH) - static int omap2_mcspi_setup(struct spi_device *spi) { int ret; @@ -613,15 +610,7 @@ static int omap2_mcspi_setup(struct spi_device *spi) struct omap2_mcspi_dma *mcspi_dma; struct omap2_mcspi_cs *cs = spi->controller_state; - if (spi->mode & ~MODEBITS) { - dev_dbg(&spi->dev, "setup: unsupported mode bits %x\n", - spi->mode & ~MODEBITS); - return -EINVAL; - } - - if (spi->bits_per_word == 0) - spi->bits_per_word = 8; - else if (spi->bits_per_word < 4 || spi->bits_per_word > 32) { + if (spi->bits_per_word < 4 || spi->bits_per_word > 32) { dev_dbg(&spi->dev, "setup: unsupported %d bit words\n", spi->bits_per_word); return -EINVAL; @@ -984,6 +973,9 @@ static int __init omap2_mcspi_probe(struct platform_device *pdev) return -ENOMEM; } + /* the spi->mode bits understood by this driver: */ + master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH; + if (pdev->id != -1) master->bus_num = pdev->id; diff --git a/drivers/spi/omap_uwire.c b/drivers/spi/omap_uwire.c index fe8b9ac0cce..aa90ddb3706 100644 --- a/drivers/spi/omap_uwire.c +++ b/drivers/spi/omap_uwire.c @@ -339,8 +339,6 @@ static int uwire_setup_transfer(struct spi_device *spi, struct spi_transfer *t) bits = spi->bits_per_word; if (t != NULL && t->bits_per_word) bits = t->bits_per_word; - if (!bits) - bits = 8; if (bits > 16) { pr_debug("%s: wordsize %d?\n", dev_name(&spi->dev), bits); @@ -449,19 +447,10 @@ done: return status; } -/* the spi->mode bits understood by this driver: */ -#define MODEBITS (SPI_CPOL | SPI_CPHA | SPI_CS_HIGH) - static int uwire_setup(struct spi_device *spi) { 
struct uwire_state *ust = spi->controller_state; - if (spi->mode & ~MODEBITS) { - dev_dbg(&spi->dev, "setup: unsupported mode bits %x\n", - spi->mode & ~MODEBITS); - return -EINVAL; - } - if (ust == NULL) { ust = kzalloc(sizeof(*ust), GFP_KERNEL); if (ust == NULL) @@ -522,6 +511,9 @@ static int __init uwire_probe(struct platform_device *pdev) uwire_write_reg(UWIRE_SR3, 1); + /* the spi->mode bits understood by this driver: */ + master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH; + master->bus_num = 2; /* "official" */ master->num_chipselect = 4; master->setup = uwire_setup; diff --git a/drivers/spi/orion_spi.c b/drivers/spi/orion_spi.c index c8b0babdc2a..3aea50da7b2 100644 --- a/drivers/spi/orion_spi.c +++ b/drivers/spi/orion_spi.c @@ -358,20 +358,11 @@ static int orion_spi_setup(struct spi_device *spi) orion_spi = spi_master_get_devdata(spi->master); - if (spi->mode) { - dev_err(&spi->dev, "setup: unsupported mode bits %x\n", - spi->mode); - return -EINVAL; - } - /* Fix ac timing if required. */ if (orion_spi->spi_info->enable_clock_fix) orion_spi_setbits(orion_spi, ORION_SPI_IF_CONFIG_REG, (1 << 14)); - if (spi->bits_per_word == 0) - spi->bits_per_word = 8; - if ((spi->max_speed_hz == 0) || (spi->max_speed_hz > orion_spi->max_speed)) spi->max_speed_hz = orion_spi->max_speed; @@ -476,6 +467,9 @@ static int __init orion_spi_probe(struct platform_device *pdev) if (pdev->id != -1) master->bus_num = pdev->id; + /* we support only mode 0, and no options */ + master->mode_bits = 0; + master->setup = orion_spi_setup; master->transfer = orion_spi_transfer; master->num_chipselect = ORION_NUM_CHIPSELECTS; diff --git a/drivers/spi/pxa2xx_spi.c b/drivers/spi/pxa2xx_spi.c index 3f3c08c6ba4..d949dbf1141 100644 --- a/drivers/spi/pxa2xx_spi.c +++ b/drivers/spi/pxa2xx_spi.c @@ -1185,9 +1185,6 @@ static int transfer(struct spi_device *spi, struct spi_message *msg) return 0; } -/* the spi->mode bits understood by this driver: */ -#define MODEBITS (SPI_CPOL | SPI_CPHA) - static int setup_cs(struct spi_device *spi, struct chip_data *chip, struct pxa2xx_spi_chip *chip_info) { @@ -1236,9 +1233,6 @@ static int setup(struct spi_device *spi) uint tx_thres = TX_THRESH_DFLT; uint rx_thres = RX_THRESH_DFLT; - if (!spi->bits_per_word) - spi->bits_per_word = 8; - if (drv_data->ssp_type != PXA25x_SSP && (spi->bits_per_word < 4 || spi->bits_per_word > 32)) { dev_err(&spi->dev, "failed setup: ssp_type=%d, bits/wrd=%d " @@ -1255,12 +1249,6 @@ static int setup(struct spi_device *spi) return -EINVAL; } - if (spi->mode & ~MODEBITS) { - dev_dbg(&spi->dev, "setup: unsupported mode bits %x\n", - spi->mode & ~MODEBITS); - return -EINVAL; - } - /* Only alloc on first setup */ chip = spi_get_ctldata(spi); if (!chip) { @@ -1328,18 +1316,14 @@ static int setup(struct spi_device *spi) /* NOTE: PXA25x_SSP _could_ use external clocking ... */ if (drv_data->ssp_type != PXA25x_SSP) - dev_dbg(&spi->dev, "%d bits/word, %ld Hz, mode %d, %s\n", - spi->bits_per_word, + dev_dbg(&spi->dev, "%ld Hz actual, %s\n", clk_get_rate(ssp->clk) / (1 + ((chip->cr0 & SSCR0_SCR) >> 8)), - spi->mode & 0x3, chip->enable_dma ? "DMA" : "PIO"); else - dev_dbg(&spi->dev, "%d bits/word, %ld Hz, mode %d, %s\n", - spi->bits_per_word, + dev_dbg(&spi->dev, "%ld Hz actual, %s\n", clk_get_rate(ssp->clk) / 2 / (1 + ((chip->cr0 & SSCR0_SCR) >> 8)), - spi->mode & 0x3, chip->enable_dma ? 
"DMA" : "PIO"); if (spi->bits_per_word <= 8) { @@ -1500,6 +1484,9 @@ static int __init pxa2xx_spi_probe(struct platform_device *pdev) drv_data->pdev = pdev; drv_data->ssp = ssp; + /* the spi->mode bits understood by this driver: */ + master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH; + master->bus_num = pdev->id; master->num_chipselect = platform_info->num_chipselect; master->dma_alignment = DMA_ALIGNMENT; diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 8eba98c8ed1..70845ccd85c 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -265,7 +265,7 @@ int spi_add_device(struct spi_device *spi) * normally rely on the device being setup. Devices * using SPI_CS_HIGH can't coexist well otherwise... */ - status = spi->master->setup(spi); + status = spi_setup(spi); if (status < 0) { dev_err(dev, "can't %s %s, status %d\n", "setup", dev_name(&spi->dev), status); @@ -583,6 +583,70 @@ EXPORT_SYMBOL_GPL(spi_busnum_to_master); /*-------------------------------------------------------------------------*/ +/* Core methods for SPI master protocol drivers. Some of the + * other core methods are currently defined as inline functions. + */ + +/** + * spi_setup - setup SPI mode and clock rate + * @spi: the device whose settings are being modified + * Context: can sleep, and no requests are queued to the device + * + * SPI protocol drivers may need to update the transfer mode if the + * device doesn't work with its default. They may likewise need + * to update clock rates or word sizes from initial values. This function + * changes those settings, and must be called from a context that can sleep. + * Except for SPI_CS_HIGH, which takes effect immediately, the changes take + * effect the next time the device is selected and data is transferred to + * or from it. When this function returns, the spi device is deselected. + * + * Note that this call will fail if the protocol driver specifies an option + * that the underlying controller or its driver does not support. For + * example, not all hardware supports wire transfers using nine bit words, + * LSB-first wire encoding, or active-high chipselects. + */ +int spi_setup(struct spi_device *spi) +{ + unsigned bad_bits; + int status; + + /* help drivers fail *cleanly* when they need options + * that aren't supported with their current master + */ + bad_bits = spi->mode & ~spi->master->mode_bits; + if (bad_bits) { + dev_dbg(&spi->dev, "setup: unsupported mode bits %x\n", + bad_bits); + return -EINVAL; + } + + if (!spi->bits_per_word) + spi->bits_per_word = 8; + + status = spi->master->setup(spi); + + dev_dbg(&spi->dev, "setup mode %d, %s%s%s%s" + "%u bits/w, %u Hz max --> %d\n", + (int) (spi->mode & (SPI_CPOL | SPI_CPHA)), + (spi->mode & SPI_CS_HIGH) ? "cs_high, " : "", + (spi->mode & SPI_LSB_FIRST) ? "lsb, " : "", + (spi->mode & SPI_3WIRE) ? "3wire, " : "", + (spi->mode & SPI_LOOP) ? "loopback, " : "", + spi->bits_per_word, spi->max_speed_hz, + status); + + return status; +} +EXPORT_SYMBOL_GPL(spi_setup); + + +/*-------------------------------------------------------------------------*/ + +/* Utility methods for SPI master protocol drivers, layered on + * top of the core. Some other utility methods are defined as + * inline functions. 
+ */ + static void spi_complete(void *arg) { complete(arg); @@ -636,8 +700,8 @@ static u8 *buf; * @spi: device with which data will be exchanged * @txbuf: data to be written (need not be dma-safe) * @n_tx: size of txbuf, in bytes - * @rxbuf: buffer into which data will be read - * @n_rx: size of rxbuf, in bytes (need not be dma-safe) + * @rxbuf: buffer into which data will be read (need not be dma-safe) + * @n_rx: size of rxbuf, in bytes * Context: can sleep * * This performs a half duplex MicroWire style transaction with the diff --git a/drivers/spi/spi_bfin5xx.c b/drivers/spi/spi_bfin5xx.c index 011c5bddba6..73e24ef5a2f 100644 --- a/drivers/spi/spi_bfin5xx.c +++ b/drivers/spi/spi_bfin5xx.c @@ -169,7 +169,7 @@ static int bfin_spi_flush(struct driver_data *drv_data) unsigned long limit = loops_per_jiffy << 1; /* wait for stop and clear stat */ - while (!(read_STAT(drv_data) & BIT_STAT_SPIF) && limit--) + while (!(read_STAT(drv_data) & BIT_STAT_SPIF) && --limit) cpu_relax(); write_STAT(drv_data, BIT_STAT_CLR); @@ -1010,16 +1010,6 @@ static int bfin_spi_setup(struct spi_device *spi) struct driver_data *drv_data = spi_master_get_devdata(spi->master); int ret; - /* Abort device setup if requested features are not supported */ - if (spi->mode & ~(SPI_CPOL | SPI_CPHA | SPI_LSB_FIRST)) { - dev_err(&spi->dev, "requested mode not fully supported\n"); - return -EINVAL; - } - - /* Zero (the default) here means 8 bits */ - if (!spi->bits_per_word) - spi->bits_per_word = 8; - if (spi->bits_per_word != 8 && spi->bits_per_word != 16) return -EINVAL; @@ -1287,6 +1277,9 @@ static int __init bfin_spi_probe(struct platform_device *pdev) drv_data->pdev = pdev; drv_data->pin_req = platform_info->pin_req; + /* the spi->mode bits supported by this driver: */ + master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_LSB_FIRST; + master->bus_num = pdev->id; master->num_chipselect = platform_info->num_chipselect; master->cleanup = bfin_spi_cleanup; diff --git a/drivers/spi/spi_bitbang.c b/drivers/spi/spi_bitbang.c index 85e61f45121..2a5abc08e85 100644 --- a/drivers/spi/spi_bitbang.c +++ b/drivers/spi/spi_bitbang.c @@ -188,12 +188,6 @@ int spi_bitbang_setup(struct spi_device *spi) bitbang = spi_master_get_devdata(spi->master); - /* Bitbangers can support SPI_CS_HIGH, SPI_3WIRE, and so on; - * add those to master->flags, and provide the other support. 
- */ - if ((spi->mode & ~(SPI_CPOL|SPI_CPHA|bitbang->flags)) != 0) - return -EINVAL; - if (!cs) { cs = kzalloc(sizeof *cs, GFP_KERNEL); if (!cs) @@ -201,9 +195,6 @@ int spi_bitbang_setup(struct spi_device *spi) spi->controller_state = cs; } - if (!spi->bits_per_word) - spi->bits_per_word = 8; - /* per-word shift register access, in hardware or bitbanging */ cs->txrx_word = bitbang->txrx_word[spi->mode & (SPI_CPOL|SPI_CPHA)]; if (!cs->txrx_word) @@ -213,9 +204,7 @@ int spi_bitbang_setup(struct spi_device *spi) if (retval < 0) return retval; - dev_dbg(&spi->dev, "%s, mode %d, %u bits/w, %u nsec/bit\n", - __func__, spi->mode & (SPI_CPOL | SPI_CPHA), - spi->bits_per_word, 2 * cs->nsecs); + dev_dbg(&spi->dev, "%s, %u nsec/bit\n", __func__, 2 * cs->nsecs); /* NOTE we _need_ to call chipselect() early, ideally with adapter * setup, unless the hardware defaults cooperate to avoid confusion @@ -457,6 +446,9 @@ int spi_bitbang_start(struct spi_bitbang *bitbang) spin_lock_init(&bitbang->lock); INIT_LIST_HEAD(&bitbang->queue); + if (!bitbang->master->mode_bits) + bitbang->master->mode_bits = SPI_CPOL | SPI_CPHA | bitbang->flags; + if (!bitbang->master->transfer) bitbang->master->transfer = spi_bitbang_transfer; if (!bitbang->txrx_bufs) { diff --git a/drivers/spi/spi_imx.c b/drivers/spi/spi_imx.c index 0671aeef579..c195e45f7f3 100644 --- a/drivers/spi/spi_imx.c +++ b/drivers/spi/spi_imx.c @@ -1171,9 +1171,6 @@ msg_rejected: return -EINVAL; } -/* the spi->mode bits understood by this driver: */ -#define MODEBITS (SPI_CPOL | SPI_CPHA | SPI_CS_HIGH) - /* On first setup, bad values must free the chip_data memory, since they would cause spi_new_device to fail. Bad values set later by a protocol driver are simply not applied, and the calling driver is notified. */ @@ -1186,12 +1183,6 @@ static int setup(struct spi_device *spi) u32 tmp; int status = 0; - if (spi->mode & ~MODEBITS) { - dev_dbg(&spi->dev, "setup: unsupported mode bits %x\n", - spi->mode & ~MODEBITS); - return -EINVAL; - } - /* Get controller data */ chip_info = spi->controller_data; @@ -1286,10 +1277,7 @@ static int setup(struct spi_device *spi) /* SPI word width */ tmp = spi->bits_per_word; - if (tmp == 0) { - tmp = 8; - spi->bits_per_word = 8; - } else if (tmp > 16) { + if (tmp > 16) { status = -EINVAL; dev_err(&spi->dev, "setup - " @@ -1481,6 +1469,9 @@ static int __init spi_imx_probe(struct platform_device *pdev) drv_data->master_info = platform_info; drv_data->pdev = pdev; + /* the spi->mode bits understood by this driver: */ + master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH; + master->bus_num = pdev->id; master->num_chipselect = platform_info->num_chipselect; master->dma_alignment = DMA_ALIGNMENT;
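The spi_bitbang_start() change above means bitbang-based controllers no longer need their own mode checks: they publish any extra capabilities in bitbang->flags and can leave master->mode_bits zero. A sketch of a hypothetical controller probe, assuming the txrx_word callbacks are filled in elsewhere (all driver names are illustrative):

#include <linux/platform_device.h>
#include <linux/spi/spi.h>
#include <linux/spi/spi_bitbang.h>

/* hypothetical controller: extras beyond the implicit CPOL/CPHA */
static int __init mybb_probe(struct platform_device *pdev)
{
	struct spi_master *master;
	struct spi_bitbang *bb;

	master = spi_alloc_master(&pdev->dev, sizeof(*bb));
	if (!master)
		return -ENOMEM;

	bb = spi_master_get_devdata(master);
	bb->master = spi_master_get(master);
	bb->flags = SPI_3WIRE;	/* advertised on top of CPOL/CPHA */

	/* txrx_word[] setup omitted for brevity; mode_bits left at
	 * zero, so spi_bitbang_start() defaults it to
	 * SPI_CPOL | SPI_CPHA | bb->flags */
	return spi_bitbang_start(bb);
}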
diff --git a/drivers/spi/spi_mpc83xx.c b/drivers/spi/spi_mpc83xx.c index a32ccb44065..ce61be98e06 100644 --- a/drivers/spi/spi_mpc83xx.c +++ b/drivers/spi/spi_mpc83xx.c @@ -419,10 +419,6 @@ static void mpc83xx_spi_work(struct work_struct *work) spin_unlock_irq(&mpc83xx_spi->lock); } -/* the spi->mode bits understood by this driver: */ -#define MODEBITS (SPI_CPOL | SPI_CPHA | SPI_CS_HIGH \ - | SPI_LSB_FIRST | SPI_LOOP) - static int mpc83xx_spi_setup(struct spi_device *spi) { struct mpc83xx_spi *mpc83xx_spi; @@ -430,12 +426,6 @@ static int mpc83xx_spi_setup(struct spi_device *spi) u32 hw_mode; struct spi_mpc83xx_cs *cs = spi->controller_state; - if (spi->mode & ~MODEBITS) { - dev_dbg(&spi->dev, "setup: unsupported mode bits %x\n", - spi->mode & ~MODEBITS); - return -EINVAL; - } - if (!spi->max_speed_hz) return -EINVAL; @@ -447,9 +437,6 @@ static int mpc83xx_spi_setup(struct spi_device *spi) } mpc83xx_spi = spi_master_get_devdata(spi->master); - if (!spi->bits_per_word) - spi->bits_per_word = 8; - hw_mode = cs->hw_mode; /* Save original settings */ cs->hw_mode = mpc83xx_spi_read_reg(&mpc83xx_spi->base->mode); /* mask out bits we are going to set */ @@ -471,9 +458,6 @@ static int mpc83xx_spi_setup(struct spi_device *spi) return retval; } - dev_dbg(&spi->dev, "%s, mode %d, %u bits/w, %u Hz\n", - __func__, spi->mode & (SPI_CPOL | SPI_CPHA), - spi->bits_per_word, spi->max_speed_hz); #if 0 /* Don't think this is needed */ /* NOTE we _need_ to call chipselect() early, ideally with adapter * setup, unless the hardware defaults cooperate to avoid confusion @@ -568,6 +552,10 @@ mpc83xx_spi_probe(struct device *dev, struct resource *mem, unsigned int irq) dev_set_drvdata(dev, master); + /* the spi->mode bits understood by this driver: */ + master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH + | SPI_LSB_FIRST | SPI_LOOP; + master->setup = mpc83xx_spi_setup; master->transfer = mpc83xx_spi_transfer; master->cleanup = mpc83xx_spi_cleanup; diff --git a/drivers/spi/spi_s3c24xx.c b/drivers/spi/spi_s3c24xx.c index b3ebc1d0f85..e0d44af4745 100644 --- a/drivers/spi/spi_s3c24xx.c +++ b/drivers/spi/spi_s3c24xx.c @@ -146,32 +146,16 @@ static int s3c24xx_spi_setupxfer(struct spi_device *spi, return 0; } -/* the spi->mode bits understood by this driver: */ -#define MODEBITS (SPI_CPOL | SPI_CPHA | SPI_CS_HIGH) - static int s3c24xx_spi_setup(struct spi_device *spi) { int ret; - if (!spi->bits_per_word) - spi->bits_per_word = 8; - - if (spi->mode & ~MODEBITS) { - dev_dbg(&spi->dev, "setup: unsupported mode bits %x\n", - spi->mode & ~MODEBITS); - return -EINVAL; - } - ret = s3c24xx_spi_setupxfer(spi, NULL); if (ret < 0) { dev_err(&spi->dev, "setupxfer returned %d\n", ret); return ret; } - dev_dbg(&spi->dev, "%s: mode %d, %u bpw, %d hz\n", - __func__, spi->mode, spi->bits_per_word, - spi->max_speed_hz); - return 0; } @@ -290,6 +274,9 @@ static int __init s3c24xx_spi_probe(struct platform_device *pdev) /* setup the master state. */ + /* the spi->mode bits understood by this driver: */ + master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH; + master->num_chipselect = hw->pdata->num_cs; master->bus_num = pdata->bus_num;
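The pattern repeated across the controller drivers in this patch - drop the private MODEBITS test from setup(), publish master->mode_bits in probe() - works because the new spi_setup() in spi.c now rejects unsupported mode bits centrally and defaults bits_per_word to 8. A sketch of what a protocol driver sees after this change (device and function names are illustrative):

#include <linux/spi/spi.h>

/* hypothetical protocol driver probe */
static int mychip_probe(struct spi_device *spi)
{
	int status;

	spi->mode = SPI_MODE_3 | SPI_LSB_FIRST;
	spi->bits_per_word = 0;		/* spi_setup() fills in 8 */

	/* fails with -EINVAL if the controller did not set
	 * SPI_LSB_FIRST in master->mode_bits */
	status = spi_setup(spi);
	if (status < 0)
		return status;

	return 0;
}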
diff --git a/drivers/spi/spi_txx9.c b/drivers/spi/spi_txx9.c index 29cbb065618..96057de133a 100644 --- a/drivers/spi/spi_txx9.c +++ b/drivers/spi/spi_txx9.c @@ -110,23 +110,17 @@ static void txx9spi_cs_func(struct spi_device *spi, struct txx9spi *c, ndelay(cs_delay); /* CS Setup Time / CS Recovery Time */ } -/* the spi->mode bits understood by this driver: */ -#define MODEBITS (SPI_CS_HIGH|SPI_CPOL|SPI_CPHA) - static int txx9spi_setup(struct spi_device *spi) { struct txx9spi *c = spi_master_get_devdata(spi->master); u8 bits_per_word; - if (spi->mode & ~MODEBITS) - return -EINVAL; - if (!spi->max_speed_hz || spi->max_speed_hz > c->max_speed_hz || spi->max_speed_hz < c->min_speed_hz) return -EINVAL; - bits_per_word = spi->bits_per_word ? : 8; + bits_per_word = spi->bits_per_word; if (bits_per_word != 8 && bits_per_word != 16) return -EINVAL; @@ -414,6 +408,9 @@ static int __init txx9spi_probe(struct platform_device *dev) (unsigned long long)res->start, irq, (c->baseclk + 500000) / 1000000); + /* the spi->mode bits understood by this driver: */ + master->mode_bits = SPI_CS_HIGH | SPI_CPOL | SPI_CPHA; + master->bus_num = dev->id; master->setup = txx9spi_setup; master->transfer = txx9spi_transfer; diff --git a/drivers/spi/xilinx_spi.c b/drivers/spi/xilinx_spi.c index 494d3f756e2..46b8c5c2f45 100644 --- a/drivers/spi/xilinx_spi.c +++ b/drivers/spi/xilinx_spi.c @@ -158,9 +158,6 @@ static int xilinx_spi_setup_transfer(struct spi_device *spi, return 0; } -/* the spi->mode bits understood by this driver: */ -#define MODEBITS (SPI_CPOL | SPI_CPHA) - static int xilinx_spi_setup(struct spi_device *spi) { struct spi_bitbang *bitbang; @@ -170,22 +167,10 @@ static int xilinx_spi_setup(struct spi_device *spi) xspi = spi_master_get_devdata(spi->master); bitbang = &xspi->bitbang; - if (!spi->bits_per_word) - spi->bits_per_word = 8; - - if (spi->mode & ~MODEBITS) { - dev_err(&spi->dev, "%s, unsupported mode bits %x\n", - __func__, spi->mode & ~MODEBITS); - return -EINVAL; - } - retval = xilinx_spi_setup_transfer(spi, NULL); if (retval < 0) return retval; - dev_dbg(&spi->dev, "%s, mode %d, %u bits/w, %u nsec/bit\n", - __func__, spi->mode & MODEBITS, spi->bits_per_word, 0); - return 0; } @@ -333,6 +318,9 @@ static int __init xilinx_spi_of_probe(struct of_device *ofdev, goto put_master; } + /* the spi->mode bits understood by this driver: */ + master->mode_bits = SPI_CPOL | SPI_CPHA; + xspi = spi_master_get_devdata(master); xspi->bitbang.master = spi_master_get(master); xspi->bitbang.chipselect = xilinx_spi_chipselect; diff --git a/drivers/w1/masters/w1-gpio.c b/drivers/w1/masters/w1-gpio.c index a411702413d..6f8866d6a90 100644 --- a/drivers/w1/masters/w1-gpio.c +++ b/drivers/w1/masters/w1-gpio.c @@ -74,6 +74,9 @@ static int __init w1_gpio_probe(struct platform_device *pdev) if (err) goto free_gpio; + if (pdata->enable_external_pullup) + pdata->enable_external_pullup(1); + platform_set_drvdata(pdev, master); return 0; @@ -91,6 +94,9 @@ static int __exit w1_gpio_remove(struct platform_device *pdev) struct w1_bus_master *master = platform_get_drvdata(pdev); struct w1_gpio_platform_data *pdata = pdev->dev.platform_data; + if (pdata->enable_external_pullup) + pdata->enable_external_pullup(0); + w1_remove_master_device(master); gpio_free(pdata->pin); kfree(master); @@ -98,12 +104,41 @@ static int __exit w1_gpio_remove(struct platform_device *pdev) return 0; } +#ifdef CONFIG_PM + +static int w1_gpio_suspend(struct platform_device *pdev, pm_message_t state) +{ + struct w1_gpio_platform_data *pdata = pdev->dev.platform_data; + + if (pdata->enable_external_pullup) + pdata->enable_external_pullup(0); + + return 0; +} + +static int w1_gpio_resume(struct platform_device *pdev) +{ + struct w1_gpio_platform_data *pdata = pdev->dev.platform_data; + + if (pdata->enable_external_pullup) + pdata->enable_external_pullup(1); + + return 0; +} + +#else +#define w1_gpio_suspend NULL +#define w1_gpio_resume NULL +#endif + static struct platform_driver w1_gpio_driver = { .driver = { .name = "w1-gpio", .owner = THIS_MODULE, }, .remove = __exit_p(w1_gpio_remove), + .suspend = w1_gpio_suspend, + .resume = w1_gpio_resume, }; static int __init w1_gpio_init(void)