summaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'drivers')
-rw-r--r--drivers/Kconfig2
-rw-r--r--drivers/Makefile1
-rw-r--r--drivers/char/Kconfig2
-rw-r--r--drivers/char/isicom.c2
-rw-r--r--drivers/char/mem.c115
-rw-r--r--drivers/char/ppdev.c29
-rw-r--r--drivers/char/sysrq.c15
-rw-r--r--drivers/edac/Kconfig11
-rw-r--r--drivers/edac/Makefile1
-rw-r--r--drivers/edac/amd8111_edac.c3
-rw-r--r--drivers/edac/cell_edac.c2
-rw-r--r--drivers/edac/cpc925_edac.c1017
-rw-r--r--drivers/edac/edac_core.h1
-rw-r--r--drivers/edac/edac_device.c14
-rw-r--r--drivers/gpio/max7301.c2
-rw-r--r--drivers/gpio/pca953x.c80
-rw-r--r--drivers/md/faulty.c21
-rw-r--r--drivers/md/linear.c218
-rw-r--r--drivers/md/linear.h12
-rw-r--r--drivers/md/md.c196
-rw-r--r--drivers/md/md.h14
-rw-r--r--drivers/md/multipath.c23
-rw-r--r--drivers/md/multipath.h6
-rw-r--r--drivers/md/raid0.c403
-rw-r--r--drivers/md/raid0.h10
-rw-r--r--drivers/md/raid1.c46
-rw-r--r--drivers/md/raid1.h6
-rw-r--r--drivers/md/raid10.c62
-rw-r--r--drivers/md/raid10.h6
-rw-r--r--drivers/md/raid5.c218
-rw-r--r--drivers/md/raid5.h8
-rw-r--r--drivers/misc/sgi-gru/Makefile2
-rw-r--r--drivers/misc/sgi-gru/gru_instructions.h68
-rw-r--r--drivers/misc/sgi-gru/grufault.c118
-rw-r--r--drivers/misc/sgi-gru/grufile.c69
-rw-r--r--drivers/misc/sgi-gru/gruhandles.c17
-rw-r--r--drivers/misc/sgi-gru/gruhandles.h30
-rw-r--r--drivers/misc/sgi-gru/grukdump.c232
-rw-r--r--drivers/misc/sgi-gru/grukservices.c562
-rw-r--r--drivers/misc/sgi-gru/grukservices.h51
-rw-r--r--drivers/misc/sgi-gru/grulib.h69
-rw-r--r--drivers/misc/sgi-gru/grumain.c187
-rw-r--r--drivers/misc/sgi-gru/gruprocfs.c17
-rw-r--r--drivers/misc/sgi-gru/grutables.h60
-rw-r--r--drivers/pps/Kconfig33
-rw-r--r--drivers/pps/Makefile8
-rw-r--r--drivers/pps/kapi.c329
-rw-r--r--drivers/pps/pps.c312
-rw-r--r--drivers/pps/sysfs.c98
-rw-r--r--drivers/rtc/Kconfig9
-rw-r--r--drivers/rtc/Makefile1
-rw-r--r--drivers/rtc/rtc-ds1307.c41
-rw-r--r--drivers/rtc/rtc-ds1553.c3
-rw-r--r--drivers/rtc/rtc-ds1742.c31
-rw-r--r--drivers/rtc/rtc-rx8025.c688
-rw-r--r--drivers/rtc/rtc-tx4939.c4
-rw-r--r--drivers/spi/atmel_spi.c14
-rw-r--r--drivers/spi/au1550_spi.c14
-rw-r--r--drivers/spi/mpc52xx_psc_spi.c34
-rw-r--r--drivers/spi/omap2_mcspi.c16
-rw-r--r--drivers/spi/omap_uwire.c14
-rw-r--r--drivers/spi/orion_spi.c12
-rw-r--r--drivers/spi/pxa2xx_spi.c23
-rw-r--r--drivers/spi/spi.c70
-rw-r--r--drivers/spi/spi_bfin5xx.c15
-rw-r--r--drivers/spi/spi_bitbang.c16
-rw-r--r--drivers/spi/spi_imx.c17
-rw-r--r--drivers/spi/spi_mpc83xx.c20
-rw-r--r--drivers/spi/spi_s3c24xx.c19
-rw-r--r--drivers/spi/spi_txx9.c11
-rw-r--r--drivers/spi/xilinx_spi.c18
-rw-r--r--drivers/w1/masters/w1-gpio.c35
72 files changed, 4629 insertions, 1304 deletions
diff --git a/drivers/Kconfig b/drivers/Kconfig
index a442c8f29fc..48bbdbe43e6 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -52,6 +52,8 @@ source "drivers/i2c/Kconfig"
source "drivers/spi/Kconfig"
+source "drivers/pps/Kconfig"
+
source "drivers/gpio/Kconfig"
source "drivers/w1/Kconfig"
diff --git a/drivers/Makefile b/drivers/Makefile
index 00b44f4ccf0..bc4205d2fc3 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -72,6 +72,7 @@ obj-$(CONFIG_INPUT) += input/
obj-$(CONFIG_I2O) += message/
obj-$(CONFIG_RTC_LIB) += rtc/
obj-y += i2c/ media/
+obj-$(CONFIG_PPS) += pps/
obj-$(CONFIG_W1) += w1/
obj-$(CONFIG_POWER_SUPPLY) += power/
obj-$(CONFIG_HWMON) += hwmon/
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index 30bae6de6a0..0bd01f49cfd 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -807,7 +807,7 @@ if RTC_LIB=n
config RTC
tristate "Enhanced Real Time Clock Support (legacy PC RTC driver)"
depends on !PPC && !PARISC && !IA64 && !M68K && !SPARC && !FRV \
- && !ARM && !SUPERH && !S390 && !AVR32
+ && !ARM && !SUPERH && !S390 && !AVR32 && !BLACKFIN
---help---
If you say Y here and create a character special file /dev/rtc with
major number 10 and minor number 135 using mknod ("man mknod"), you
diff --git a/drivers/char/isicom.c b/drivers/char/isicom.c
index 4d745a89504..4159292e35c 100644
--- a/drivers/char/isicom.c
+++ b/drivers/char/isicom.c
@@ -1593,7 +1593,7 @@ static unsigned int card_count;
static int __devinit isicom_probe(struct pci_dev *pdev,
const struct pci_device_id *ent)
{
- unsigned int signature, index;
+ unsigned int uninitialized_var(signature), index;
int retval = -EPERM;
struct isi_board *board = NULL;
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index f96d0bef855..afa8813e737 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -863,59 +863,58 @@ static const struct file_operations kmsg_fops = {
.write = kmsg_write,
};
-static int memory_open(struct inode * inode, struct file * filp)
-{
- int ret = 0;
-
- lock_kernel();
- switch (iminor(inode)) {
- case 1:
- filp->f_op = &mem_fops;
- filp->f_mapping->backing_dev_info =
- &directly_mappable_cdev_bdi;
- break;
+static const struct {
+ unsigned int minor;
+ char *name;
+ umode_t mode;
+ const struct file_operations *fops;
+ struct backing_dev_info *dev_info;
+} devlist[] = { /* list of minor devices */
+ {1, "mem", S_IRUSR | S_IWUSR | S_IRGRP, &mem_fops,
+ &directly_mappable_cdev_bdi},
#ifdef CONFIG_DEVKMEM
- case 2:
- filp->f_op = &kmem_fops;
- filp->f_mapping->backing_dev_info =
- &directly_mappable_cdev_bdi;
- break;
+ {2, "kmem", S_IRUSR | S_IWUSR | S_IRGRP, &kmem_fops,
+ &directly_mappable_cdev_bdi},
#endif
- case 3:
- filp->f_op = &null_fops;
- break;
+ {3, "null", S_IRUGO | S_IWUGO, &null_fops, NULL},
#ifdef CONFIG_DEVPORT
- case 4:
- filp->f_op = &port_fops;
- break;
+ {4, "port", S_IRUSR | S_IWUSR | S_IRGRP, &port_fops, NULL},
#endif
- case 5:
- filp->f_mapping->backing_dev_info = &zero_bdi;
- filp->f_op = &zero_fops;
- break;
- case 7:
- filp->f_op = &full_fops;
- break;
- case 8:
- filp->f_op = &random_fops;
- break;
- case 9:
- filp->f_op = &urandom_fops;
- break;
- case 11:
- filp->f_op = &kmsg_fops;
- break;
+ {5, "zero", S_IRUGO | S_IWUGO, &zero_fops, &zero_bdi},
+ {7, "full", S_IRUGO | S_IWUGO, &full_fops, NULL},
+ {8, "random", S_IRUGO | S_IWUSR, &random_fops, NULL},
+ {9, "urandom", S_IRUGO | S_IWUSR, &urandom_fops, NULL},
+ {11,"kmsg", S_IRUGO | S_IWUSR, &kmsg_fops, NULL},
#ifdef CONFIG_CRASH_DUMP
- case 12:
- filp->f_op = &oldmem_fops;
- break;
+ {12,"oldmem", S_IRUSR | S_IWUSR | S_IRGRP, &oldmem_fops, NULL},
#endif
- default:
- unlock_kernel();
- return -ENXIO;
+};
+
+static int memory_open(struct inode *inode, struct file *filp)
+{
+ int ret = 0;
+ int i;
+
+ lock_kernel();
+
+ for (i = 0; i < ARRAY_SIZE(devlist); i++) {
+ if (devlist[i].minor == iminor(inode)) {
+ filp->f_op = devlist[i].fops;
+ if (devlist[i].dev_info) {
+ filp->f_mapping->backing_dev_info =
+ devlist[i].dev_info;
+ }
+
+ break;
+ }
}
- if (filp->f_op && filp->f_op->open)
- ret = filp->f_op->open(inode,filp);
+
+ if (i == ARRAY_SIZE(devlist))
+ ret = -ENXIO;
+ else
+ if (filp->f_op && filp->f_op->open)
+ ret = filp->f_op->open(inode, filp);
+
unlock_kernel();
return ret;
}
@@ -924,30 +923,6 @@ static const struct file_operations memory_fops = {
.open = memory_open, /* just a selector for the real open */
};
-static const struct {
- unsigned int minor;
- char *name;
- umode_t mode;
- const struct file_operations *fops;
-} devlist[] = { /* list of minor devices */
- {1, "mem", S_IRUSR | S_IWUSR | S_IRGRP, &mem_fops},
-#ifdef CONFIG_DEVKMEM
- {2, "kmem", S_IRUSR | S_IWUSR | S_IRGRP, &kmem_fops},
-#endif
- {3, "null", S_IRUGO | S_IWUGO, &null_fops},
-#ifdef CONFIG_DEVPORT
- {4, "port", S_IRUSR | S_IWUSR | S_IRGRP, &port_fops},
-#endif
- {5, "zero", S_IRUGO | S_IWUGO, &zero_fops},
- {7, "full", S_IRUGO | S_IWUGO, &full_fops},
- {8, "random", S_IRUGO | S_IWUSR, &random_fops},
- {9, "urandom", S_IRUGO | S_IWUSR, &urandom_fops},
- {11,"kmsg", S_IRUGO | S_IWUSR, &kmsg_fops},
-#ifdef CONFIG_CRASH_DUMP
- {12,"oldmem", S_IRUSR | S_IWUSR | S_IRGRP, &oldmem_fops},
-#endif
-};
-
static struct class *mem_class;
static int __init chr_dev_init(void)
diff --git a/drivers/char/ppdev.c b/drivers/char/ppdev.c
index c84c34fb123..432655bcb04 100644
--- a/drivers/char/ppdev.c
+++ b/drivers/char/ppdev.c
@@ -114,8 +114,7 @@ static ssize_t pp_read (struct file * file, char __user * buf, size_t count,
if (!(pp->flags & PP_CLAIMED)) {
/* Don't have the port claimed */
- printk (KERN_DEBUG CHRDEV "%x: claim the port first\n",
- minor);
+ pr_debug(CHRDEV "%x: claim the port first\n", minor);
return -EINVAL;
}
@@ -198,8 +197,7 @@ static ssize_t pp_write (struct file * file, const char __user * buf,
if (!(pp->flags & PP_CLAIMED)) {
/* Don't have the port claimed */
- printk (KERN_DEBUG CHRDEV "%x: claim the port first\n",
- minor);
+ pr_debug(CHRDEV "%x: claim the port first\n", minor);
return -EINVAL;
}
@@ -313,7 +311,7 @@ static int register_device (int minor, struct pp_struct *pp)
}
pp->pdev = pdev;
- printk (KERN_DEBUG "%s: registered pardevice\n", name);
+ pr_debug("%s: registered pardevice\n", name);
return 0;
}
@@ -343,8 +341,7 @@ static int pp_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
int ret;
if (pp->flags & PP_CLAIMED) {
- printk (KERN_DEBUG CHRDEV
- "%x: you've already got it!\n", minor);
+ pr_debug(CHRDEV "%x: you've already got it!\n", minor);
return -EINVAL;
}
@@ -379,7 +376,7 @@ static int pp_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
}
case PPEXCL:
if (pp->pdev) {
- printk (KERN_DEBUG CHRDEV "%x: too late for PPEXCL; "
+ pr_debug(CHRDEV "%x: too late for PPEXCL; "
"already registered\n", minor);
if (pp->flags & PP_EXCL)
/* But it's not really an error. */
@@ -491,8 +488,7 @@ static int pp_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
/* Everything else requires the port to be claimed, so check
* that now. */
if ((pp->flags & PP_CLAIMED) == 0) {
- printk (KERN_DEBUG CHRDEV "%x: claim the port first\n",
- minor);
+ pr_debug(CHRDEV "%x: claim the port first\n", minor);
return -EINVAL;
}
@@ -624,8 +620,7 @@ static int pp_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
return 0;
default:
- printk (KERN_DEBUG CHRDEV "%x: What? (cmd=0x%x)\n", minor,
- cmd);
+ pr_debug(CHRDEV "%x: What? (cmd=0x%x)\n", minor, cmd);
return -EINVAL;
}
@@ -698,9 +693,8 @@ static int pp_release (struct inode * inode, struct file * file)
}
if (compat_negot) {
parport_negotiate (pp->pdev->port, IEEE1284_MODE_COMPAT);
- printk (KERN_DEBUG CHRDEV
- "%x: negotiated back to compatibility mode because "
- "user-space forgot\n", minor);
+ pr_debug(CHRDEV "%x: negotiated back to compatibility "
+ "mode because user-space forgot\n", minor);
}
if (pp->flags & PP_CLAIMED) {
@@ -713,7 +707,7 @@ static int pp_release (struct inode * inode, struct file * file)
info->phase = pp->saved_state.phase;
parport_release (pp->pdev);
if (compat_negot != 1) {
- printk (KERN_DEBUG CHRDEV "%x: released pardevice "
+ pr_debug(CHRDEV "%x: released pardevice "
"because user-space forgot\n", minor);
}
}
@@ -723,8 +717,7 @@ static int pp_release (struct inode * inode, struct file * file)
parport_unregister_device (pp->pdev);
kfree (name);
pp->pdev = NULL;
- printk (KERN_DEBUG CHRDEV "%x: unregistered pardevice\n",
- minor);
+ pr_debug(CHRDEV "%x: unregistered pardevice\n", minor);
}
kfree (pp);
diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c
index 39a05b5fa9c..0db35857e4d 100644
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@ -121,20 +121,17 @@ static struct sysrq_key_op sysrq_unraw_op = {
#define sysrq_unraw_op (*(struct sysrq_key_op *)0)
#endif /* CONFIG_VT */
-#ifdef CONFIG_KEXEC
-static void sysrq_handle_crashdump(int key, struct tty_struct *tty)
+static void sysrq_handle_crash(int key, struct tty_struct *tty)
{
- crash_kexec(get_irq_regs());
+ char *killer = NULL;
+ *killer = 1;
}
static struct sysrq_key_op sysrq_crashdump_op = {
- .handler = sysrq_handle_crashdump,
- .help_msg = "Crashdump",
- .action_msg = "Trigger a crashdump",
+ .handler = sysrq_handle_crash,
+ .help_msg = "Crash",
+ .action_msg = "Trigger a crash",
.enable_mask = SYSRQ_ENABLE_DUMP,
};
-#else
-#define sysrq_crashdump_op (*(struct sysrq_key_op *)0)
-#endif
static void sysrq_handle_reboot(int key, struct tty_struct *tty)
{
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index ab4f3592a11..4339b1a879c 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -5,7 +5,7 @@
#
menuconfig EDAC
- bool "EDAC - error detection and reporting"
+ bool "EDAC (Error Detection And Correction) reporting"
depends on HAS_IOMEM
depends on X86 || PPC
help
@@ -232,4 +232,13 @@ config EDAC_AMD8111
Note, add more Kconfig dependency if it's adopted
on some machine other than Maple.
+config EDAC_CPC925
+ tristate "IBM CPC925 Memory Controller (PPC970FX)"
+ depends on EDAC_MM_EDAC && PPC64
+ help
+ Support for error detection and correction on the
+ IBM CPC925 Bridge and Memory Controller, which is
+ a companion chip to the PowerPC 970 family of
+ processors.
+
endif # EDAC
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index 633dc5604ee..98aa4a7db41 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -18,6 +18,7 @@ edac_core-objs += edac_pci.o edac_pci_sysfs.o
endif
obj-$(CONFIG_EDAC_AMD76X) += amd76x_edac.o
+obj-$(CONFIG_EDAC_CPC925) += cpc925_edac.o
obj-$(CONFIG_EDAC_I5000) += i5000_edac.o
obj-$(CONFIG_EDAC_I5100) += i5100_edac.o
obj-$(CONFIG_EDAC_I5400) += i5400_edac.o
diff --git a/drivers/edac/amd8111_edac.c b/drivers/edac/amd8111_edac.c
index 2cb58ef743e..35b78d04bbf 100644
--- a/drivers/edac/amd8111_edac.c
+++ b/drivers/edac/amd8111_edac.c
@@ -37,7 +37,6 @@
#define AMD8111_EDAC_MOD_STR "amd8111_edac"
#define PCI_DEVICE_ID_AMD_8111_PCI 0x7460
-static int edac_dev_idx;
enum amd8111_edac_devs {
LPC_BRIDGE = 0,
@@ -377,7 +376,7 @@ static int amd8111_dev_probe(struct pci_dev *dev,
* edac_device_ctl_info, but make use of existing
* one instead.
*/
- dev_info->edac_idx = edac_dev_idx++;
+ dev_info->edac_idx = edac_device_alloc_index();
dev_info->edac_dev =
edac_device_alloc_ctl_info(0, dev_info->ctl_name, 1,
NULL, 0, 0,
diff --git a/drivers/edac/cell_edac.c b/drivers/edac/cell_edac.c
index cb0f639f049..c973004c002 100644
--- a/drivers/edac/cell_edac.c
+++ b/drivers/edac/cell_edac.c
@@ -227,7 +227,7 @@ static struct platform_driver cell_edac_driver = {
.owner = THIS_MODULE,
},
.probe = cell_edac_probe,
- .remove = cell_edac_remove,
+ .remove = __devexit_p(cell_edac_remove),
};
static int __init cell_edac_init(void)
diff --git a/drivers/edac/cpc925_edac.c b/drivers/edac/cpc925_edac.c
new file mode 100644
index 00000000000..8c54196b5ab
--- /dev/null
+++ b/drivers/edac/cpc925_edac.c
@@ -0,0 +1,1017 @@
+/*
+ * cpc925_edac.c, EDAC driver for IBM CPC925 Bridge and Memory Controller.
+ *
+ * Copyright (c) 2008 Wind River Systems, Inc.
+ *
+ * Authors: Cao Qingtao <qingtao.cao@windriver.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/edac.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+
+#include "edac_core.h"
+#include "edac_module.h"
+
+#define CPC925_EDAC_REVISION " Ver: 1.0.0 " __DATE__
+#define CPC925_EDAC_MOD_STR "cpc925_edac"
+
+#define cpc925_printk(level, fmt, arg...) \
+ edac_printk(level, "CPC925", fmt, ##arg)
+
+#define cpc925_mc_printk(mci, level, fmt, arg...) \
+ edac_mc_chipset_printk(mci, level, "CPC925", fmt, ##arg)
+
+/*
+ * CPC925 registers are of 32 bits with bit0 defined at the
+ * most significant bit and bit31 at that of least significant.
+ */
+#define CPC925_BITS_PER_REG 32
+#define CPC925_BIT(nr) (1UL << (CPC925_BITS_PER_REG - 1 - nr))
+
+/*
+ * EDAC device names for the error detections of
+ * CPU Interface and Hypertransport Link.
+ */
+#define CPC925_CPU_ERR_DEV "cpu"
+#define CPC925_HT_LINK_DEV "htlink"
+
+/* Suppose DDR Refresh cycle is 15.6 microsecond */
+#define CPC925_REF_FREQ 0xFA69
+#define CPC925_SCRUB_BLOCK_SIZE 64 /* bytes */
+#define CPC925_NR_CSROWS 8
+
+/*
+ * All registers and bits definitions are taken from
+ * "CPC925 Bridge and Memory Controller User Manual, SA14-2761-02".
+ */
+
+/*
+ * CPU and Memory Controller Registers
+ */
+/************************************************************
+ * Processor Interface Exception Mask Register (APIMASK)
+ ************************************************************/
+#define REG_APIMASK_OFFSET 0x30070
+enum apimask_bits {
+ APIMASK_DART = CPC925_BIT(0), /* DART Exception */
+ APIMASK_ADI0 = CPC925_BIT(1), /* Handshake Error on PI0_ADI */
+ APIMASK_ADI1 = CPC925_BIT(2), /* Handshake Error on PI1_ADI */
+ APIMASK_STAT = CPC925_BIT(3), /* Status Exception */
+ APIMASK_DERR = CPC925_BIT(4), /* Data Error Exception */
+ APIMASK_ADRS0 = CPC925_BIT(5), /* Addressing Exception on PI0 */
+ APIMASK_ADRS1 = CPC925_BIT(6), /* Addressing Exception on PI1 */
+ /* BIT(7) Reserved */
+ APIMASK_ECC_UE_H = CPC925_BIT(8), /* UECC upper */
+ APIMASK_ECC_CE_H = CPC925_BIT(9), /* CECC upper */
+ APIMASK_ECC_UE_L = CPC925_BIT(10), /* UECC lower */
+ APIMASK_ECC_CE_L = CPC925_BIT(11), /* CECC lower */
+
+ CPU_MASK_ENABLE = (APIMASK_DART | APIMASK_ADI0 | APIMASK_ADI1 |
+ APIMASK_STAT | APIMASK_DERR | APIMASK_ADRS0 |
+ APIMASK_ADRS1),
+ ECC_MASK_ENABLE = (APIMASK_ECC_UE_H | APIMASK_ECC_CE_H |
+ APIMASK_ECC_UE_L | APIMASK_ECC_CE_L),
+};
+
+/************************************************************
+ * Processor Interface Exception Register (APIEXCP)
+ ************************************************************/
+#define REG_APIEXCP_OFFSET 0x30060
+enum apiexcp_bits {
+ APIEXCP_DART = CPC925_BIT(0), /* DART Exception */
+ APIEXCP_ADI0 = CPC925_BIT(1), /* Handshake Error on PI0_ADI */
+ APIEXCP_ADI1 = CPC925_BIT(2), /* Handshake Error on PI1_ADI */
+ APIEXCP_STAT = CPC925_BIT(3), /* Status Exception */
+ APIEXCP_DERR = CPC925_BIT(4), /* Data Error Exception */
+ APIEXCP_ADRS0 = CPC925_BIT(5), /* Addressing Exception on PI0 */
+ APIEXCP_ADRS1 = CPC925_BIT(6), /* Addressing Exception on PI1 */
+ /* BIT(7) Reserved */
+ APIEXCP_ECC_UE_H = CPC925_BIT(8), /* UECC upper */
+ APIEXCP_ECC_CE_H = CPC925_BIT(9), /* CECC upper */
+ APIEXCP_ECC_UE_L = CPC925_BIT(10), /* UECC lower */
+ APIEXCP_ECC_CE_L = CPC925_BIT(11), /* CECC lower */
+
+ CPU_EXCP_DETECTED = (APIEXCP_DART | APIEXCP_ADI0 | APIEXCP_ADI1 |
+ APIEXCP_STAT | APIEXCP_DERR | APIEXCP_ADRS0 |
+ APIEXCP_ADRS1),
+ UECC_EXCP_DETECTED = (APIEXCP_ECC_UE_H | APIEXCP_ECC_UE_L),
+ CECC_EXCP_DETECTED = (APIEXCP_ECC_CE_H | APIEXCP_ECC_CE_L),
+ ECC_EXCP_DETECTED = (UECC_EXCP_DETECTED | CECC_EXCP_DETECTED),
+};
+
+/************************************************************
+ * Memory Bus Configuration Register (MBCR)
+************************************************************/
+#define REG_MBCR_OFFSET 0x2190
+#define MBCR_64BITCFG_SHIFT 23
+#define MBCR_64BITCFG_MASK (1UL << MBCR_64BITCFG_SHIFT)
+#define MBCR_64BITBUS_SHIFT 22
+#define MBCR_64BITBUS_MASK (1UL << MBCR_64BITBUS_SHIFT)
+
+/************************************************************
+ * Memory Bank Mode Register (MBMR)
+************************************************************/
+#define REG_MBMR_OFFSET 0x21C0
+#define MBMR_MODE_MAX_VALUE 0xF
+#define MBMR_MODE_SHIFT 25
+#define MBMR_MODE_MASK (MBMR_MODE_MAX_VALUE << MBMR_MODE_SHIFT)
+#define MBMR_BBA_SHIFT 24
+#define MBMR_BBA_MASK (1UL << MBMR_BBA_SHIFT)
+
+/************************************************************
+ * Memory Bank Boundary Address Register (MBBAR)
+ ************************************************************/
+#define REG_MBBAR_OFFSET 0x21D0
+#define MBBAR_BBA_MAX_VALUE 0xFF
+#define MBBAR_BBA_SHIFT 24
+#define MBBAR_BBA_MASK (MBBAR_BBA_MAX_VALUE << MBBAR_BBA_SHIFT)
+
+/************************************************************
+ * Memory Scrub Control Register (MSCR)
+ ************************************************************/
+#define REG_MSCR_OFFSET 0x2400
+#define MSCR_SCRUB_MOD_MASK 0xC0000000 /* scrub_mod - bit0:1*/
+#define MSCR_BACKGR_SCRUB 0x40000000 /* 01 */
+#define MSCR_SI_SHIFT 16 /* si - bit8:15*/
+#define MSCR_SI_MAX_VALUE 0xFF
+#define MSCR_SI_MASK (MSCR_SI_MAX_VALUE << MSCR_SI_SHIFT)
+
+/************************************************************
+ * Memory Scrub Range Start Register (MSRSR)
+ ************************************************************/
+#define REG_MSRSR_OFFSET 0x2410
+
+/************************************************************
+ * Memory Scrub Range End Register (MSRER)
+ ************************************************************/
+#define REG_MSRER_OFFSET 0x2420
+
+/************************************************************
+ * Memory Scrub Pattern Register (MSPR)
+ ************************************************************/
+#define REG_MSPR_OFFSET 0x2430
+
+/************************************************************
+ * Memory Check Control Register (MCCR)
+ ************************************************************/
+#define REG_MCCR_OFFSET 0x2440
+enum mccr_bits {
+ MCCR_ECC_EN = CPC925_BIT(0), /* ECC high and low check */
+};
+
+/************************************************************
+ * Memory Check Range End Register (MCRER)
+ ************************************************************/
+#define REG_MCRER_OFFSET 0x2450
+
+/************************************************************
+ * Memory Error Address Register (MEAR)
+ ************************************************************/
+#define REG_MEAR_OFFSET 0x2460
+#define MEAR_BCNT_MAX_VALUE 0x3
+#define MEAR_BCNT_SHIFT 30
+#define MEAR_BCNT_MASK (MEAR_BCNT_MAX_VALUE << MEAR_BCNT_SHIFT)
+#define MEAR_RANK_MAX_VALUE 0x7
+#define MEAR_RANK_SHIFT 27
+#define MEAR_RANK_MASK (MEAR_RANK_MAX_VALUE << MEAR_RANK_SHIFT)
+#define MEAR_COL_MAX_VALUE 0x7FF
+#define MEAR_COL_SHIFT 16
+#define MEAR_COL_MASK (MEAR_COL_MAX_VALUE << MEAR_COL_SHIFT)
+#define MEAR_BANK_MAX_VALUE 0x3
+#define MEAR_BANK_SHIFT 14
+#define MEAR_BANK_MASK (MEAR_BANK_MAX_VALUE << MEAR_BANK_SHIFT)
+#define MEAR_ROW_MASK 0x00003FFF
+
+/************************************************************
+ * Memory Error Syndrome Register (MESR)
+ ************************************************************/
+#define REG_MESR_OFFSET 0x2470
+#define MESR_ECC_SYN_H_MASK 0xFF00
+#define MESR_ECC_SYN_L_MASK 0x00FF
+
+/************************************************************
+ * Memory Mode Control Register (MMCR)
+ ************************************************************/
+#define REG_MMCR_OFFSET 0x2500
+enum mmcr_bits {
+ MMCR_REG_DIMM_MODE = CPC925_BIT(3),
+};
+
+/*
+ * HyperTransport Link Registers
+ */
+/************************************************************
+ * Error Handling/Enumeration Scratch Pad Register (ERRCTRL)
+ ************************************************************/
+#define REG_ERRCTRL_OFFSET 0x70140
+enum errctrl_bits { /* nonfatal interrupts for */
+ ERRCTRL_SERR_NF = CPC925_BIT(0), /* system error */
+ ERRCTRL_CRC_NF = CPC925_BIT(1), /* CRC error */
+ ERRCTRL_RSP_NF = CPC925_BIT(2), /* Response error */
+ ERRCTRL_EOC_NF = CPC925_BIT(3), /* End-Of-Chain error */
+ ERRCTRL_OVF_NF = CPC925_BIT(4), /* Overflow error */
+ ERRCTRL_PROT_NF = CPC925_BIT(5), /* Protocol error */
+
+ ERRCTRL_RSP_ERR = CPC925_BIT(6), /* Response error received */
+ ERRCTRL_CHN_FAL = CPC925_BIT(7), /* Sync flooding detected */
+
+ HT_ERRCTRL_ENABLE = (ERRCTRL_SERR_NF | ERRCTRL_CRC_NF |
+ ERRCTRL_RSP_NF | ERRCTRL_EOC_NF |
+ ERRCTRL_OVF_NF | ERRCTRL_PROT_NF),
+ HT_ERRCTRL_DETECTED = (ERRCTRL_RSP_ERR | ERRCTRL_CHN_FAL),
+};
+
+/************************************************************
+ * Link Configuration and Link Control Register (LINKCTRL)
+ ************************************************************/
+#define REG_LINKCTRL_OFFSET 0x70110
+enum linkctrl_bits {
+ LINKCTRL_CRC_ERR = (CPC925_BIT(22) | CPC925_BIT(23)),
+ LINKCTRL_LINK_FAIL = CPC925_BIT(27),
+
+ HT_LINKCTRL_DETECTED = (LINKCTRL_CRC_ERR | LINKCTRL_LINK_FAIL),
+};
+
+/************************************************************
+ * Link FreqCap/Error/Freq/Revision ID Register (LINKERR)
+ ************************************************************/
+#define REG_LINKERR_OFFSET 0x70120
+enum linkerr_bits {
+ LINKERR_EOC_ERR = CPC925_BIT(17), /* End-Of-Chain error */
+ LINKERR_OVF_ERR = CPC925_BIT(18), /* Receive Buffer Overflow */
+ LINKERR_PROT_ERR = CPC925_BIT(19), /* Protocol error */
+
+ HT_LINKERR_DETECTED = (LINKERR_EOC_ERR | LINKERR_OVF_ERR |
+ LINKERR_PROT_ERR),
+};
+
+/************************************************************
+ * Bridge Control Register (BRGCTRL)
+ ************************************************************/
+#define REG_BRGCTRL_OFFSET 0x70300
+enum brgctrl_bits {
+ BRGCTRL_DETSERR = CPC925_BIT(0), /* SERR on Secondary Bus */
+ BRGCTRL_SECBUSRESET = CPC925_BIT(9), /* Secondary Bus Reset */
+};
+
+/* Private structure for edac memory controller */
+struct cpc925_mc_pdata {
+ void __iomem *vbase;
+ unsigned long total_mem;
+ const char *name;
+ int edac_idx;
+};
+
+/* Private structure for common edac device */
+struct cpc925_dev_info {
+ void __iomem *vbase;
+ struct platform_device *pdev;
+ char *ctl_name;
+ int edac_idx;
+ struct edac_device_ctl_info *edac_dev;
+ void (*init)(struct cpc925_dev_info *dev_info);
+ void (*exit)(struct cpc925_dev_info *dev_info);
+ void (*check)(struct edac_device_ctl_info *edac_dev);
+};
+
+/* Get total memory size from Open Firmware DTB */
+static void get_total_mem(struct cpc925_mc_pdata *pdata)
+{
+ struct device_node *np = NULL;
+ const unsigned int *reg, *reg_end;
+ int len, sw, aw;
+ unsigned long start, size;
+
+ np = of_find_node_by_type(NULL, "memory");
+ if (!np)
+ return;
+
+ aw = of_n_addr_cells(np);
+ sw = of_n_size_cells(np);
+ reg = (const unsigned int *)of_get_property(np, "reg", &len);
+ reg_end = reg + len/4;
+
+ pdata->total_mem = 0;
+ do {
+ start = of_read_number(reg, aw);
+ reg += aw;
+ size = of_read_number(reg, sw);
+ reg += sw;
+ debugf1("%s: start 0x%lx, size 0x%lx\n", __func__,
+ start, size);
+ pdata->total_mem += size;
+ } while (reg < reg_end);
+
+ of_node_put(np);
+ debugf0("%s: total_mem 0x%lx\n", __func__, pdata->total_mem);
+}
+
+static void cpc925_init_csrows(struct mem_ctl_info *mci)
+{
+ struct cpc925_mc_pdata *pdata = mci->pvt_info;
+ struct csrow_info *csrow;
+ int index;
+ u32 mbmr, mbbar, bba;
+ unsigned long row_size, last_nr_pages = 0;
+
+ get_total_mem(pdata);
+
+ for (index = 0; index < mci->nr_csrows; index++) {
+ mbmr = __raw_readl(pdata->vbase + REG_MBMR_OFFSET +
+ 0x20 * index);
+ mbbar = __raw_readl(pdata->vbase + REG_MBBAR_OFFSET +
+ 0x20 + index);
+ bba = (((mbmr & MBMR_BBA_MASK) >> MBMR_BBA_SHIFT) << 8) |
+ ((mbbar & MBBAR_BBA_MASK) >> MBBAR_BBA_SHIFT);
+
+ if (bba == 0)
+ continue; /* not populated */
+
+ csrow = &mci->csrows[index];
+
+ row_size = bba * (1UL << 28); /* 256M */
+ csrow->first_page = last_nr_pages;
+ csrow->nr_pages = row_size >> PAGE_SHIFT;
+ csrow->last_page = csrow->first_page + csrow->nr_pages - 1;
+ last_nr_pages = csrow->last_page + 1;
+
+ csrow->mtype = MEM_RDDR;
+ csrow->edac_mode = EDAC_SECDED;
+
+ switch (csrow->nr_channels) {
+ case 1: /* Single channel */
+ csrow->grain = 32; /* four-beat burst of 32 bytes */
+ break;
+ case 2: /* Dual channel */
+ default:
+ csrow->grain = 64; /* four-beat burst of 64 bytes */
+ break;
+ }
+
+ switch ((mbmr & MBMR_MODE_MASK) >> MBMR_MODE_SHIFT) {
+ case 6: /* 0110, no way to differentiate X8 VS X16 */
+ case 5: /* 0101 */
+ case 8: /* 1000 */
+ csrow->dtype = DEV_X16;
+ break;
+ case 7: /* 0111 */
+ case 9: /* 1001 */
+ csrow->dtype = DEV_X8;
+ break;
+ default:
+ csrow->dtype = DEV_UNKNOWN;
+ break;
+ }
+ }
+}
+
+/* Enable memory controller ECC detection */
+static void cpc925_mc_init(struct mem_ctl_info *mci)
+{
+ struct cpc925_mc_pdata *pdata = mci->pvt_info;
+ u32 apimask;
+ u32 mccr;
+
+ /* Enable various ECC error exceptions */
+ apimask = __raw_readl(pdata->vbase + REG_APIMASK_OFFSET);
+ if ((apimask & ECC_MASK_ENABLE) == 0) {
+ apimask |= ECC_MASK_ENABLE;
+ __raw_writel(apimask, pdata->vbase + REG_APIMASK_OFFSET);
+ }
+
+ /* Enable ECC detection */
+ mccr = __raw_readl(pdata->vbase + REG_MCCR_OFFSET);
+ if ((mccr & MCCR_ECC_EN) == 0) {
+ mccr |= MCCR_ECC_EN;
+ __raw_writel(mccr, pdata->vbase + REG_MCCR_OFFSET);
+ }
+}
+
+/* Disable memory controller ECC detection */
+static void cpc925_mc_exit(struct mem_ctl_info *mci)
+{
+ /*
+ * WARNING:
+ * We are supposed to clear the ECC error detection bits,
+ * and it will be no problem to do so. However, once they
+ * are cleared here if we want to re-install CPC925 EDAC
+ * module later, setting them up in cpc925_mc_init() will
+ * trigger machine check exception.
+ * Also, it's ok to leave ECC error detection bits enabled,
+ * since they are reset to 1 by default or by boot loader.
+ */
+
+ return;
+}
+
+/*
+ * Revert DDR column/row/bank addresses into page frame number and
+ * offset in page.
+ *
+ * Suppose memory mode is 0x0111(128-bit mode, identical DIMM pairs),
+ * physical address(PA) bits to column address(CA) bits mappings are:
+ * CA 0 1 2 3 4 5 6 7 8 9 10
+ * PA 59 58 57 56 55 54 53 52 51 50 49
+ *
+ * physical address(PA) bits to bank address(BA) bits mappings are:
+ * BA 0 1
+ * PA 43 44
+ *
+ * physical address(PA) bits to row address(RA) bits mappings are:
+ * RA 0 1 2 3 4 5 6 7 8 9 10 11 12
+ * PA 36 35 34 48 47 46 45 40 41 42 39 38 37
+ */
+static void cpc925_mc_get_pfn(struct mem_ctl_info *mci, u32 mear,
+ unsigned long *pfn, unsigned long *offset, int *csrow)
+{
+ u32 bcnt, rank, col, bank, row;
+ u32 c;
+ unsigned long pa;
+ int i;
+
+ bcnt = (mear & MEAR_BCNT_MASK) >> MEAR_BCNT_SHIFT;
+ rank = (mear & MEAR_RANK_MASK) >> MEAR_RANK_SHIFT;
+ col = (mear & MEAR_COL_MASK) >> MEAR_COL_SHIFT;
+ bank = (mear & MEAR_BANK_MASK) >> MEAR_BANK_SHIFT;
+ row = mear & MEAR_ROW_MASK;
+
+ *csrow = rank;
+
+#ifdef CONFIG_EDAC_DEBUG
+ if (mci->csrows[rank].first_page == 0) {
+ cpc925_mc_printk(mci, KERN_ERR, "ECC occurs in a "
+ "non-populated csrow, broken hardware?\n");
+ return;
+ }
+#endif
+
+ /* Revert csrow number */
+ pa = mci->csrows[rank].first_page << PAGE_SHIFT;
+
+ /* Revert column address */
+ col += bcnt;
+ for (i = 0; i < 11; i++) {
+ c = col & 0x1;
+ col >>= 1;
+ pa |= c << (14 - i);
+ }
+
+ /* Revert bank address */
+ pa |= bank << 19;
+
+ /* Revert row address, in 4 steps */
+ for (i = 0; i < 3; i++) {
+ c = row & 0x1;
+ row >>= 1;
+ pa |= c << (26 - i);
+ }
+
+ for (i = 0; i < 3; i++) {
+ c = row & 0x1;
+ row >>= 1;
+ pa |= c << (21 + i);
+ }
+
+ for (i = 0; i < 4; i++) {
+ c = row & 0x1;
+ row >>= 1;
+ pa |= c << (18 - i);
+ }
+
+ for (i = 0; i < 3; i++) {
+ c = row & 0x1;
+ row >>= 1;
+ pa |= c << (29 - i);
+ }
+
+ *offset = pa & (PAGE_SIZE - 1);
+ *pfn = pa >> PAGE_SHIFT;
+
+ debugf0("%s: ECC physical address 0x%lx\n", __func__, pa);
+}
+
+static int cpc925_mc_find_channel(struct mem_ctl_info *mci, u16 syndrome)
+{
+ if ((syndrome & MESR_ECC_SYN_H_MASK) == 0)
+ return 0;
+
+ if ((syndrome & MESR_ECC_SYN_L_MASK) == 0)
+ return 1;
+
+ cpc925_mc_printk(mci, KERN_INFO, "Unexpected syndrome value: 0x%x\n",
+ syndrome);
+ return 1;
+}
+
+/* Check memory controller registers for ECC errors */
+static void cpc925_mc_check(struct mem_ctl_info *mci)
+{
+ struct cpc925_mc_pdata *pdata = mci->pvt_info;
+ u32 apiexcp;
+ u32 mear;
+ u32 mesr;
+ u16 syndrome;
+ unsigned long pfn = 0, offset = 0;
+ int csrow = 0, channel = 0;
+
+ /* APIEXCP is cleared when read */
+ apiexcp = __raw_readl(pdata->vbase + REG_APIEXCP_OFFSET);
+ if ((apiexcp & ECC_EXCP_DETECTED) == 0)
+ return;
+
+ mesr = __raw_readl(pdata->vbase + REG_MESR_OFFSET);
+ syndrome = mesr | (MESR_ECC_SYN_H_MASK | MESR_ECC_SYN_L_MASK);
+
+ mear = __raw_readl(pdata->vbase + REG_MEAR_OFFSET);
+
+ /* Revert column/row addresses into page frame number, etc */
+ cpc925_mc_get_pfn(mci, mear, &pfn, &offset, &csrow);
+
+ if (apiexcp & CECC_EXCP_DETECTED) {
+ cpc925_mc_printk(mci, KERN_INFO, "DRAM CECC Fault\n");
+ channel = cpc925_mc_find_channel(mci, syndrome);
+ edac_mc_handle_ce(mci, pfn, offset, syndrome,
+ csrow, channel, mci->ctl_name);
+ }
+
+ if (apiexcp & UECC_EXCP_DETECTED) {
+ cpc925_mc_printk(mci, KERN_INFO, "DRAM UECC Fault\n");
+ edac_mc_handle_ue(mci, pfn, offset, csrow, mci->ctl_name);
+ }
+
+ cpc925_mc_printk(mci, KERN_INFO, "Dump registers:\n");
+ cpc925_mc_printk(mci, KERN_INFO, "APIMASK 0x%08x\n",
+ __raw_readl(pdata->vbase + REG_APIMASK_OFFSET));
+ cpc925_mc_printk(mci, KERN_INFO, "APIEXCP 0x%08x\n",
+ apiexcp);
+ cpc925_mc_printk(mci, KERN_INFO, "Mem Scrub Ctrl 0x%08x\n",
+ __raw_readl(pdata->vbase + REG_MSCR_OFFSET));
+ cpc925_mc_printk(mci, KERN_INFO, "Mem Scrub Rge Start 0x%08x\n",
+ __raw_readl(pdata->vbase + REG_MSRSR_OFFSET));
+ cpc925_mc_printk(mci, KERN_INFO, "Mem Scrub Rge End 0x%08x\n",
+ __raw_readl(pdata->vbase + REG_MSRER_OFFSET));
+ cpc925_mc_printk(mci, KERN_INFO, "Mem Scrub Pattern 0x%08x\n",
+ __raw_readl(pdata->vbase + REG_MSPR_OFFSET));
+ cpc925_mc_printk(mci, KERN_INFO, "Mem Chk Ctrl 0x%08x\n",
+ __raw_readl(pdata->vbase + REG_MCCR_OFFSET));
+ cpc925_mc_printk(mci, KERN_INFO, "Mem Chk Rge End 0x%08x\n",
+ __raw_readl(pdata->vbase + REG_MCRER_OFFSET));
+ cpc925_mc_printk(mci, KERN_INFO, "Mem Err Address 0x%08x\n",
+ mesr);
+ cpc925_mc_printk(mci, KERN_INFO, "Mem Err Syndrome 0x%08x\n",
+ syndrome);
+}
+
+/******************** CPU err device********************************/
+/* Enable CPU Errors detection */
+static void cpc925_cpu_init(struct cpc925_dev_info *dev_info)
+{
+ u32 apimask;
+
+ apimask = __raw_readl(dev_info->vbase + REG_APIMASK_OFFSET);
+ if ((apimask & CPU_MASK_ENABLE) == 0) {
+ apimask |= CPU_MASK_ENABLE;
+ __raw_writel(apimask, dev_info->vbase + REG_APIMASK_OFFSET);
+ }
+}
+
+/* Disable CPU Errors detection */
+static void cpc925_cpu_exit(struct cpc925_dev_info *dev_info)
+{
+ /*
+ * WARNING:
+ * We are supposed to clear the CPU error detection bits,
+ * and it will be no problem to do so. However, once they
+ * are cleared here if we want to re-install CPC925 EDAC
+ * module later, setting them up in cpc925_cpu_init() will
+ * trigger machine check exception.
+ * Also, it's ok to leave CPU error detection bits enabled,
+ * since they are reset to 1 by default.
+ */
+
+ return;
+}
+
+/* Check for CPU Errors */
+static void cpc925_cpu_check(struct edac_device_ctl_info *edac_dev)
+{
+ struct cpc925_dev_info *dev_info = edac_dev->pvt_info;
+ u32 apiexcp;
+ u32 apimask;
+
+ /* APIEXCP is cleared when read */
+ apiexcp = __raw_readl(dev_info->vbase + REG_APIEXCP_OFFSET);
+ if ((apiexcp & CPU_EXCP_DETECTED) == 0)
+ return;
+
+ apimask = __raw_readl(dev_info->vbase + REG_APIMASK_OFFSET);
+ cpc925_printk(KERN_INFO, "Processor Interface Fault\n"
+ "Processor Interface register dump:\n");
+ cpc925_printk(KERN_INFO, "APIMASK 0x%08x\n", apimask);
+ cpc925_printk(KERN_INFO, "APIEXCP 0x%08x\n", apiexcp);
+
+ edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
+}
+
+/******************** HT Link err device****************************/
+/* Enable HyperTransport Link Error detection */
+static void cpc925_htlink_init(struct cpc925_dev_info *dev_info)
+{
+ u32 ht_errctrl;
+
+ ht_errctrl = __raw_readl(dev_info->vbase + REG_ERRCTRL_OFFSET);
+ if ((ht_errctrl & HT_ERRCTRL_ENABLE) == 0) {
+ ht_errctrl |= HT_ERRCTRL_ENABLE;
+ __raw_writel(ht_errctrl, dev_info->vbase + REG_ERRCTRL_OFFSET);
+ }
+}
+
+/* Disable HyperTransport Link Error detection */
+static void cpc925_htlink_exit(struct cpc925_dev_info *dev_info)
+{
+ u32 ht_errctrl;
+
+ ht_errctrl = __raw_readl(dev_info->vbase + REG_ERRCTRL_OFFSET);
+ ht_errctrl &= ~HT_ERRCTRL_ENABLE;
+ __raw_writel(ht_errctrl, dev_info->vbase + REG_ERRCTRL_OFFSET);
+}
+
+/* Check for HyperTransport Link errors */
+static void cpc925_htlink_check(struct edac_device_ctl_info *edac_dev)
+{
+ struct cpc925_dev_info *dev_info = edac_dev->pvt_info;
+ u32 brgctrl = __raw_readl(dev_info->vbase + REG_BRGCTRL_OFFSET);
+ u32 linkctrl = __raw_readl(dev_info->vbase + REG_LINKCTRL_OFFSET);
+ u32 errctrl = __raw_readl(dev_info->vbase + REG_ERRCTRL_OFFSET);
+ u32 linkerr = __raw_readl(dev_info->vbase + REG_LINKERR_OFFSET);
+
+ if (!((brgctrl & BRGCTRL_DETSERR) ||
+ (linkctrl & HT_LINKCTRL_DETECTED) ||
+ (errctrl & HT_ERRCTRL_DETECTED) ||
+ (linkerr & HT_LINKERR_DETECTED)))
+ return;
+
+ cpc925_printk(KERN_INFO, "HT Link Fault\n"
+ "HT register dump:\n");
+ cpc925_printk(KERN_INFO, "Bridge Ctrl 0x%08x\n",
+ brgctrl);
+ cpc925_printk(KERN_INFO, "Link Config Ctrl 0x%08x\n",
+ linkctrl);
+ cpc925_printk(KERN_INFO, "Error Enum and Ctrl 0x%08x\n",
+ errctrl);
+ cpc925_printk(KERN_INFO, "Link Error 0x%08x\n",
+ linkerr);
+
+ /* Clear by write 1 */
+ if (brgctrl & BRGCTRL_DETSERR)
+ __raw_writel(BRGCTRL_DETSERR,
+ dev_info->vbase + REG_BRGCTRL_OFFSET);
+
+ if (linkctrl & HT_LINKCTRL_DETECTED)
+ __raw_writel(HT_LINKCTRL_DETECTED,
+ dev_info->vbase + REG_LINKCTRL_OFFSET);
+
+ /* Initiate Secondary Bus Reset to clear the chain failure */
+ if (errctrl & ERRCTRL_CHN_FAL)
+ __raw_writel(BRGCTRL_SECBUSRESET,
+ dev_info->vbase + REG_BRGCTRL_OFFSET);
+
+ if (errctrl & ERRCTRL_RSP_ERR)
+ __raw_writel(ERRCTRL_RSP_ERR,
+ dev_info->vbase + REG_ERRCTRL_OFFSET);
+
+ if (linkerr & HT_LINKERR_DETECTED)
+ __raw_writel(HT_LINKERR_DETECTED,
+ dev_info->vbase + REG_LINKERR_OFFSET);
+
+ edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
+}
+
+static struct cpc925_dev_info cpc925_devs[] = {
+ {
+ .ctl_name = CPC925_CPU_ERR_DEV,
+ .init = cpc925_cpu_init,
+ .exit = cpc925_cpu_exit,
+ .check = cpc925_cpu_check,
+ },
+ {
+ .ctl_name = CPC925_HT_LINK_DEV,
+ .init = cpc925_htlink_init,
+ .exit = cpc925_htlink_exit,
+ .check = cpc925_htlink_check,
+ },
+ {0}, /* Terminated by NULL */
+};
+
+/*
+ * Add CPU Err detection and HyperTransport Link Err detection
+ * as common "edac_device", they have no corresponding device
+ * nodes in the Open Firmware DTB and we have to add platform
+ * devices for them. Also, they will share the MMIO with that
+ * of memory controller.
+ */
+static void cpc925_add_edac_devices(void __iomem *vbase)
+{
+ struct cpc925_dev_info *dev_info;
+
+ if (!vbase) {
+ cpc925_printk(KERN_ERR, "MMIO not established yet\n");
+ return;
+ }
+
+ for (dev_info = &cpc925_devs[0]; dev_info->init; dev_info++) {
+ dev_info->vbase = vbase;
+ dev_info->pdev = platform_device_register_simple(
+ dev_info->ctl_name, 0, NULL, 0);
+ if (IS_ERR(dev_info->pdev)) {
+ cpc925_printk(KERN_ERR,
+ "Can't register platform device for %s\n",
+ dev_info->ctl_name);
+ continue;
+ }
+
+ /*
+ * Don't have to allocate private structure but
+ * make use of cpc925_devs[] instead.
+ */
+ dev_info->edac_idx = edac_device_alloc_index();
+ dev_info->edac_dev =
+ edac_device_alloc_ctl_info(0, dev_info->ctl_name,
+ 1, NULL, 0, 0, NULL, 0, dev_info->edac_idx);
+ if (!dev_info->edac_dev) {
+ cpc925_printk(KERN_ERR, "No memory for edac device\n");
+ goto err1;
+ }
+
+ dev_info->edac_dev->pvt_info = dev_info;
+ dev_info->edac_dev->dev = &dev_info->pdev->dev;
+ dev_info->edac_dev->ctl_name = dev_info->ctl_name;
+ dev_info->edac_dev->mod_name = CPC925_EDAC_MOD_STR;
+ dev_info->edac_dev->dev_name = dev_name(&dev_info->pdev->dev);
+
+ if (edac_op_state == EDAC_OPSTATE_POLL)
+ dev_info->edac_dev->edac_check = dev_info->check;
+
+ if (dev_info->init)
+ dev_info->init(dev_info);
+
+ if (edac_device_add_device(dev_info->edac_dev) > 0) {
+ cpc925_printk(KERN_ERR,
+ "Unable to add edac device for %s\n",
+ dev_info->ctl_name);
+ goto err2;
+ }
+
+ debugf0("%s: Successfully added edac device for %s\n",
+ __func__, dev_info->ctl_name);
+
+ continue;
+
+err2:
+ if (dev_info->exit)
+ dev_info->exit(dev_info);
+ edac_device_free_ctl_info(dev_info->edac_dev);
+err1:
+ platform_device_unregister(dev_info->pdev);
+ }
+}
+
+/*
+ * Delete the common "edac_device" for CPU Err Detection
+ * and HyperTransport Link Err Detection
+ */
+static void cpc925_del_edac_devices(void)
+{
+ struct cpc925_dev_info *dev_info;
+
+ for (dev_info = &cpc925_devs[0]; dev_info->init; dev_info++) {
+ if (dev_info->edac_dev) {
+ edac_device_del_device(dev_info->edac_dev->dev);
+ edac_device_free_ctl_info(dev_info->edac_dev);
+ platform_device_unregister(dev_info->pdev);
+ }
+
+ if (dev_info->exit)
+ dev_info->exit(dev_info);
+
+ debugf0("%s: Successfully deleted edac device for %s\n",
+ __func__, dev_info->ctl_name);
+ }
+}
+
+/* Convert current back-ground scrub rate into byte/sec bandwith */
+static int cpc925_get_sdram_scrub_rate(struct mem_ctl_info *mci, u32 *bw)
+{
+ struct cpc925_mc_pdata *pdata = mci->pvt_info;
+ u32 mscr;
+ u8 si;
+
+ mscr = __raw_readl(pdata->vbase + REG_MSCR_OFFSET);
+ si = (mscr & MSCR_SI_MASK) >> MSCR_SI_SHIFT;
+
+ debugf0("%s, Mem Scrub Ctrl Register 0x%x\n", __func__, mscr);
+
+ if (((mscr & MSCR_SCRUB_MOD_MASK) != MSCR_BACKGR_SCRUB) ||
+ (si == 0)) {
+ cpc925_mc_printk(mci, KERN_INFO, "Scrub mode not enabled\n");
+ *bw = 0;
+ } else
+ *bw = CPC925_SCRUB_BLOCK_SIZE * 0xFA67 / si;
+
+ return 0;
+}
+
+/* Return 0 for single channel; 1 for dual channel */
+static int cpc925_mc_get_channels(void __iomem *vbase)
+{
+ int dual = 0;
+ u32 mbcr;
+
+ mbcr = __raw_readl(vbase + REG_MBCR_OFFSET);
+
+ /*
+ * Dual channel only when 128-bit wide physical bus
+ * and 128-bit configuration.
+ */
+ if (((mbcr & MBCR_64BITCFG_MASK) == 0) &&
+ ((mbcr & MBCR_64BITBUS_MASK) == 0))
+ dual = 1;
+
+ debugf0("%s: %s channel\n", __func__,
+ (dual > 0) ? "Dual" : "Single");
+
+ return dual;
+}
+
+static int __devinit cpc925_probe(struct platform_device *pdev)
+{
+ static int edac_mc_idx;
+ struct mem_ctl_info *mci;
+ void __iomem *vbase;
+ struct cpc925_mc_pdata *pdata;
+ struct resource *r;
+ int res = 0, nr_channels;
+
+ debugf0("%s: %s platform device found!\n", __func__, pdev->name);
+
+ if (!devres_open_group(&pdev->dev, cpc925_probe, GFP_KERNEL)) {
+ res = -ENOMEM;
+ goto out;
+ }
+
+ r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!r) {
+ cpc925_printk(KERN_ERR, "Unable to get resource\n");
+ res = -ENOENT;
+ goto err1;
+ }
+
+ if (!devm_request_mem_region(&pdev->dev,
+ r->start,
+ r->end - r->start + 1,
+ pdev->name)) {
+ cpc925_printk(KERN_ERR, "Unable to request mem region\n");
+ res = -EBUSY;
+ goto err1;
+ }
+
+ vbase = devm_ioremap(&pdev->dev, r->start, r->end - r->start + 1);
+ if (!vbase) {
+ cpc925_printk(KERN_ERR, "Unable to ioremap device\n");
+ res = -ENOMEM;
+ goto err2;
+ }
+
+ nr_channels = cpc925_mc_get_channels(vbase);
+ mci = edac_mc_alloc(sizeof(struct cpc925_mc_pdata),
+ CPC925_NR_CSROWS, nr_channels + 1, edac_mc_idx);
+ if (!mci) {
+ cpc925_printk(KERN_ERR, "No memory for mem_ctl_info\n");
+ res = -ENOMEM;
+ goto err2;
+ }
+
+ pdata = mci->pvt_info;
+ pdata->vbase = vbase;
+ pdata->edac_idx = edac_mc_idx++;
+ pdata->name = pdev->name;
+
+ mci->dev = &pdev->dev;
+ platform_set_drvdata(pdev, mci);
+ mci->dev_name = dev_name(&pdev->dev);
+ mci->mtype_cap = MEM_FLAG_RDDR | MEM_FLAG_DDR;
+ mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
+ mci->edac_cap = EDAC_FLAG_SECDED;
+ mci->mod_name = CPC925_EDAC_MOD_STR;
+ mci->mod_ver = CPC925_EDAC_REVISION;
+ mci->ctl_name = pdev->name;
+
+ if (edac_op_state == EDAC_OPSTATE_POLL)
+ mci->edac_check = cpc925_mc_check;
+
+ mci->ctl_page_to_phys = NULL;
+ mci->scrub_mode = SCRUB_SW_SRC;
+ mci->set_sdram_scrub_rate = NULL;
+ mci->get_sdram_scrub_rate = cpc925_get_sdram_scrub_rate;
+
+ cpc925_init_csrows(mci);
+
+ /* Setup memory controller registers */
+ cpc925_mc_init(mci);
+
+ if (edac_mc_add_mc(mci) > 0) {
+ cpc925_mc_printk(mci, KERN_ERR, "Failed edac_mc_add_mc()\n");
+ goto err3;
+ }
+
+ cpc925_add_edac_devices(vbase);
+
+ /* get this far and it's successful */
+ debugf0("%s: success\n", __func__);
+
+ res = 0;
+ goto out;
+
+err3:
+ cpc925_mc_exit(mci);
+ edac_mc_free(mci);
+err2:
+ devm_release_mem_region(&pdev->dev, r->start, r->end-r->start+1);
+err1:
+ devres_release_group(&pdev->dev, cpc925_probe);
+out:
+ return res;
+}
+
+static int cpc925_remove(struct platform_device *pdev)
+{
+ struct mem_ctl_info *mci = platform_get_drvdata(pdev);
+
+ /*
+ * Delete common edac devices before edac mc, because
+ * the former share the MMIO of the latter.
+ */
+ cpc925_del_edac_devices();
+ cpc925_mc_exit(mci);
+
+ edac_mc_del_mc(&pdev->dev);
+ edac_mc_free(mci);
+
+ return 0;
+}
+
+static struct platform_driver cpc925_edac_driver = {
+ .probe = cpc925_probe,
+ .remove = cpc925_remove,
+ .driver = {
+ .name = "cpc925_edac",
+ }
+};
+
+static int __init cpc925_edac_init(void)
+{
+ int ret = 0;
+
+ printk(KERN_INFO "IBM CPC925 EDAC driver " CPC925_EDAC_REVISION "\n");
+ printk(KERN_INFO "\t(c) 2008 Wind River Systems, Inc\n");
+
+ /* Only support POLL mode so far */
+ edac_op_state = EDAC_OPSTATE_POLL;
+
+ ret = platform_driver_register(&cpc925_edac_driver);
+ if (ret) {
+ printk(KERN_WARNING "Failed to register %s\n",
+ CPC925_EDAC_MOD_STR);
+ }
+
+ return ret;
+}
+
+static void __exit cpc925_edac_exit(void)
+{
+ platform_driver_unregister(&cpc925_edac_driver);
+}
+
+module_init(cpc925_edac_init);
+module_exit(cpc925_edac_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Cao Qingtao <qingtao.cao@windriver.com>");
+MODULE_DESCRIPTION("IBM CPC925 Bridge and MC EDAC kernel module");
diff --git a/drivers/edac/edac_core.h b/drivers/edac/edac_core.h
index 48d3b140983..3493c6bdb82 100644
--- a/drivers/edac/edac_core.h
+++ b/drivers/edac/edac_core.h
@@ -841,6 +841,7 @@ extern void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,
int inst_nr, int block_nr, const char *msg);
extern void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
int inst_nr, int block_nr, const char *msg);
+extern int edac_device_alloc_index(void);
/*
* edac_pci APIs
diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c
index a7d2c717d03..b02a6a69a8f 100644
--- a/drivers/edac/edac_device.c
+++ b/drivers/edac/edac_device.c
@@ -490,6 +490,20 @@ void edac_device_reset_delay_period(struct edac_device_ctl_info *edac_dev,
mutex_unlock(&device_ctls_mutex);
}
+/*
+ * edac_device_alloc_index: Allocate a unique device index number
+ *
+ * Return:
+ * allocated index number
+ */
+int edac_device_alloc_index(void)
+{
+ static atomic_t device_indexes = ATOMIC_INIT(0);
+
+ return atomic_inc_return(&device_indexes) - 1;
+}
+EXPORT_SYMBOL_GPL(edac_device_alloc_index);
+
/**
* edac_device_add_device: Insert the 'edac_dev' structure into the
* edac_device global list and create sysfs entries associated with
diff --git a/drivers/gpio/max7301.c b/drivers/gpio/max7301.c
index 3e7f4e06386..7b82eaae262 100644
--- a/drivers/gpio/max7301.c
+++ b/drivers/gpio/max7301.c
@@ -287,7 +287,7 @@ exit_destroy:
return ret;
}
-static int max7301_remove(struct spi_device *spi)
+static int __devexit max7301_remove(struct spi_device *spi)
{
struct max7301 *ts;
int ret;
diff --git a/drivers/gpio/pca953x.c b/drivers/gpio/pca953x.c
index 8dc0164bd51..cdb6574d25a 100644
--- a/drivers/gpio/pca953x.c
+++ b/drivers/gpio/pca953x.c
@@ -15,6 +15,10 @@
#include <linux/init.h>
#include <linux/i2c.h>
#include <linux/i2c/pca953x.h>
+#ifdef CONFIG_OF_GPIO
+#include <linux/of_platform.h>
+#include <linux/of_gpio.h>
+#endif
#include <asm/gpio.h>
@@ -32,6 +36,7 @@ static const struct i2c_device_id pca953x_id[] = {
{ "pca9539", 16, },
{ "pca9554", 8, },
{ "pca9555", 16, },
+ { "pca9556", 8, },
{ "pca9557", 8, },
{ "max7310", 8, },
@@ -49,7 +54,9 @@ struct pca953x_chip {
uint16_t reg_direction;
struct i2c_client *client;
+ struct pca953x_platform_data *dyn_pdata;
struct gpio_chip gpio_chip;
+ char **names;
};
static int pca953x_write_reg(struct pca953x_chip *chip, int reg, uint16_t val)
@@ -192,8 +199,57 @@ static void pca953x_setup_gpio(struct pca953x_chip *chip, int gpios)
gc->label = chip->client->name;
gc->dev = &chip->client->dev;
gc->owner = THIS_MODULE;
+ gc->names = chip->names;
}
+/*
+ * Handlers for alternative sources of platform_data
+ */
+#ifdef CONFIG_OF_GPIO
+/*
+ * Translate OpenFirmware node properties into platform_data
+ */
+static struct pca953x_platform_data *
+pca953x_get_alt_pdata(struct i2c_client *client)
+{
+ struct pca953x_platform_data *pdata;
+ struct device_node *node;
+ const uint16_t *val;
+
+ node = dev_archdata_get_node(&client->dev.archdata);
+ if (node == NULL)
+ return NULL;
+
+ pdata = kzalloc(sizeof(struct pca953x_platform_data), GFP_KERNEL);
+ if (pdata == NULL) {
+ dev_err(&client->dev, "Unable to allocate platform_data\n");
+ return NULL;
+ }
+
+ pdata->gpio_base = -1;
+ val = of_get_property(node, "linux,gpio-base", NULL);
+ if (val) {
+ if (*val < 0)
+ dev_warn(&client->dev,
+ "invalid gpio-base in device tree\n");
+ else
+ pdata->gpio_base = *val;
+ }
+
+ val = of_get_property(node, "polarity", NULL);
+ if (val)
+ pdata->invert = *val;
+
+ return pdata;
+}
+#else
+static struct pca953x_platform_data *
+pca953x_get_alt_pdata(struct i2c_client *client)
+{
+ return NULL;
+}
+#endif
+
static int __devinit pca953x_probe(struct i2c_client *client,
const struct i2c_device_id *id)
{
@@ -201,20 +257,32 @@ static int __devinit pca953x_probe(struct i2c_client *client,
struct pca953x_chip *chip;
int ret;
+ chip = kzalloc(sizeof(struct pca953x_chip), GFP_KERNEL);
+ if (chip == NULL)
+ return -ENOMEM;
+
pdata = client->dev.platform_data;
if (pdata == NULL) {
- dev_dbg(&client->dev, "no platform data\n");
- return -EINVAL;
+ pdata = pca953x_get_alt_pdata(client);
+ /*
+ * Unlike normal platform_data, this is allocated
+ * dynamically and must be freed in the driver
+ */
+ chip->dyn_pdata = pdata;
}
- chip = kzalloc(sizeof(struct pca953x_chip), GFP_KERNEL);
- if (chip == NULL)
- return -ENOMEM;
+ if (pdata == NULL) {
+ dev_dbg(&client->dev, "no platform data\n");
+ ret = -EINVAL;
+ goto out_failed;
+ }
chip->client = client;
chip->gpio_start = pdata->gpio_base;
+ chip->names = pdata->names;
+
/* initialize cached registers from their original values.
* we can't share this chip with another i2c master.
*/
@@ -249,6 +317,7 @@ static int __devinit pca953x_probe(struct i2c_client *client,
return 0;
out_failed:
+ kfree(chip->dyn_pdata);
kfree(chip);
return ret;
}
@@ -276,6 +345,7 @@ static int pca953x_remove(struct i2c_client *client)
return ret;
}
+ kfree(chip->dyn_pdata);
kfree(chip);
return 0;
}
diff --git a/drivers/md/faulty.c b/drivers/md/faulty.c
index 8695809b24b..87d88dbb667 100644
--- a/drivers/md/faulty.c
+++ b/drivers/md/faulty.c
@@ -255,14 +255,14 @@ static void status(struct seq_file *seq, mddev_t *mddev)
}
-static int reconfig(mddev_t *mddev, int layout, int chunk_size)
+static int reshape(mddev_t *mddev)
{
- int mode = layout & ModeMask;
- int count = layout >> ModeShift;
+ int mode = mddev->new_layout & ModeMask;
+ int count = mddev->new_layout >> ModeShift;
conf_t *conf = mddev->private;
- if (chunk_size != -1)
- return -EINVAL;
+ if (mddev->new_layout < 0)
+ return 0;
/* new layout */
if (mode == ClearFaults)
@@ -279,6 +279,7 @@ static int reconfig(mddev_t *mddev, int layout, int chunk_size)
atomic_set(&conf->counters[mode], count);
} else
return -EINVAL;
+ mddev->new_layout = -1;
mddev->layout = -1; /* makes sure further changes come through */
return 0;
}
@@ -298,8 +299,12 @@ static int run(mddev_t *mddev)
{
mdk_rdev_t *rdev;
int i;
+ conf_t *conf;
+
+ if (md_check_no_bitmap(mddev))
+ return -EINVAL;
- conf_t *conf = kmalloc(sizeof(*conf), GFP_KERNEL);
+ conf = kmalloc(sizeof(*conf), GFP_KERNEL);
if (!conf)
return -ENOMEM;
@@ -315,7 +320,7 @@ static int run(mddev_t *mddev)
md_set_array_sectors(mddev, faulty_size(mddev, 0, 0));
mddev->private = conf;
- reconfig(mddev, mddev->layout, -1);
+ reshape(mddev);
return 0;
}
@@ -338,7 +343,7 @@ static struct mdk_personality faulty_personality =
.run = run,
.stop = stop,
.status = status,
- .reconfig = reconfig,
+ .check_reshape = reshape,
.size = faulty_size,
};
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 64f1f3e046e..15c8b7b25a9 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -27,19 +27,27 @@
*/
static inline dev_info_t *which_dev(mddev_t *mddev, sector_t sector)
{
- dev_info_t *hash;
- linear_conf_t *conf = mddev_to_conf(mddev);
- sector_t idx = sector >> conf->sector_shift;
+ int lo, mid, hi;
+ linear_conf_t *conf;
+
+ lo = 0;
+ hi = mddev->raid_disks - 1;
+ conf = rcu_dereference(mddev->private);
/*
- * sector_div(a,b) returns the remainer and sets a to a/b
+ * Binary Search
*/
- (void)sector_div(idx, conf->spacing);
- hash = conf->hash_table[idx];
- while (sector >= hash->num_sectors + hash->start_sector)
- hash++;
- return hash;
+ while (hi > lo) {
+
+ mid = (hi + lo) / 2;
+ if (sector < conf->disks[mid].end_sector)
+ hi = mid;
+ else
+ lo = mid + 1;
+ }
+
+ return conf->disks + lo;
}
/**
@@ -59,8 +67,10 @@ static int linear_mergeable_bvec(struct request_queue *q,
unsigned long maxsectors, bio_sectors = bvm->bi_size >> 9;
sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
+ rcu_read_lock();
dev0 = which_dev(mddev, sector);
- maxsectors = dev0->num_sectors - (sector - dev0->start_sector);
+ maxsectors = dev0->end_sector - sector;
+ rcu_read_unlock();
if (maxsectors < bio_sectors)
maxsectors = 0;
@@ -79,46 +89,57 @@ static int linear_mergeable_bvec(struct request_queue *q,
static void linear_unplug(struct request_queue *q)
{
mddev_t *mddev = q->queuedata;
- linear_conf_t *conf = mddev_to_conf(mddev);
+ linear_conf_t *conf;
int i;
+ rcu_read_lock();
+ conf = rcu_dereference(mddev->private);
+
for (i=0; i < mddev->raid_disks; i++) {
struct request_queue *r_queue = bdev_get_queue(conf->disks[i].rdev->bdev);
blk_unplug(r_queue);
}
+ rcu_read_unlock();
}
static int linear_congested(void *data, int bits)
{
mddev_t *mddev = data;
- linear_conf_t *conf = mddev_to_conf(mddev);
+ linear_conf_t *conf;
int i, ret = 0;
+ rcu_read_lock();
+ conf = rcu_dereference(mddev->private);
+
for (i = 0; i < mddev->raid_disks && !ret ; i++) {
struct request_queue *q = bdev_get_queue(conf->disks[i].rdev->bdev);
ret |= bdi_congested(&q->backing_dev_info, bits);
}
+
+ rcu_read_unlock();
return ret;
}
static sector_t linear_size(mddev_t *mddev, sector_t sectors, int raid_disks)
{
- linear_conf_t *conf = mddev_to_conf(mddev);
+ linear_conf_t *conf;
+ sector_t array_sectors;
+ rcu_read_lock();
+ conf = rcu_dereference(mddev->private);
WARN_ONCE(sectors || raid_disks,
"%s does not support generic reshape\n", __func__);
+ array_sectors = conf->array_sectors;
+ rcu_read_unlock();
- return conf->array_sectors;
+ return array_sectors;
}
static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
{
linear_conf_t *conf;
- dev_info_t **table;
mdk_rdev_t *rdev;
- int i, nb_zone, cnt;
- sector_t min_sectors;
- sector_t curr_sector;
+ int i, cnt;
conf = kzalloc (sizeof (*conf) + raid_disks*sizeof(dev_info_t),
GFP_KERNEL);
@@ -131,6 +152,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
list_for_each_entry(rdev, &mddev->disks, same_set) {
int j = rdev->raid_disk;
dev_info_t *disk = conf->disks + j;
+ sector_t sectors;
if (j < 0 || j >= raid_disks || disk->rdev) {
printk("linear: disk numbering problem. Aborting!\n");
@@ -138,6 +160,11 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
}
disk->rdev = rdev;
+ if (mddev->chunk_sectors) {
+ sectors = rdev->sectors;
+ sector_div(sectors, mddev->chunk_sectors);
+ rdev->sectors = sectors * mddev->chunk_sectors;
+ }
blk_queue_stack_limits(mddev->queue,
rdev->bdev->bd_disk->queue);
@@ -149,102 +176,24 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
- disk->num_sectors = rdev->sectors;
conf->array_sectors += rdev->sectors;
-
cnt++;
+
}
if (cnt != raid_disks) {
printk("linear: not enough drives present. Aborting!\n");
goto out;
}
- min_sectors = conf->array_sectors;
- sector_div(min_sectors, PAGE_SIZE/sizeof(struct dev_info *));
- if (min_sectors == 0)
- min_sectors = 1;
-
- /* min_sectors is the minimum spacing that will fit the hash
- * table in one PAGE. This may be much smaller than needed.
- * We find the smallest non-terminal set of consecutive devices
- * that is larger than min_sectors and use the size of that as
- * the actual spacing
- */
- conf->spacing = conf->array_sectors;
- for (i=0; i < cnt-1 ; i++) {
- sector_t tmp = 0;
- int j;
- for (j = i; j < cnt - 1 && tmp < min_sectors; j++)
- tmp += conf->disks[j].num_sectors;
- if (tmp >= min_sectors && tmp < conf->spacing)
- conf->spacing = tmp;
- }
-
- /* spacing may be too large for sector_div to work with,
- * so we might need to pre-shift
- */
- conf->sector_shift = 0;
- if (sizeof(sector_t) > sizeof(u32)) {
- sector_t space = conf->spacing;
- while (space > (sector_t)(~(u32)0)) {
- space >>= 1;
- conf->sector_shift++;
- }
- }
/*
- * This code was restructured to work around a gcc-2.95.3 internal
- * compiler error. Alter it with care.
+ * Here we calculate the device offsets.
*/
- {
- sector_t sz;
- unsigned round;
- unsigned long base;
-
- sz = conf->array_sectors >> conf->sector_shift;
- sz += 1; /* force round-up */
- base = conf->spacing >> conf->sector_shift;
- round = sector_div(sz, base);
- nb_zone = sz + (round ? 1 : 0);
- }
- BUG_ON(nb_zone > PAGE_SIZE / sizeof(struct dev_info *));
-
- conf->hash_table = kmalloc (sizeof (struct dev_info *) * nb_zone,
- GFP_KERNEL);
- if (!conf->hash_table)
- goto out;
+ conf->disks[0].end_sector = conf->disks[0].rdev->sectors;
- /*
- * Here we generate the linear hash table
- * First calculate the device offsets.
- */
- conf->disks[0].start_sector = 0;
for (i = 1; i < raid_disks; i++)
- conf->disks[i].start_sector =
- conf->disks[i-1].start_sector +
- conf->disks[i-1].num_sectors;
-
- table = conf->hash_table;
- i = 0;
- for (curr_sector = 0;
- curr_sector < conf->array_sectors;
- curr_sector += conf->spacing) {
-
- while (i < raid_disks-1 &&
- curr_sector >= conf->disks[i+1].start_sector)
- i++;
-
- *table ++ = conf->disks + i;
- }
-
- if (conf->sector_shift) {
- conf->spacing >>= conf->sector_shift;
- /* round spacing up so that when we divide by it,
- * we err on the side of "too-low", which is safest.
- */
- conf->spacing++;
- }
-
- BUG_ON(table - conf->hash_table > nb_zone);
+ conf->disks[i].end_sector =
+ conf->disks[i-1].end_sector +
+ conf->disks[i].rdev->sectors;
return conf;
@@ -257,6 +206,8 @@ static int linear_run (mddev_t *mddev)
{
linear_conf_t *conf;
+ if (md_check_no_bitmap(mddev))
+ return -EINVAL;
mddev->queue->queue_lock = &mddev->queue->__queue_lock;
conf = linear_conf(mddev, mddev->raid_disks);
@@ -272,6 +223,12 @@ static int linear_run (mddev_t *mddev)
return 0;
}
+static void free_conf(struct rcu_head *head)
+{
+ linear_conf_t *conf = container_of(head, linear_conf_t, rcu);
+ kfree(conf);
+}
+
static int linear_add(mddev_t *mddev, mdk_rdev_t *rdev)
{
/* Adding a drive to a linear array allows the array to grow.
@@ -282,7 +239,7 @@ static int linear_add(mddev_t *mddev, mdk_rdev_t *rdev)
* The current one is never freed until the array is stopped.
* This avoids races.
*/
- linear_conf_t *newconf;
+ linear_conf_t *newconf, *oldconf;
if (rdev->saved_raid_disk != mddev->raid_disks)
return -EINVAL;
@@ -294,25 +251,29 @@ static int linear_add(mddev_t *mddev, mdk_rdev_t *rdev)
if (!newconf)
return -ENOMEM;
- newconf->prev = mddev_to_conf(mddev);
- mddev->private = newconf;
+ oldconf = rcu_dereference(mddev->private);
mddev->raid_disks++;
+ rcu_assign_pointer(mddev->private, newconf);
md_set_array_sectors(mddev, linear_size(mddev, 0, 0));
set_capacity(mddev->gendisk, mddev->array_sectors);
+ call_rcu(&oldconf->rcu, free_conf);
return 0;
}
static int linear_stop (mddev_t *mddev)
{
- linear_conf_t *conf = mddev_to_conf(mddev);
-
+ linear_conf_t *conf = mddev->private;
+
+ /*
+ * We do not require rcu protection here since
+ * we hold reconfig_mutex for both linear_add and
+ * linear_stop, so they cannot race.
+ * We should make sure any old 'conf's are properly
+ * freed though.
+ */
+ rcu_barrier();
blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
- do {
- linear_conf_t *t = conf->prev;
- kfree(conf->hash_table);
- kfree(conf);
- conf = t;
- } while (conf);
+ kfree(conf);
return 0;
}
@@ -322,6 +283,7 @@ static int linear_make_request (struct request_queue *q, struct bio *bio)
const int rw = bio_data_dir(bio);
mddev_t *mddev = q->queuedata;
dev_info_t *tmp_dev;
+ sector_t start_sector;
int cpu;
if (unlikely(bio_barrier(bio))) {
@@ -335,33 +297,36 @@ static int linear_make_request (struct request_queue *q, struct bio *bio)
bio_sectors(bio));
part_stat_unlock();
+ rcu_read_lock();
tmp_dev = which_dev(mddev, bio->bi_sector);
-
- if (unlikely(bio->bi_sector >= (tmp_dev->num_sectors +
- tmp_dev->start_sector)
- || (bio->bi_sector <
- tmp_dev->start_sector))) {
+ start_sector = tmp_dev->end_sector - tmp_dev->rdev->sectors;
+
+
+ if (unlikely(bio->bi_sector >= (tmp_dev->end_sector)
+ || (bio->bi_sector < start_sector))) {
char b[BDEVNAME_SIZE];
printk("linear_make_request: Sector %llu out of bounds on "
"dev %s: %llu sectors, offset %llu\n",
(unsigned long long)bio->bi_sector,
bdevname(tmp_dev->rdev->bdev, b),
- (unsigned long long)tmp_dev->num_sectors,
- (unsigned long long)tmp_dev->start_sector);
+ (unsigned long long)tmp_dev->rdev->sectors,
+ (unsigned long long)start_sector);
+ rcu_read_unlock();
bio_io_error(bio);
return 0;
}
if (unlikely(bio->bi_sector + (bio->bi_size >> 9) >
- tmp_dev->start_sector + tmp_dev->num_sectors)) {
+ tmp_dev->end_sector)) {
/* This bio crosses a device boundary, so we have to
* split it.
*/
struct bio_pair *bp;
+ sector_t end_sector = tmp_dev->end_sector;
+
+ rcu_read_unlock();
- bp = bio_split(bio,
- tmp_dev->start_sector + tmp_dev->num_sectors
- - bio->bi_sector);
+ bp = bio_split(bio, end_sector - bio->bi_sector);
if (linear_make_request(q, &bp->bio1))
generic_make_request(&bp->bio1);
@@ -372,8 +337,9 @@ static int linear_make_request (struct request_queue *q, struct bio *bio)
}
bio->bi_bdev = tmp_dev->rdev->bdev;
- bio->bi_sector = bio->bi_sector - tmp_dev->start_sector
+ bio->bi_sector = bio->bi_sector - start_sector
+ tmp_dev->rdev->data_offset;
+ rcu_read_unlock();
return 1;
}
@@ -381,7 +347,7 @@ static int linear_make_request (struct request_queue *q, struct bio *bio)
static void linear_status (struct seq_file *seq, mddev_t *mddev)
{
- seq_printf(seq, " %dk rounding", mddev->chunk_size/1024);
+ seq_printf(seq, " %dk rounding", mddev->chunk_sectors / 2);
}
diff --git a/drivers/md/linear.h b/drivers/md/linear.h
index bf8179587f9..0ce29b61605 100644
--- a/drivers/md/linear.h
+++ b/drivers/md/linear.h
@@ -3,27 +3,19 @@
struct dev_info {
mdk_rdev_t *rdev;
- sector_t num_sectors;
- sector_t start_sector;
+ sector_t end_sector;
};
typedef struct dev_info dev_info_t;
struct linear_private_data
{
- struct linear_private_data *prev; /* earlier version */
- dev_info_t **hash_table;
- sector_t spacing;
sector_t array_sectors;
- int sector_shift; /* shift before dividing
- * by spacing
- */
dev_info_t disks[0];
+ struct rcu_head rcu;
};
typedef struct linear_private_data linear_conf_t;
-#define mddev_to_conf(mddev) ((linear_conf_t *) mddev->private)
-
#endif
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 20f6ac33834..09be637d52c 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -440,15 +440,6 @@ static inline sector_t calc_dev_sboffset(struct block_device *bdev)
return MD_NEW_SIZE_SECTORS(num_sectors);
}
-static sector_t calc_num_sectors(mdk_rdev_t *rdev, unsigned chunk_size)
-{
- sector_t num_sectors = rdev->sb_start;
-
- if (chunk_size)
- num_sectors &= ~((sector_t)chunk_size/512 - 1);
- return num_sectors;
-}
-
static int alloc_disk_sb(mdk_rdev_t * rdev)
{
if (rdev->sb_page)
@@ -745,6 +736,24 @@ struct super_type {
};
/*
+ * Check that the given mddev has no bitmap.
+ *
+ * This function is called from the run method of all personalities that do not
+ * support bitmaps. It prints an error message and returns non-zero if mddev
+ * has a bitmap. Otherwise, it returns 0.
+ *
+ */
+int md_check_no_bitmap(mddev_t *mddev)
+{
+ if (!mddev->bitmap_file && !mddev->bitmap_offset)
+ return 0;
+ printk(KERN_ERR "%s: bitmaps are not supported for %s\n",
+ mdname(mddev), mddev->pers->name);
+ return 1;
+}
+EXPORT_SYMBOL(md_check_no_bitmap);
+
+/*
* load_super for 0.90.0
*/
static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
@@ -797,17 +806,6 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version
rdev->data_offset = 0;
rdev->sb_size = MD_SB_BYTES;
- if (sb->state & (1<<MD_SB_BITMAP_PRESENT)) {
- if (sb->level != 1 && sb->level != 4
- && sb->level != 5 && sb->level != 6
- && sb->level != 10) {
- /* FIXME use a better test */
- printk(KERN_WARNING
- "md: bitmaps not supported for this level.\n");
- goto abort;
- }
- }
-
if (sb->level == LEVEL_MULTIPATH)
rdev->desc_nr = -1;
else
@@ -836,7 +834,7 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version
else
ret = 0;
}
- rdev->sectors = calc_num_sectors(rdev, sb->chunk_size);
+ rdev->sectors = rdev->sb_start;
if (rdev->sectors < sb->size * 2 && sb->level > 1)
/* "this cannot possibly happen" ... */
@@ -866,7 +864,7 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
mddev->minor_version = sb->minor_version;
mddev->patch_version = sb->patch_version;
mddev->external = 0;
- mddev->chunk_size = sb->chunk_size;
+ mddev->chunk_sectors = sb->chunk_size >> 9;
mddev->ctime = sb->ctime;
mddev->utime = sb->utime;
mddev->level = sb->level;
@@ -883,13 +881,13 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
mddev->delta_disks = sb->delta_disks;
mddev->new_level = sb->new_level;
mddev->new_layout = sb->new_layout;
- mddev->new_chunk = sb->new_chunk;
+ mddev->new_chunk_sectors = sb->new_chunk >> 9;
} else {
mddev->reshape_position = MaxSector;
mddev->delta_disks = 0;
mddev->new_level = mddev->level;
mddev->new_layout = mddev->layout;
- mddev->new_chunk = mddev->chunk_size;
+ mddev->new_chunk_sectors = mddev->chunk_sectors;
}
if (sb->state & (1<<MD_SB_CLEAN))
@@ -1004,7 +1002,7 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
sb->new_level = mddev->new_level;
sb->delta_disks = mddev->delta_disks;
sb->new_layout = mddev->new_layout;
- sb->new_chunk = mddev->new_chunk;
+ sb->new_chunk = mddev->new_chunk_sectors << 9;
}
mddev->minor_version = sb->minor_version;
if (mddev->in_sync)
@@ -1018,7 +1016,7 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
sb->recovery_cp = 0;
sb->layout = mddev->layout;
- sb->chunk_size = mddev->chunk_size;
+ sb->chunk_size = mddev->chunk_sectors << 9;
if (mddev->bitmap && mddev->bitmap_file == NULL)
sb->state |= (1<<MD_SB_BITMAP_PRESENT);
@@ -1185,17 +1183,6 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
bdevname(rdev->bdev,b));
return -EINVAL;
}
- if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET)) {
- if (sb->level != cpu_to_le32(1) &&
- sb->level != cpu_to_le32(4) &&
- sb->level != cpu_to_le32(5) &&
- sb->level != cpu_to_le32(6) &&
- sb->level != cpu_to_le32(10)) {
- printk(KERN_WARNING
- "md: bitmaps not supported for this level.\n");
- return -EINVAL;
- }
- }
rdev->preferred_minor = 0xffff;
rdev->data_offset = le64_to_cpu(sb->data_offset);
@@ -1248,9 +1235,6 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
if (rdev->sectors < le64_to_cpu(sb->data_size))
return -EINVAL;
rdev->sectors = le64_to_cpu(sb->data_size);
- if (le32_to_cpu(sb->chunksize))
- rdev->sectors &= ~((sector_t)le32_to_cpu(sb->chunksize) - 1);
-
if (le64_to_cpu(sb->size) > rdev->sectors)
return -EINVAL;
return ret;
@@ -1271,7 +1255,7 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
mddev->major_version = 1;
mddev->patch_version = 0;
mddev->external = 0;
- mddev->chunk_size = le32_to_cpu(sb->chunksize) << 9;
+ mddev->chunk_sectors = le32_to_cpu(sb->chunksize);
mddev->ctime = le64_to_cpu(sb->ctime) & ((1ULL << 32)-1);
mddev->utime = le64_to_cpu(sb->utime) & ((1ULL << 32)-1);
mddev->level = le32_to_cpu(sb->level);
@@ -1297,13 +1281,13 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
mddev->delta_disks = le32_to_cpu(sb->delta_disks);
mddev->new_level = le32_to_cpu(sb->new_level);
mddev->new_layout = le32_to_cpu(sb->new_layout);
- mddev->new_chunk = le32_to_cpu(sb->new_chunk)<<9;
+ mddev->new_chunk_sectors = le32_to_cpu(sb->new_chunk);
} else {
mddev->reshape_position = MaxSector;
mddev->delta_disks = 0;
mddev->new_level = mddev->level;
mddev->new_layout = mddev->layout;
- mddev->new_chunk = mddev->chunk_size;
+ mddev->new_chunk_sectors = mddev->chunk_sectors;
}
} else if (mddev->pers == NULL) {
@@ -1375,7 +1359,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
sb->raid_disks = cpu_to_le32(mddev->raid_disks);
sb->size = cpu_to_le64(mddev->dev_sectors);
- sb->chunksize = cpu_to_le32(mddev->chunk_size >> 9);
+ sb->chunksize = cpu_to_le32(mddev->chunk_sectors);
sb->level = cpu_to_le32(mddev->level);
sb->layout = cpu_to_le32(mddev->layout);
@@ -1402,7 +1386,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
sb->new_layout = cpu_to_le32(mddev->new_layout);
sb->delta_disks = cpu_to_le32(mddev->delta_disks);
sb->new_level = cpu_to_le32(mddev->new_level);
- sb->new_chunk = cpu_to_le32(mddev->new_chunk>>9);
+ sb->new_chunk = cpu_to_le32(mddev->new_chunk_sectors);
}
max_dev = 0;
@@ -1897,6 +1881,7 @@ static void md_update_sb(mddev_t * mddev, int force_change)
int sync_req;
int nospares = 0;
+ mddev->utime = get_seconds();
if (mddev->external)
return;
repeat:
@@ -1926,7 +1911,6 @@ repeat:
nospares = 0;
sync_req = mddev->in_sync;
- mddev->utime = get_seconds();
/* If this is just a dirty<->clean transition, and the array is clean
* and 'events' is odd, we can roll back to the previous clean state */
@@ -2597,15 +2581,6 @@ static void analyze_sbs(mddev_t * mddev)
clear_bit(In_sync, &rdev->flags);
}
}
-
-
-
- if (mddev->recovery_cp != MaxSector &&
- mddev->level >= 1)
- printk(KERN_ERR "md: %s: raid array is not clean"
- " -- starting background reconstruction\n",
- mdname(mddev));
-
}
static void md_safemode_timeout(unsigned long data);
@@ -2746,7 +2721,7 @@ level_store(mddev_t *mddev, const char *buf, size_t len)
if (IS_ERR(priv)) {
mddev->new_level = mddev->level;
mddev->new_layout = mddev->layout;
- mddev->new_chunk = mddev->chunk_size;
+ mddev->new_chunk_sectors = mddev->chunk_sectors;
mddev->raid_disks -= mddev->delta_disks;
mddev->delta_disks = 0;
module_put(pers->owner);
@@ -2764,7 +2739,7 @@ level_store(mddev_t *mddev, const char *buf, size_t len)
strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
mddev->level = mddev->new_level;
mddev->layout = mddev->new_layout;
- mddev->chunk_size = mddev->new_chunk;
+ mddev->chunk_sectors = mddev->new_chunk_sectors;
mddev->delta_disks = 0;
pers->run(mddev);
mddev_resume(mddev);
@@ -2800,11 +2775,14 @@ layout_store(mddev_t *mddev, const char *buf, size_t len)
if (mddev->pers) {
int err;
- if (mddev->pers->reconfig == NULL)
+ if (mddev->pers->check_reshape == NULL)
return -EBUSY;
- err = mddev->pers->reconfig(mddev, n, -1);
- if (err)
+ mddev->new_layout = n;
+ err = mddev->pers->check_reshape(mddev);
+ if (err) {
+ mddev->new_layout = mddev->layout;
return err;
+ }
} else {
mddev->new_layout = n;
if (mddev->reshape_position == MaxSector)
@@ -2857,10 +2835,11 @@ static ssize_t
chunk_size_show(mddev_t *mddev, char *page)
{
if (mddev->reshape_position != MaxSector &&
- mddev->chunk_size != mddev->new_chunk)
- return sprintf(page, "%d (%d)\n", mddev->new_chunk,
- mddev->chunk_size);
- return sprintf(page, "%d\n", mddev->chunk_size);
+ mddev->chunk_sectors != mddev->new_chunk_sectors)
+ return sprintf(page, "%d (%d)\n",
+ mddev->new_chunk_sectors << 9,
+ mddev->chunk_sectors << 9);
+ return sprintf(page, "%d\n", mddev->chunk_sectors << 9);
}
static ssize_t
@@ -2874,15 +2853,18 @@ chunk_size_store(mddev_t *mddev, const char *buf, size_t len)
if (mddev->pers) {
int err;
- if (mddev->pers->reconfig == NULL)
+ if (mddev->pers->check_reshape == NULL)
return -EBUSY;
- err = mddev->pers->reconfig(mddev, -1, n);
- if (err)
+ mddev->new_chunk_sectors = n >> 9;
+ err = mddev->pers->check_reshape(mddev);
+ if (err) {
+ mddev->new_chunk_sectors = mddev->chunk_sectors;
return err;
+ }
} else {
- mddev->new_chunk = n;
+ mddev->new_chunk_sectors = n >> 9;
if (mddev->reshape_position == MaxSector)
- mddev->chunk_size = n;
+ mddev->chunk_sectors = n >> 9;
}
return len;
}
@@ -3527,8 +3509,9 @@ min_sync_store(mddev_t *mddev, const char *buf, size_t len)
return -EBUSY;
/* Must be a multiple of chunk_size */
- if (mddev->chunk_size) {
- if (min & (sector_t)((mddev->chunk_size>>9)-1))
+ if (mddev->chunk_sectors) {
+ sector_t temp = min;
+ if (sector_div(temp, mddev->chunk_sectors))
return -EINVAL;
}
mddev->resync_min = min;
@@ -3564,8 +3547,9 @@ max_sync_store(mddev_t *mddev, const char *buf, size_t len)
return -EBUSY;
/* Must be a multiple of chunk_size */
- if (mddev->chunk_size) {
- if (max & (sector_t)((mddev->chunk_size>>9)-1))
+ if (mddev->chunk_sectors) {
+ sector_t temp = max;
+ if (sector_div(temp, mddev->chunk_sectors))
return -EINVAL;
}
mddev->resync_max = max;
@@ -3656,7 +3640,7 @@ reshape_position_store(mddev_t *mddev, const char *buf, size_t len)
mddev->delta_disks = 0;
mddev->new_level = mddev->level;
mddev->new_layout = mddev->layout;
- mddev->new_chunk = mddev->chunk_size;
+ mddev->new_chunk_sectors = mddev->chunk_sectors;
return len;
}
@@ -3976,11 +3960,9 @@ static int start_dirty_degraded;
static int do_md_run(mddev_t * mddev)
{
int err;
- int chunk_size;
mdk_rdev_t *rdev;
struct gendisk *disk;
struct mdk_personality *pers;
- char b[BDEVNAME_SIZE];
if (list_empty(&mddev->disks))
/* cannot run an array with no devices.. */
@@ -3998,38 +3980,6 @@ static int do_md_run(mddev_t * mddev)
analyze_sbs(mddev);
}
- chunk_size = mddev->chunk_size;
-
- if (chunk_size) {
- if (chunk_size > MAX_CHUNK_SIZE) {
- printk(KERN_ERR "too big chunk_size: %d > %d\n",
- chunk_size, MAX_CHUNK_SIZE);
- return -EINVAL;
- }
- /*
- * chunk-size has to be a power of 2
- */
- if ( (1 << ffz(~chunk_size)) != chunk_size) {
- printk(KERN_ERR "chunk_size of %d not valid\n", chunk_size);
- return -EINVAL;
- }
-
- /* devices must have minimum size of one chunk */
- list_for_each_entry(rdev, &mddev->disks, same_set) {
- if (test_bit(Faulty, &rdev->flags))
- continue;
- if (rdev->sectors < chunk_size / 512) {
- printk(KERN_WARNING
- "md: Dev %s smaller than chunk_size:"
- " %llu < %d\n",
- bdevname(rdev->bdev,b),
- (unsigned long long)rdev->sectors,
- chunk_size / 512);
- return -EINVAL;
- }
- }
- }
-
if (mddev->level != LEVEL_NONE)
request_module("md-level-%d", mddev->level);
else if (mddev->clevel[0])
@@ -4405,7 +4355,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
mddev->flags = 0;
mddev->ro = 0;
mddev->metadata_type[0] = 0;
- mddev->chunk_size = 0;
+ mddev->chunk_sectors = 0;
mddev->ctime = mddev->utime = 0;
mddev->layout = 0;
mddev->max_disks = 0;
@@ -4413,7 +4363,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
mddev->delta_disks = 0;
mddev->new_level = LEVEL_NONE;
mddev->new_layout = 0;
- mddev->new_chunk = 0;
+ mddev->new_chunk_sectors = 0;
mddev->curr_resync = 0;
mddev->resync_mismatches = 0;
mddev->suspend_lo = mddev->suspend_hi = 0;
@@ -4618,7 +4568,7 @@ static int get_array_info(mddev_t * mddev, void __user * arg)
info.spare_disks = spare;
info.layout = mddev->layout;
- info.chunk_size = mddev->chunk_size;
+ info.chunk_size = mddev->chunk_sectors << 9;
if (copy_to_user(arg, &info, sizeof(info)))
return -EFAULT;
@@ -4843,7 +4793,7 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
rdev->sb_start = rdev->bdev->bd_inode->i_size / 512;
} else
rdev->sb_start = calc_dev_sboffset(rdev->bdev);
- rdev->sectors = calc_num_sectors(rdev, mddev->chunk_size);
+ rdev->sectors = rdev->sb_start;
err = bind_rdev_to_array(rdev, mddev);
if (err) {
@@ -4913,7 +4863,7 @@ static int hot_add_disk(mddev_t * mddev, dev_t dev)
else
rdev->sb_start = rdev->bdev->bd_inode->i_size / 512;
- rdev->sectors = calc_num_sectors(rdev, mddev->chunk_size);
+ rdev->sectors = rdev->sb_start;
if (test_bit(Faulty, &rdev->flags)) {
printk(KERN_WARNING
@@ -5062,7 +5012,7 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info)
mddev->external = 0;
mddev->layout = info->layout;
- mddev->chunk_size = info->chunk_size;
+ mddev->chunk_sectors = info->chunk_size >> 9;
mddev->max_disks = MD_SB_DISKS;
@@ -5081,7 +5031,7 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info)
get_random_bytes(mddev->uuid, 16);
mddev->new_level = mddev->level;
- mddev->new_chunk = mddev->chunk_size;
+ mddev->new_chunk_sectors = mddev->chunk_sectors;
mddev->new_layout = mddev->layout;
mddev->delta_disks = 0;
@@ -5191,7 +5141,7 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info)
mddev->level != info->level ||
/* mddev->layout != info->layout || */
!mddev->persistent != info->not_persistent||
- mddev->chunk_size != info->chunk_size ||
+ mddev->chunk_sectors != info->chunk_size >> 9 ||
/* ignore bottom 8 bits of state, and allow SB_BITMAP_PRESENT to change */
((state^info->state) & 0xfffffe00)
)
@@ -5215,10 +5165,15 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info)
* we don't need to do anything at the md level, the
* personality will take care of it all.
*/
- if (mddev->pers->reconfig == NULL)
+ if (mddev->pers->check_reshape == NULL)
return -EINVAL;
- else
- return mddev->pers->reconfig(mddev, info->layout, -1);
+ else {
+ mddev->new_layout = info->layout;
+ rv = mddev->pers->check_reshape(mddev);
+ if (rv)
+ mddev->new_layout = mddev->layout;
+ return rv;
+ }
}
if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
rv = update_size(mddev, (sector_t)info->size * 2);
@@ -6717,7 +6672,8 @@ void md_check_recovery(mddev_t *mddev)
*/
if (mddev->reshape_position != MaxSector) {
- if (mddev->pers->check_reshape(mddev) != 0)
+ if (mddev->pers->check_reshape == NULL ||
+ mddev->pers->check_reshape(mddev) != 0)
/* Cannot proceed */
goto unlock;
set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 8227ab909d4..9430a110db9 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -30,13 +30,6 @@ typedef struct mddev_s mddev_t;
typedef struct mdk_rdev_s mdk_rdev_t;
/*
- * options passed in raidrun:
- */
-
-/* Currently this must fit in an 'int' */
-#define MAX_CHUNK_SIZE (1<<30)
-
-/*
* MD's 'extended' device
*/
struct mdk_rdev_s
@@ -145,7 +138,7 @@ struct mddev_s
int external; /* metadata is
* managed externally */
char metadata_type[17]; /* externally set*/
- int chunk_size;
+ int chunk_sectors;
time_t ctime, utime;
int level, layout;
char clevel[16];
@@ -166,7 +159,8 @@ struct mddev_s
* If reshape_position is MaxSector, then no reshape is happening (yet).
*/
sector_t reshape_position;
- int delta_disks, new_level, new_layout, new_chunk;
+ int delta_disks, new_level, new_layout;
+ int new_chunk_sectors;
struct mdk_thread_s *thread; /* management thread */
struct mdk_thread_s *sync_thread; /* doing resync or reconstruct */
@@ -325,7 +319,6 @@ struct mdk_personality
int (*check_reshape) (mddev_t *mddev);
int (*start_reshape) (mddev_t *mddev);
void (*finish_reshape) (mddev_t *mddev);
- int (*reconfig) (mddev_t *mddev, int layout, int chunk_size);
/* quiesce moves between quiescence states
* 0 - fully active
* 1 - no new requests allowed
@@ -437,5 +430,6 @@ extern void md_new_event(mddev_t *mddev);
extern int md_allow_write(mddev_t *mddev);
extern void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev);
extern void md_set_array_sectors(mddev_t *mddev, sector_t array_sectors);
+extern int md_check_no_bitmap(mddev_t *mddev);
#endif /* _MD_MD_H */
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 4ee31aa13c4..cbe368fa659 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -58,7 +58,7 @@ static void multipath_reschedule_retry (struct multipath_bh *mp_bh)
{
unsigned long flags;
mddev_t *mddev = mp_bh->mddev;
- multipath_conf_t *conf = mddev_to_conf(mddev);
+ multipath_conf_t *conf = mddev->private;
spin_lock_irqsave(&conf->device_lock, flags);
list_add(&mp_bh->retry_list, &conf->retry_list);
@@ -75,7 +75,7 @@ static void multipath_reschedule_retry (struct multipath_bh *mp_bh)
static void multipath_end_bh_io (struct multipath_bh *mp_bh, int err)
{
struct bio *bio = mp_bh->master_bio;
- multipath_conf_t *conf = mddev_to_conf(mp_bh->mddev);
+ multipath_conf_t *conf = mp_bh->mddev->private;
bio_endio(bio, err);
mempool_free(mp_bh, conf->pool);
@@ -85,7 +85,7 @@ static void multipath_end_request(struct bio *bio, int error)
{
int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
struct multipath_bh * mp_bh = (struct multipath_bh *)(bio->bi_private);
- multipath_conf_t *conf = mddev_to_conf(mp_bh->mddev);
+ multipath_conf_t *conf = mp_bh->mddev->private;
mdk_rdev_t *rdev = conf->multipaths[mp_bh->path].rdev;
if (uptodate)
@@ -107,7 +107,7 @@ static void multipath_end_request(struct bio *bio, int error)
static void unplug_slaves(mddev_t *mddev)
{
- multipath_conf_t *conf = mddev_to_conf(mddev);
+ multipath_conf_t *conf = mddev->private;
int i;
rcu_read_lock();
@@ -138,7 +138,7 @@ static void multipath_unplug(struct request_queue *q)
static int multipath_make_request (struct request_queue *q, struct bio * bio)
{
mddev_t *mddev = q->queuedata;
- multipath_conf_t *conf = mddev_to_conf(mddev);
+ multipath_conf_t *conf = mddev->private;
struct multipath_bh * mp_bh;
struct multipath_info *multipath;
const int rw = bio_data_dir(bio);
@@ -180,7 +180,7 @@ static int multipath_make_request (struct request_queue *q, struct bio * bio)
static void multipath_status (struct seq_file *seq, mddev_t *mddev)
{
- multipath_conf_t *conf = mddev_to_conf(mddev);
+ multipath_conf_t *conf = mddev->private;
int i;
seq_printf (seq, " [%d/%d] [", conf->raid_disks,
@@ -195,7 +195,7 @@ static void multipath_status (struct seq_file *seq, mddev_t *mddev)
static int multipath_congested(void *data, int bits)
{
mddev_t *mddev = data;
- multipath_conf_t *conf = mddev_to_conf(mddev);
+ multipath_conf_t *conf = mddev->private;
int i, ret = 0;
rcu_read_lock();
@@ -220,7 +220,7 @@ static int multipath_congested(void *data, int bits)
*/
static void multipath_error (mddev_t *mddev, mdk_rdev_t *rdev)
{
- multipath_conf_t *conf = mddev_to_conf(mddev);
+ multipath_conf_t *conf = mddev->private;
if (conf->working_disks <= 1) {
/*
@@ -367,7 +367,7 @@ static void multipathd (mddev_t *mddev)
struct multipath_bh *mp_bh;
struct bio *bio;
unsigned long flags;
- multipath_conf_t *conf = mddev_to_conf(mddev);
+ multipath_conf_t *conf = mddev->private;
struct list_head *head = &conf->retry_list;
md_check_recovery(mddev);
@@ -421,6 +421,9 @@ static int multipath_run (mddev_t *mddev)
struct multipath_info *disk;
mdk_rdev_t *rdev;
+ if (md_check_no_bitmap(mddev))
+ return -EINVAL;
+
if (mddev->level != LEVEL_MULTIPATH) {
printk("multipath: %s: raid level not set to multipath IO (%d)\n",
mdname(mddev), mddev->level);
@@ -531,7 +534,7 @@ out:
static int multipath_stop (mddev_t *mddev)
{
- multipath_conf_t *conf = mddev_to_conf(mddev);
+ multipath_conf_t *conf = mddev->private;
md_unregister_thread(mddev->thread);
mddev->thread = NULL;
diff --git a/drivers/md/multipath.h b/drivers/md/multipath.h
index 6fa70b400cd..d1c2a8d7839 100644
--- a/drivers/md/multipath.h
+++ b/drivers/md/multipath.h
@@ -19,12 +19,6 @@ struct multipath_private_data {
typedef struct multipath_private_data multipath_conf_t;
/*
- * this is the only point in the RAID code where we violate
- * C type safety. mddev->private is an 'opaque' pointer.
- */
-#define mddev_to_conf(mddev) ((multipath_conf_t *) mddev->private)
-
-/*
* this is our 'private' 'collective' MULTIPATH buffer head.
* it contains information about what kind of IO operations were started
* for this MULTIPATH operation, and about their status:
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 925507e7d67..ab4a489d869 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -26,8 +26,8 @@
static void raid0_unplug(struct request_queue *q)
{
mddev_t *mddev = q->queuedata;
- raid0_conf_t *conf = mddev_to_conf(mddev);
- mdk_rdev_t **devlist = conf->strip_zone[0].dev;
+ raid0_conf_t *conf = mddev->private;
+ mdk_rdev_t **devlist = conf->devlist;
int i;
for (i=0; i<mddev->raid_disks; i++) {
@@ -40,8 +40,8 @@ static void raid0_unplug(struct request_queue *q)
static int raid0_congested(void *data, int bits)
{
mddev_t *mddev = data;
- raid0_conf_t *conf = mddev_to_conf(mddev);
- mdk_rdev_t **devlist = conf->strip_zone[0].dev;
+ raid0_conf_t *conf = mddev->private;
+ mdk_rdev_t **devlist = conf->devlist;
int i, ret = 0;
for (i = 0; i < mddev->raid_disks && !ret ; i++) {
@@ -52,27 +52,60 @@ static int raid0_congested(void *data, int bits)
return ret;
}
+/*
+ * inform the user of the raid configuration
+*/
+static void dump_zones(mddev_t *mddev)
+{
+ int j, k, h;
+ sector_t zone_size = 0;
+ sector_t zone_start = 0;
+ char b[BDEVNAME_SIZE];
+ raid0_conf_t *conf = mddev->private;
+ printk(KERN_INFO "******* %s configuration *********\n",
+ mdname(mddev));
+ h = 0;
+ for (j = 0; j < conf->nr_strip_zones; j++) {
+ printk(KERN_INFO "zone%d=[", j);
+ for (k = 0; k < conf->strip_zone[j].nb_dev; k++)
+ printk("%s/",
+ bdevname(conf->devlist[j*mddev->raid_disks
+ + k]->bdev, b));
+ printk("]\n");
+
+ zone_size = conf->strip_zone[j].zone_end - zone_start;
+ printk(KERN_INFO " zone offset=%llukb "
+ "device offset=%llukb size=%llukb\n",
+ (unsigned long long)zone_start>>1,
+ (unsigned long long)conf->strip_zone[j].dev_start>>1,
+ (unsigned long long)zone_size>>1);
+ zone_start = conf->strip_zone[j].zone_end;
+ }
+ printk(KERN_INFO "**********************************\n\n");
+}
-static int create_strip_zones (mddev_t *mddev)
+static int create_strip_zones(mddev_t *mddev)
{
- int i, c, j;
- sector_t current_start, curr_zone_start;
- sector_t min_spacing;
- raid0_conf_t *conf = mddev_to_conf(mddev);
- mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev;
+ int i, c, j, err;
+ sector_t curr_zone_end, sectors;
+ mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev, **dev;
struct strip_zone *zone;
int cnt;
char b[BDEVNAME_SIZE];
-
- /*
- * The number of 'same size groups'
- */
- conf->nr_strip_zones = 0;
-
+ raid0_conf_t *conf = kzalloc(sizeof(*conf), GFP_KERNEL);
+
+ if (!conf)
+ return -ENOMEM;
list_for_each_entry(rdev1, &mddev->disks, same_set) {
printk(KERN_INFO "raid0: looking at %s\n",
bdevname(rdev1->bdev,b));
c = 0;
+
+ /* round size to chunk_size */
+ sectors = rdev1->sectors;
+ sector_div(sectors, mddev->chunk_sectors);
+ rdev1->sectors = sectors * mddev->chunk_sectors;
+
list_for_each_entry(rdev2, &mddev->disks, same_set) {
printk(KERN_INFO "raid0: comparing %s(%llu)",
bdevname(rdev1->bdev,b),
@@ -103,16 +136,16 @@ static int create_strip_zones (mddev_t *mddev)
}
}
printk(KERN_INFO "raid0: FINAL %d zones\n", conf->nr_strip_zones);
-
+ err = -ENOMEM;
conf->strip_zone = kzalloc(sizeof(struct strip_zone)*
conf->nr_strip_zones, GFP_KERNEL);
if (!conf->strip_zone)
- return 1;
+ goto abort;
conf->devlist = kzalloc(sizeof(mdk_rdev_t*)*
conf->nr_strip_zones*mddev->raid_disks,
GFP_KERNEL);
if (!conf->devlist)
- return 1;
+ goto abort;
/* The first zone must contain all devices, so here we check that
* there is a proper alignment of slots to devices and find them all
@@ -120,7 +153,8 @@ static int create_strip_zones (mddev_t *mddev)
zone = &conf->strip_zone[0];
cnt = 0;
smallest = NULL;
- zone->dev = conf->devlist;
+ dev = conf->devlist;
+ err = -EINVAL;
list_for_each_entry(rdev1, &mddev->disks, same_set) {
int j = rdev1->raid_disk;
@@ -129,12 +163,12 @@ static int create_strip_zones (mddev_t *mddev)
"aborting!\n", j);
goto abort;
}
- if (zone->dev[j]) {
+ if (dev[j]) {
printk(KERN_ERR "raid0: multiple devices for %d - "
"aborting!\n", j);
goto abort;
}
- zone->dev[j] = rdev1;
+ dev[j] = rdev1;
blk_queue_stack_limits(mddev->queue,
rdev1->bdev->bd_disk->queue);
@@ -157,34 +191,32 @@ static int create_strip_zones (mddev_t *mddev)
goto abort;
}
zone->nb_dev = cnt;
- zone->sectors = smallest->sectors * cnt;
- zone->zone_start = 0;
+ zone->zone_end = smallest->sectors * cnt;
- current_start = smallest->sectors;
- curr_zone_start = zone->sectors;
+ curr_zone_end = zone->zone_end;
/* now do the other zones */
for (i = 1; i < conf->nr_strip_zones; i++)
{
zone = conf->strip_zone + i;
- zone->dev = conf->strip_zone[i-1].dev + mddev->raid_disks;
+ dev = conf->devlist + i * mddev->raid_disks;
printk(KERN_INFO "raid0: zone %d\n", i);
- zone->dev_start = current_start;
+ zone->dev_start = smallest->sectors;
smallest = NULL;
c = 0;
for (j=0; j<cnt; j++) {
char b[BDEVNAME_SIZE];
- rdev = conf->strip_zone[0].dev[j];
+ rdev = conf->devlist[j];
printk(KERN_INFO "raid0: checking %s ...",
bdevname(rdev->bdev, b));
- if (rdev->sectors <= current_start) {
+ if (rdev->sectors <= zone->dev_start) {
printk(KERN_INFO " nope.\n");
continue;
}
printk(KERN_INFO " contained as device %d\n", c);
- zone->dev[c] = rdev;
+ dev[c] = rdev;
c++;
if (!smallest || rdev->sectors < smallest->sectors) {
smallest = rdev;
@@ -194,47 +226,39 @@ static int create_strip_zones (mddev_t *mddev)
}
zone->nb_dev = c;
- zone->sectors = (smallest->sectors - current_start) * c;
+ sectors = (smallest->sectors - zone->dev_start) * c;
printk(KERN_INFO "raid0: zone->nb_dev: %d, sectors: %llu\n",
- zone->nb_dev, (unsigned long long)zone->sectors);
+ zone->nb_dev, (unsigned long long)sectors);
- zone->zone_start = curr_zone_start;
- curr_zone_start += zone->sectors;
+ curr_zone_end += sectors;
+ zone->zone_end = curr_zone_end;
- current_start = smallest->sectors;
printk(KERN_INFO "raid0: current zone start: %llu\n",
- (unsigned long long)current_start);
- }
-
- /* Now find appropriate hash spacing.
- * We want a number which causes most hash entries to cover
- * at most two strips, but the hash table must be at most
- * 1 PAGE. We choose the smallest strip, or contiguous collection
- * of strips, that has big enough size. We never consider the last
- * strip though as it's size has no bearing on the efficacy of the hash
- * table.
- */
- conf->spacing = curr_zone_start;
- min_spacing = curr_zone_start;
- sector_div(min_spacing, PAGE_SIZE/sizeof(struct strip_zone*));
- for (i=0; i < conf->nr_strip_zones-1; i++) {
- sector_t s = 0;
- for (j = i; j < conf->nr_strip_zones - 1 &&
- s < min_spacing; j++)
- s += conf->strip_zone[j].sectors;
- if (s >= min_spacing && s < conf->spacing)
- conf->spacing = s;
+ (unsigned long long)smallest->sectors);
}
-
mddev->queue->unplug_fn = raid0_unplug;
-
mddev->queue->backing_dev_info.congested_fn = raid0_congested;
mddev->queue->backing_dev_info.congested_data = mddev;
+ /*
+ * now since we have the hard sector sizes, we can make sure
+ * chunk size is a multiple of that sector size
+ */
+ if ((mddev->chunk_sectors << 9) % queue_logical_block_size(mddev->queue)) {
+ printk(KERN_ERR "%s chunk_size of %d not valid\n",
+ mdname(mddev),
+ mddev->chunk_sectors << 9);
+ goto abort;
+ }
printk(KERN_INFO "raid0: done.\n");
+ mddev->private = conf;
return 0;
- abort:
- return 1;
+abort:
+ kfree(conf->strip_zone);
+ kfree(conf->devlist);
+ kfree(conf);
+ mddev->private = NULL;
+ return err;
}
/**
@@ -252,10 +276,15 @@ static int raid0_mergeable_bvec(struct request_queue *q,
mddev_t *mddev = q->queuedata;
sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
int max;
- unsigned int chunk_sectors = mddev->chunk_size >> 9;
+ unsigned int chunk_sectors = mddev->chunk_sectors;
unsigned int bio_sectors = bvm->bi_size >> 9;
- max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9;
+ if (is_power_of_2(chunk_sectors))
+ max = (chunk_sectors - ((sector & (chunk_sectors-1))
+ + bio_sectors)) << 9;
+ else
+ max = (chunk_sectors - (sector_div(sector, chunk_sectors)
+ + bio_sectors)) << 9;
if (max < 0) max = 0; /* bio_add cannot handle a negative return */
if (max <= biovec->bv_len && bio_sectors == 0)
return biovec->bv_len;
@@ -277,84 +306,28 @@ static sector_t raid0_size(mddev_t *mddev, sector_t sectors, int raid_disks)
return array_sectors;
}
-static int raid0_run (mddev_t *mddev)
+static int raid0_run(mddev_t *mddev)
{
- unsigned cur=0, i=0, nb_zone;
- s64 sectors;
- raid0_conf_t *conf;
+ int ret;
- if (mddev->chunk_size == 0) {
- printk(KERN_ERR "md/raid0: non-zero chunk size required.\n");
+ if (mddev->chunk_sectors == 0) {
+ printk(KERN_ERR "md/raid0: chunk size must be set.\n");
return -EINVAL;
}
- printk(KERN_INFO "%s: setting max_sectors to %d, segment boundary to %d\n",
- mdname(mddev),
- mddev->chunk_size >> 9,
- (mddev->chunk_size>>1)-1);
- blk_queue_max_sectors(mddev->queue, mddev->chunk_size >> 9);
- blk_queue_segment_boundary(mddev->queue, (mddev->chunk_size>>1) - 1);
+ if (md_check_no_bitmap(mddev))
+ return -EINVAL;
+ blk_queue_max_sectors(mddev->queue, mddev->chunk_sectors);
mddev->queue->queue_lock = &mddev->queue->__queue_lock;
- conf = kmalloc(sizeof (raid0_conf_t), GFP_KERNEL);
- if (!conf)
- goto out;
- mddev->private = (void *)conf;
-
- conf->strip_zone = NULL;
- conf->devlist = NULL;
- if (create_strip_zones (mddev))
- goto out_free_conf;
+ ret = create_strip_zones(mddev);
+ if (ret < 0)
+ return ret;
/* calculate array device size */
md_set_array_sectors(mddev, raid0_size(mddev, 0, 0));
printk(KERN_INFO "raid0 : md_size is %llu sectors.\n",
(unsigned long long)mddev->array_sectors);
- printk(KERN_INFO "raid0 : conf->spacing is %llu sectors.\n",
- (unsigned long long)conf->spacing);
- {
- sector_t s = raid0_size(mddev, 0, 0);
- sector_t space = conf->spacing;
- int round;
- conf->sector_shift = 0;
- if (sizeof(sector_t) > sizeof(u32)) {
- /*shift down space and s so that sector_div will work */
- while (space > (sector_t) (~(u32)0)) {
- s >>= 1;
- space >>= 1;
- s += 1; /* force round-up */
- conf->sector_shift++;
- }
- }
- round = sector_div(s, (u32)space) ? 1 : 0;
- nb_zone = s + round;
- }
- printk(KERN_INFO "raid0 : nb_zone is %d.\n", nb_zone);
-
- printk(KERN_INFO "raid0 : Allocating %zu bytes for hash.\n",
- nb_zone*sizeof(struct strip_zone*));
- conf->hash_table = kmalloc (sizeof (struct strip_zone *)*nb_zone, GFP_KERNEL);
- if (!conf->hash_table)
- goto out_free_conf;
- sectors = conf->strip_zone[cur].sectors;
-
- conf->hash_table[0] = conf->strip_zone + cur;
- for (i=1; i< nb_zone; i++) {
- while (sectors <= conf->spacing) {
- cur++;
- sectors += conf->strip_zone[cur].sectors;
- }
- sectors -= conf->spacing;
- conf->hash_table[i] = conf->strip_zone + cur;
- }
- if (conf->sector_shift) {
- conf->spacing >>= conf->sector_shift;
- /* round spacing up so when we divide by it, we
- * err on the side of too-low, which is safest
- */
- conf->spacing++;
- }
-
/* calculate the max read-ahead size.
* For read-ahead of large files to be effective, we need to
* readahead at least twice a whole stripe. i.e. number of devices
@@ -365,48 +338,107 @@ static int raid0_run (mddev_t *mddev)
* chunksize should be used in that case.
*/
{
- int stripe = mddev->raid_disks * mddev->chunk_size / PAGE_SIZE;
+ int stripe = mddev->raid_disks *
+ (mddev->chunk_sectors << 9) / PAGE_SIZE;
if (mddev->queue->backing_dev_info.ra_pages < 2* stripe)
mddev->queue->backing_dev_info.ra_pages = 2* stripe;
}
-
blk_queue_merge_bvec(mddev->queue, raid0_mergeable_bvec);
+ dump_zones(mddev);
return 0;
+}
-out_free_conf:
+static int raid0_stop(mddev_t *mddev)
+{
+ raid0_conf_t *conf = mddev->private;
+
+ blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
kfree(conf->strip_zone);
kfree(conf->devlist);
kfree(conf);
mddev->private = NULL;
-out:
- return -ENOMEM;
+ return 0;
}
-static int raid0_stop (mddev_t *mddev)
+/* Find the zone which holds a particular offset
+ * Update *sectorp to be an offset in that zone
+ */
+static struct strip_zone *find_zone(struct raid0_private_data *conf,
+ sector_t *sectorp)
{
- raid0_conf_t *conf = mddev_to_conf(mddev);
+ int i;
+ struct strip_zone *z = conf->strip_zone;
+ sector_t sector = *sectorp;
+
+ for (i = 0; i < conf->nr_strip_zones; i++)
+ if (sector < z[i].zone_end) {
+ if (i)
+ *sectorp = sector - z[i-1].zone_end;
+ return z + i;
+ }
+ BUG();
+}
- blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
- kfree(conf->hash_table);
- conf->hash_table = NULL;
- kfree(conf->strip_zone);
- conf->strip_zone = NULL;
- kfree(conf);
- mddev->private = NULL;
+/*
+ * remaps the bio to the target device. we separate two flows.
+ * power 2 flow and a general flow for the sake of perfromance
+*/
+static mdk_rdev_t *map_sector(mddev_t *mddev, struct strip_zone *zone,
+ sector_t sector, sector_t *sector_offset)
+{
+ unsigned int sect_in_chunk;
+ sector_t chunk;
+ raid0_conf_t *conf = mddev->private;
+ unsigned int chunk_sects = mddev->chunk_sectors;
+
+ if (is_power_of_2(chunk_sects)) {
+ int chunksect_bits = ffz(~chunk_sects);
+ /* find the sector offset inside the chunk */
+ sect_in_chunk = sector & (chunk_sects - 1);
+ sector >>= chunksect_bits;
+ /* chunk in zone */
+ chunk = *sector_offset;
+ /* quotient is the chunk in real device*/
+ sector_div(chunk, zone->nb_dev << chunksect_bits);
+ } else{
+ sect_in_chunk = sector_div(sector, chunk_sects);
+ chunk = *sector_offset;
+ sector_div(chunk, chunk_sects * zone->nb_dev);
+ }
+ /*
+ * position the bio over the real device
+ * real sector = chunk in device + starting of zone
+ * + the position in the chunk
+ */
+ *sector_offset = (chunk * chunk_sects) + sect_in_chunk;
+ return conf->devlist[(zone - conf->strip_zone)*mddev->raid_disks
+ + sector_div(sector, zone->nb_dev)];
+}
- return 0;
+/*
+ * Is io distribute over 1 or more chunks ?
+*/
+static inline int is_io_in_chunk_boundary(mddev_t *mddev,
+ unsigned int chunk_sects, struct bio *bio)
+{
+ if (likely(is_power_of_2(chunk_sects))) {
+ return chunk_sects >= ((bio->bi_sector & (chunk_sects-1))
+ + (bio->bi_size >> 9));
+ } else{
+ sector_t sector = bio->bi_sector;
+ return chunk_sects >= (sector_div(sector, chunk_sects)
+ + (bio->bi_size >> 9));
+ }
}
-static int raid0_make_request (struct request_queue *q, struct bio *bio)
+static int raid0_make_request(struct request_queue *q, struct bio *bio)
{
mddev_t *mddev = q->queuedata;
- unsigned int sect_in_chunk, chunksect_bits, chunk_sects;
- raid0_conf_t *conf = mddev_to_conf(mddev);
+ unsigned int chunk_sects;
+ sector_t sector_offset;
struct strip_zone *zone;
mdk_rdev_t *tmp_dev;
- sector_t chunk;
- sector_t sector, rsect;
const int rw = bio_data_dir(bio);
int cpu;
@@ -421,11 +453,9 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio)
bio_sectors(bio));
part_stat_unlock();
- chunk_sects = mddev->chunk_size >> 9;
- chunksect_bits = ffz(~chunk_sects);
- sector = bio->bi_sector;
-
- if (unlikely(chunk_sects < (bio->bi_sector & (chunk_sects - 1)) + (bio->bi_size >> 9))) {
+ chunk_sects = mddev->chunk_sectors;
+ if (unlikely(!is_io_in_chunk_boundary(mddev, chunk_sects, bio))) {
+ sector_t sector = bio->bi_sector;
struct bio_pair *bp;
/* Sanity check -- queue functions should prevent this happening */
if (bio->bi_vcnt != 1 ||
@@ -434,7 +464,12 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio)
/* This is a one page bio that upper layers
* refuse to split for us, so we need to split it.
*/
- bp = bio_split(bio, chunk_sects - (bio->bi_sector & (chunk_sects - 1)));
+ if (likely(is_power_of_2(chunk_sects)))
+ bp = bio_split(bio, chunk_sects - (sector &
+ (chunk_sects-1)));
+ else
+ bp = bio_split(bio, chunk_sects -
+ sector_div(sector, chunk_sects));
if (raid0_make_request(q, &bp->bio1))
generic_make_request(&bp->bio1);
if (raid0_make_request(q, &bp->bio2))
@@ -443,34 +478,14 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio)
bio_pair_release(bp);
return 0;
}
-
-
- {
- sector_t x = sector >> conf->sector_shift;
- sector_div(x, (u32)conf->spacing);
- zone = conf->hash_table[x];
- }
- while (sector >= zone->zone_start + zone->sectors)
- zone++;
-
- sect_in_chunk = bio->bi_sector & (chunk_sects - 1);
-
-
- {
- sector_t x = (sector - zone->zone_start) >> chunksect_bits;
-
- sector_div(x, zone->nb_dev);
- chunk = x;
-
- x = sector >> chunksect_bits;
- tmp_dev = zone->dev[sector_div(x, zone->nb_dev)];
- }
- rsect = (chunk << chunksect_bits) + zone->dev_start + sect_in_chunk;
-
+ sector_offset = bio->bi_sector;
+ zone = find_zone(mddev->private, &sector_offset);
+ tmp_dev = map_sector(mddev, zone, bio->bi_sector,
+ &sector_offset);
bio->bi_bdev = tmp_dev->bdev;
- bio->bi_sector = rsect + tmp_dev->data_offset;
-
+ bio->bi_sector = sector_offset + zone->dev_start +
+ tmp_dev->data_offset;
/*
* Let the main block layer submit the IO and resolve recursion:
*/
@@ -485,31 +500,35 @@ bad_map:
return 0;
}
-static void raid0_status (struct seq_file *seq, mddev_t *mddev)
+static void raid0_status(struct seq_file *seq, mddev_t *mddev)
{
#undef MD_DEBUG
#ifdef MD_DEBUG
int j, k, h;
char b[BDEVNAME_SIZE];
- raid0_conf_t *conf = mddev_to_conf(mddev);
+ raid0_conf_t *conf = mddev->private;
+ sector_t zone_size;
+ sector_t zone_start = 0;
h = 0;
+
for (j = 0; j < conf->nr_strip_zones; j++) {
seq_printf(seq, " z%d", j);
- if (conf->hash_table[h] == conf->strip_zone+j)
- seq_printf(seq, "(h%d)", h++);
seq_printf(seq, "=[");
for (k = 0; k < conf->strip_zone[j].nb_dev; k++)
seq_printf(seq, "%s/", bdevname(
- conf->strip_zone[j].dev[k]->bdev,b));
-
- seq_printf(seq, "] zs=%d ds=%d s=%d\n",
- conf->strip_zone[j].zone_start,
- conf->strip_zone[j].dev_start,
- conf->strip_zone[j].sectors);
+ conf->devlist[j*mddev->raid_disks + k]
+ ->bdev, b));
+
+ zone_size = conf->strip_zone[j].zone_end - zone_start;
+ seq_printf(seq, "] ze=%lld ds=%lld s=%lld\n",
+ (unsigned long long)zone_start>>1,
+ (unsigned long long)conf->strip_zone[j].dev_start>>1,
+ (unsigned long long)zone_size>>1);
+ zone_start = conf->strip_zone[j].zone_end;
}
#endif
- seq_printf(seq, " %dk chunks", mddev->chunk_size/1024);
+ seq_printf(seq, " %dk chunks", mddev->chunk_sectors / 2);
return;
}
diff --git a/drivers/md/raid0.h b/drivers/md/raid0.h
index 824b12eb1d4..91f8e876ee6 100644
--- a/drivers/md/raid0.h
+++ b/drivers/md/raid0.h
@@ -3,26 +3,18 @@
struct strip_zone
{
- sector_t zone_start; /* Zone offset in md_dev (in sectors) */
+ sector_t zone_end; /* Start of the next zone (in sectors) */
sector_t dev_start; /* Zone offset in real dev (in sectors) */
- sector_t sectors; /* Zone size in sectors */
int nb_dev; /* # of devices attached to the zone */
- mdk_rdev_t **dev; /* Devices attached to the zone */
};
struct raid0_private_data
{
- struct strip_zone **hash_table; /* Table of indexes into strip_zone */
struct strip_zone *strip_zone;
mdk_rdev_t **devlist; /* lists of rdevs, pointed to by strip_zone->dev */
int nr_strip_zones;
-
- sector_t spacing;
- int sector_shift; /* shift this before divide by spacing */
};
typedef struct raid0_private_data raid0_conf_t;
-#define mddev_to_conf(mddev) ((raid0_conf_t *) mddev->private)
-
#endif
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index e23758b4a34..89939a7aef5 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -182,7 +182,7 @@ static void put_all_bios(conf_t *conf, r1bio_t *r1_bio)
static void free_r1bio(r1bio_t *r1_bio)
{
- conf_t *conf = mddev_to_conf(r1_bio->mddev);
+ conf_t *conf = r1_bio->mddev->private;
/*
* Wake up any possible resync thread that waits for the device
@@ -196,7 +196,7 @@ static void free_r1bio(r1bio_t *r1_bio)
static void put_buf(r1bio_t *r1_bio)
{
- conf_t *conf = mddev_to_conf(r1_bio->mddev);
+ conf_t *conf = r1_bio->mddev->private;
int i;
for (i=0; i<conf->raid_disks; i++) {
@@ -214,7 +214,7 @@ static void reschedule_retry(r1bio_t *r1_bio)
{
unsigned long flags;
mddev_t *mddev = r1_bio->mddev;
- conf_t *conf = mddev_to_conf(mddev);
+ conf_t *conf = mddev->private;
spin_lock_irqsave(&conf->device_lock, flags);
list_add(&r1_bio->retry_list, &conf->retry_list);
@@ -253,7 +253,7 @@ static void raid_end_bio_io(r1bio_t *r1_bio)
*/
static inline void update_head_pos(int disk, r1bio_t *r1_bio)
{
- conf_t *conf = mddev_to_conf(r1_bio->mddev);
+ conf_t *conf = r1_bio->mddev->private;
conf->mirrors[disk].head_position =
r1_bio->sector + (r1_bio->sectors);
@@ -264,7 +264,7 @@ static void raid1_end_read_request(struct bio *bio, int error)
int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private);
int mirror;
- conf_t *conf = mddev_to_conf(r1_bio->mddev);
+ conf_t *conf = r1_bio->mddev->private;
mirror = r1_bio->read_disk;
/*
@@ -309,7 +309,7 @@ static void raid1_end_write_request(struct bio *bio, int error)
int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private);
int mirror, behind = test_bit(R1BIO_BehindIO, &r1_bio->state);
- conf_t *conf = mddev_to_conf(r1_bio->mddev);
+ conf_t *conf = r1_bio->mddev->private;
struct bio *to_put = NULL;
@@ -541,7 +541,7 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
static void unplug_slaves(mddev_t *mddev)
{
- conf_t *conf = mddev_to_conf(mddev);
+ conf_t *conf = mddev->private;
int i;
rcu_read_lock();
@@ -573,7 +573,7 @@ static void raid1_unplug(struct request_queue *q)
static int raid1_congested(void *data, int bits)
{
mddev_t *mddev = data;
- conf_t *conf = mddev_to_conf(mddev);
+ conf_t *conf = mddev->private;
int i, ret = 0;
rcu_read_lock();
@@ -772,7 +772,7 @@ do_sync_io:
static int make_request(struct request_queue *q, struct bio * bio)
{
mddev_t *mddev = q->queuedata;
- conf_t *conf = mddev_to_conf(mddev);
+ conf_t *conf = mddev->private;
mirror_info_t *mirror;
r1bio_t *r1_bio;
struct bio *read_bio;
@@ -991,7 +991,7 @@ static int make_request(struct request_queue *q, struct bio * bio)
static void status(struct seq_file *seq, mddev_t *mddev)
{
- conf_t *conf = mddev_to_conf(mddev);
+ conf_t *conf = mddev->private;
int i;
seq_printf(seq, " [%d/%d] [", conf->raid_disks,
@@ -1010,7 +1010,7 @@ static void status(struct seq_file *seq, mddev_t *mddev)
static void error(mddev_t *mddev, mdk_rdev_t *rdev)
{
char b[BDEVNAME_SIZE];
- conf_t *conf = mddev_to_conf(mddev);
+ conf_t *conf = mddev->private;
/*
* If it is not operational, then we have already marked it as dead
@@ -1214,7 +1214,7 @@ static void end_sync_write(struct bio *bio, int error)
int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private);
mddev_t *mddev = r1_bio->mddev;
- conf_t *conf = mddev_to_conf(mddev);
+ conf_t *conf = mddev->private;
int i;
int mirror=0;
@@ -1248,7 +1248,7 @@ static void end_sync_write(struct bio *bio, int error)
static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
{
- conf_t *conf = mddev_to_conf(mddev);
+ conf_t *conf = mddev->private;
int i;
int disks = conf->raid_disks;
struct bio *bio, *wbio;
@@ -1562,7 +1562,7 @@ static void raid1d(mddev_t *mddev)
r1bio_t *r1_bio;
struct bio *bio;
unsigned long flags;
- conf_t *conf = mddev_to_conf(mddev);
+ conf_t *conf = mddev->private;
struct list_head *head = &conf->retry_list;
int unplug=0;
mdk_rdev_t *rdev;
@@ -1585,7 +1585,7 @@ static void raid1d(mddev_t *mddev)
spin_unlock_irqrestore(&conf->device_lock, flags);
mddev = r1_bio->mddev;
- conf = mddev_to_conf(mddev);
+ conf = mddev->private;
if (test_bit(R1BIO_IsSync, &r1_bio->state)) {
sync_request_write(mddev, r1_bio);
unplug = 1;
@@ -1706,7 +1706,7 @@ static int init_resync(conf_t *conf)
static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster)
{
- conf_t *conf = mddev_to_conf(mddev);
+ conf_t *conf = mddev->private;
r1bio_t *r1_bio;
struct bio *bio;
sector_t max_sector, nr_sectors;
@@ -2052,6 +2052,10 @@ static int run(mddev_t *mddev)
goto out_free_conf;
}
+ if (mddev->recovery_cp != MaxSector)
+ printk(KERN_NOTICE "raid1: %s is not clean"
+ " -- starting background reconstruction\n",
+ mdname(mddev));
printk(KERN_INFO
"raid1: raid set %s active with %d out of %d mirrors\n",
mdname(mddev), mddev->raid_disks - mddev->degraded,
@@ -2087,7 +2091,7 @@ out:
static int stop(mddev_t *mddev)
{
- conf_t *conf = mddev_to_conf(mddev);
+ conf_t *conf = mddev->private;
struct bitmap *bitmap = mddev->bitmap;
int behind_wait = 0;
@@ -2155,16 +2159,16 @@ static int raid1_reshape(mddev_t *mddev)
mempool_t *newpool, *oldpool;
struct pool_info *newpoolinfo;
mirror_info_t *newmirrors;
- conf_t *conf = mddev_to_conf(mddev);
+ conf_t *conf = mddev->private;
int cnt, raid_disks;
unsigned long flags;
int d, d2, err;
/* Cannot change chunk_size, layout, or level */
- if (mddev->chunk_size != mddev->new_chunk ||
+ if (mddev->chunk_sectors != mddev->new_chunk_sectors ||
mddev->layout != mddev->new_layout ||
mddev->level != mddev->new_level) {
- mddev->new_chunk = mddev->chunk_size;
+ mddev->new_chunk_sectors = mddev->chunk_sectors;
mddev->new_layout = mddev->layout;
mddev->new_level = mddev->level;
return -EINVAL;
@@ -2252,7 +2256,7 @@ static int raid1_reshape(mddev_t *mddev)
static void raid1_quiesce(mddev_t *mddev, int state)
{
- conf_t *conf = mddev_to_conf(mddev);
+ conf_t *conf = mddev->private;
switch(state) {
case 1:
diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h
index 1620eea3d57..e87b84deff6 100644
--- a/drivers/md/raid1.h
+++ b/drivers/md/raid1.h
@@ -64,12 +64,6 @@ struct r1_private_data_s {
typedef struct r1_private_data_s conf_t;
/*
- * this is the only point in the RAID code where we violate
- * C type safety. mddev->private is an 'opaque' pointer.
- */
-#define mddev_to_conf(mddev) ((conf_t *) mddev->private)
-
-/*
* this is our 'private' RAID1 bio.
*
* it contains information about what kind of IO operations were started
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 750550c1166..ae12ceafe10 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -188,7 +188,7 @@ static void put_all_bios(conf_t *conf, r10bio_t *r10_bio)
static void free_r10bio(r10bio_t *r10_bio)
{
- conf_t *conf = mddev_to_conf(r10_bio->mddev);
+ conf_t *conf = r10_bio->mddev->private;
/*
* Wake up any possible resync thread that waits for the device
@@ -202,7 +202,7 @@ static void free_r10bio(r10bio_t *r10_bio)
static void put_buf(r10bio_t *r10_bio)
{
- conf_t *conf = mddev_to_conf(r10_bio->mddev);
+ conf_t *conf = r10_bio->mddev->private;
mempool_free(r10_bio, conf->r10buf_pool);
@@ -213,7 +213,7 @@ static void reschedule_retry(r10bio_t *r10_bio)
{
unsigned long flags;
mddev_t *mddev = r10_bio->mddev;
- conf_t *conf = mddev_to_conf(mddev);
+ conf_t *conf = mddev->private;
spin_lock_irqsave(&conf->device_lock, flags);
list_add(&r10_bio->retry_list, &conf->retry_list);
@@ -245,7 +245,7 @@ static void raid_end_bio_io(r10bio_t *r10_bio)
*/
static inline void update_head_pos(int slot, r10bio_t *r10_bio)
{
- conf_t *conf = mddev_to_conf(r10_bio->mddev);
+ conf_t *conf = r10_bio->mddev->private;
conf->mirrors[r10_bio->devs[slot].devnum].head_position =
r10_bio->devs[slot].addr + (r10_bio->sectors);
@@ -256,7 +256,7 @@ static void raid10_end_read_request(struct bio *bio, int error)
int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
r10bio_t * r10_bio = (r10bio_t *)(bio->bi_private);
int slot, dev;
- conf_t *conf = mddev_to_conf(r10_bio->mddev);
+ conf_t *conf = r10_bio->mddev->private;
slot = r10_bio->read_slot;
@@ -297,7 +297,7 @@ static void raid10_end_write_request(struct bio *bio, int error)
int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
r10bio_t * r10_bio = (r10bio_t *)(bio->bi_private);
int slot, dev;
- conf_t *conf = mddev_to_conf(r10_bio->mddev);
+ conf_t *conf = r10_bio->mddev->private;
for (slot = 0; slot < conf->copies; slot++)
if (r10_bio->devs[slot].bio == bio)
@@ -461,7 +461,7 @@ static int raid10_mergeable_bvec(struct request_queue *q,
mddev_t *mddev = q->queuedata;
sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
int max;
- unsigned int chunk_sectors = mddev->chunk_size >> 9;
+ unsigned int chunk_sectors = mddev->chunk_sectors;
unsigned int bio_sectors = bvm->bi_size >> 9;
max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9;
@@ -596,7 +596,7 @@ rb_out:
static void unplug_slaves(mddev_t *mddev)
{
- conf_t *conf = mddev_to_conf(mddev);
+ conf_t *conf = mddev->private;
int i;
rcu_read_lock();
@@ -628,7 +628,7 @@ static void raid10_unplug(struct request_queue *q)
static int raid10_congested(void *data, int bits)
{
mddev_t *mddev = data;
- conf_t *conf = mddev_to_conf(mddev);
+ conf_t *conf = mddev->private;
int i, ret = 0;
rcu_read_lock();
@@ -788,7 +788,7 @@ static void unfreeze_array(conf_t *conf)
static int make_request(struct request_queue *q, struct bio * bio)
{
mddev_t *mddev = q->queuedata;
- conf_t *conf = mddev_to_conf(mddev);
+ conf_t *conf = mddev->private;
mirror_info_t *mirror;
r10bio_t *r10_bio;
struct bio *read_bio;
@@ -981,11 +981,11 @@ static int make_request(struct request_queue *q, struct bio * bio)
static void status(struct seq_file *seq, mddev_t *mddev)
{
- conf_t *conf = mddev_to_conf(mddev);
+ conf_t *conf = mddev->private;
int i;
if (conf->near_copies < conf->raid_disks)
- seq_printf(seq, " %dK chunks", mddev->chunk_size/1024);
+ seq_printf(seq, " %dK chunks", mddev->chunk_sectors / 2);
if (conf->near_copies > 1)
seq_printf(seq, " %d near-copies", conf->near_copies);
if (conf->far_copies > 1) {
@@ -1006,7 +1006,7 @@ static void status(struct seq_file *seq, mddev_t *mddev)
static void error(mddev_t *mddev, mdk_rdev_t *rdev)
{
char b[BDEVNAME_SIZE];
- conf_t *conf = mddev_to_conf(mddev);
+ conf_t *conf = mddev->private;
/*
* If it is not operational, then we have already marked it as dead
@@ -1215,7 +1215,7 @@ abort:
static void end_sync_read(struct bio *bio, int error)
{
r10bio_t * r10_bio = (r10bio_t *)(bio->bi_private);
- conf_t *conf = mddev_to_conf(r10_bio->mddev);
+ conf_t *conf = r10_bio->mddev->private;
int i,d;
for (i=0; i<conf->copies; i++)
@@ -1253,7 +1253,7 @@ static void end_sync_write(struct bio *bio, int error)
int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
r10bio_t * r10_bio = (r10bio_t *)(bio->bi_private);
mddev_t *mddev = r10_bio->mddev;
- conf_t *conf = mddev_to_conf(mddev);
+ conf_t *conf = mddev->private;
int i,d;
for (i = 0; i < conf->copies; i++)
@@ -1300,7 +1300,7 @@ static void end_sync_write(struct bio *bio, int error)
*/
static void sync_request_write(mddev_t *mddev, r10bio_t *r10_bio)
{
- conf_t *conf = mddev_to_conf(mddev);
+ conf_t *conf = mddev->private;
int i, first;
struct bio *tbio, *fbio;
@@ -1400,7 +1400,7 @@ done:
static void recovery_request_write(mddev_t *mddev, r10bio_t *r10_bio)
{
- conf_t *conf = mddev_to_conf(mddev);
+ conf_t *conf = mddev->private;
int i, d;
struct bio *bio, *wbio;
@@ -1549,7 +1549,7 @@ static void raid10d(mddev_t *mddev)
r10bio_t *r10_bio;
struct bio *bio;
unsigned long flags;
- conf_t *conf = mddev_to_conf(mddev);
+ conf_t *conf = mddev->private;
struct list_head *head = &conf->retry_list;
int unplug=0;
mdk_rdev_t *rdev;
@@ -1572,7 +1572,7 @@ static void raid10d(mddev_t *mddev)
spin_unlock_irqrestore(&conf->device_lock, flags);
mddev = r10_bio->mddev;
- conf = mddev_to_conf(mddev);
+ conf = mddev->private;
if (test_bit(R10BIO_IsSync, &r10_bio->state)) {
sync_request_write(mddev, r10_bio);
unplug = 1;
@@ -1680,7 +1680,7 @@ static int init_resync(conf_t *conf)
static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster)
{
- conf_t *conf = mddev_to_conf(mddev);
+ conf_t *conf = mddev->private;
r10bio_t *r10_bio;
struct bio *biolist = NULL, *bio;
sector_t max_sector, nr_sectors;
@@ -2026,7 +2026,7 @@ static sector_t
raid10_size(mddev_t *mddev, sector_t sectors, int raid_disks)
{
sector_t size;
- conf_t *conf = mddev_to_conf(mddev);
+ conf_t *conf = mddev->private;
if (!raid_disks)
raid_disks = mddev->raid_disks;
@@ -2050,9 +2050,10 @@ static int run(mddev_t *mddev)
int nc, fc, fo;
sector_t stride, size;
- if (mddev->chunk_size < PAGE_SIZE) {
+ if (mddev->chunk_sectors < (PAGE_SIZE >> 9) ||
+ !is_power_of_2(mddev->chunk_sectors)) {
printk(KERN_ERR "md/raid10: chunk size must be "
- "at least PAGE_SIZE(%ld).\n", PAGE_SIZE);
+ "at least PAGE_SIZE(%ld) and be a power of 2.\n", PAGE_SIZE);
return -EINVAL;
}
@@ -2095,8 +2096,8 @@ static int run(mddev_t *mddev)
conf->far_copies = fc;
conf->copies = nc*fc;
conf->far_offset = fo;
- conf->chunk_mask = (sector_t)(mddev->chunk_size>>9)-1;
- conf->chunk_shift = ffz(~mddev->chunk_size) - 9;
+ conf->chunk_mask = mddev->chunk_sectors - 1;
+ conf->chunk_shift = ffz(~mddev->chunk_sectors);
size = mddev->dev_sectors >> conf->chunk_shift;
sector_div(size, fc);
size = size * conf->raid_disks;
@@ -2185,6 +2186,10 @@ static int run(mddev_t *mddev)
goto out_free_conf;
}
+ if (mddev->recovery_cp != MaxSector)
+ printk(KERN_NOTICE "raid10: %s is not clean"
+ " -- starting background reconstruction\n",
+ mdname(mddev));
printk(KERN_INFO
"raid10: raid set %s active with %d out of %d devices\n",
mdname(mddev), mddev->raid_disks - mddev->degraded,
@@ -2204,7 +2209,8 @@ static int run(mddev_t *mddev)
* maybe...
*/
{
- int stripe = conf->raid_disks * (mddev->chunk_size / PAGE_SIZE);
+ int stripe = conf->raid_disks *
+ ((mddev->chunk_sectors << 9) / PAGE_SIZE);
stripe /= conf->near_copies;
if (mddev->queue->backing_dev_info.ra_pages < 2* stripe)
mddev->queue->backing_dev_info.ra_pages = 2* stripe;
@@ -2227,7 +2233,7 @@ out:
static int stop(mddev_t *mddev)
{
- conf_t *conf = mddev_to_conf(mddev);
+ conf_t *conf = mddev->private;
raise_barrier(conf, 0);
lower_barrier(conf);
@@ -2245,7 +2251,7 @@ static int stop(mddev_t *mddev)
static void raid10_quiesce(mddev_t *mddev, int state)
{
- conf_t *conf = mddev_to_conf(mddev);
+ conf_t *conf = mddev->private;
switch(state) {
case 1:
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h
index 244dbe507a5..59cd1efb8d3 100644
--- a/drivers/md/raid10.h
+++ b/drivers/md/raid10.h
@@ -62,12 +62,6 @@ struct r10_private_data_s {
typedef struct r10_private_data_s conf_t;
/*
- * this is the only point in the RAID code where we violate
- * C type safety. mddev->private is an 'opaque' pointer.
- */
-#define mddev_to_conf(mddev) ((conf_t *) mddev->private)
-
-/*
* this is our 'private' RAID10 bio.
*
* it contains information about what kind of IO operations were started
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index bef87669823..f9f991e6e13 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1274,8 +1274,8 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector,
sector_t new_sector;
int algorithm = previous ? conf->prev_algo
: conf->algorithm;
- int sectors_per_chunk = previous ? (conf->prev_chunk >> 9)
- : (conf->chunk_size >> 9);
+ int sectors_per_chunk = previous ? conf->prev_chunk_sectors
+ : conf->chunk_sectors;
int raid_disks = previous ? conf->previous_raid_disks
: conf->raid_disks;
int data_disks = raid_disks - conf->max_degraded;
@@ -1480,8 +1480,8 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous)
int raid_disks = sh->disks;
int data_disks = raid_disks - conf->max_degraded;
sector_t new_sector = sh->sector, check;
- int sectors_per_chunk = previous ? (conf->prev_chunk >> 9)
- : (conf->chunk_size >> 9);
+ int sectors_per_chunk = previous ? conf->prev_chunk_sectors
+ : conf->chunk_sectors;
int algorithm = previous ? conf->prev_algo
: conf->algorithm;
sector_t stripe;
@@ -1997,8 +1997,7 @@ static void stripe_set_idx(sector_t stripe, raid5_conf_t *conf, int previous,
struct stripe_head *sh)
{
int sectors_per_chunk =
- previous ? (conf->prev_chunk >> 9)
- : (conf->chunk_size >> 9);
+ previous ? conf->prev_chunk_sectors : conf->chunk_sectors;
int dd_idx;
int chunk_offset = sector_div(stripe, sectors_per_chunk);
int disks = previous ? conf->previous_raid_disks : conf->raid_disks;
@@ -3284,7 +3283,7 @@ static void activate_bit_delay(raid5_conf_t *conf)
static void unplug_slaves(mddev_t *mddev)
{
- raid5_conf_t *conf = mddev_to_conf(mddev);
+ raid5_conf_t *conf = mddev->private;
int i;
rcu_read_lock();
@@ -3308,7 +3307,7 @@ static void unplug_slaves(mddev_t *mddev)
static void raid5_unplug_device(struct request_queue *q)
{
mddev_t *mddev = q->queuedata;
- raid5_conf_t *conf = mddev_to_conf(mddev);
+ raid5_conf_t *conf = mddev->private;
unsigned long flags;
spin_lock_irqsave(&conf->device_lock, flags);
@@ -3327,7 +3326,7 @@ static void raid5_unplug_device(struct request_queue *q)
static int raid5_congested(void *data, int bits)
{
mddev_t *mddev = data;
- raid5_conf_t *conf = mddev_to_conf(mddev);
+ raid5_conf_t *conf = mddev->private;
/* No difference between reads and writes. Just check
* how busy the stripe_cache is
@@ -3352,14 +3351,14 @@ static int raid5_mergeable_bvec(struct request_queue *q,
mddev_t *mddev = q->queuedata;
sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
int max;
- unsigned int chunk_sectors = mddev->chunk_size >> 9;
+ unsigned int chunk_sectors = mddev->chunk_sectors;
unsigned int bio_sectors = bvm->bi_size >> 9;
if ((bvm->bi_rw & 1) == WRITE)
return biovec->bv_len; /* always allow writes to be mergeable */
- if (mddev->new_chunk < mddev->chunk_size)
- chunk_sectors = mddev->new_chunk >> 9;
+ if (mddev->new_chunk_sectors < mddev->chunk_sectors)
+ chunk_sectors = mddev->new_chunk_sectors;
max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9;
if (max < 0) max = 0;
if (max <= biovec->bv_len && bio_sectors == 0)
@@ -3372,11 +3371,11 @@ static int raid5_mergeable_bvec(struct request_queue *q,
static int in_chunk_boundary(mddev_t *mddev, struct bio *bio)
{
sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev);
- unsigned int chunk_sectors = mddev->chunk_size >> 9;
+ unsigned int chunk_sectors = mddev->chunk_sectors;
unsigned int bio_sectors = bio->bi_size >> 9;
- if (mddev->new_chunk < mddev->chunk_size)
- chunk_sectors = mddev->new_chunk >> 9;
+ if (mddev->new_chunk_sectors < mddev->chunk_sectors)
+ chunk_sectors = mddev->new_chunk_sectors;
return chunk_sectors >=
((sector & (chunk_sectors - 1)) + bio_sectors);
}
@@ -3440,7 +3439,7 @@ static void raid5_align_endio(struct bio *bi, int error)
bio_put(bi);
mddev = raid_bi->bi_bdev->bd_disk->queue->queuedata;
- conf = mddev_to_conf(mddev);
+ conf = mddev->private;
rdev = (void*)raid_bi->bi_next;
raid_bi->bi_next = NULL;
@@ -3482,7 +3481,7 @@ static int bio_fits_rdev(struct bio *bi)
static int chunk_aligned_read(struct request_queue *q, struct bio * raid_bio)
{
mddev_t *mddev = q->queuedata;
- raid5_conf_t *conf = mddev_to_conf(mddev);
+ raid5_conf_t *conf = mddev->private;
unsigned int dd_idx;
struct bio* align_bi;
mdk_rdev_t *rdev;
@@ -3599,7 +3598,7 @@ static struct stripe_head *__get_priority_stripe(raid5_conf_t *conf)
static int make_request(struct request_queue *q, struct bio * bi)
{
mddev_t *mddev = q->queuedata;
- raid5_conf_t *conf = mddev_to_conf(mddev);
+ raid5_conf_t *conf = mddev->private;
int dd_idx;
sector_t new_sector;
sector_t logical_sector, last_sector;
@@ -3696,6 +3695,7 @@ static int make_request(struct request_queue *q, struct bio * bi)
spin_unlock_irq(&conf->device_lock);
if (must_retry) {
release_stripe(sh);
+ schedule();
goto retry;
}
}
@@ -3791,10 +3791,10 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
* If old and new chunk sizes differ, we need to process the
* largest of these
*/
- if (mddev->new_chunk > mddev->chunk_size)
- reshape_sectors = mddev->new_chunk / 512;
+ if (mddev->new_chunk_sectors > mddev->chunk_sectors)
+ reshape_sectors = mddev->new_chunk_sectors;
else
- reshape_sectors = mddev->chunk_size / 512;
+ reshape_sectors = mddev->chunk_sectors;
/* we update the metadata when there is more than 3Meg
* in the block range (that is rather arbitrary, should
@@ -3917,7 +3917,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
1, &dd_idx, NULL);
last_sector =
raid5_compute_sector(conf, ((stripe_addr+reshape_sectors)
- *(new_data_disks) - 1),
+ * new_data_disks - 1),
1, &dd_idx, NULL);
if (last_sector >= mddev->dev_sectors)
last_sector = mddev->dev_sectors - 1;
@@ -3946,7 +3946,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
wait_event(conf->wait_for_overlap,
atomic_read(&conf->reshape_stripes) == 0);
mddev->reshape_position = conf->reshape_progress;
- mddev->curr_resync_completed = mddev->curr_resync;
+ mddev->curr_resync_completed = mddev->curr_resync + reshape_sectors;
conf->reshape_checkpoint = jiffies;
set_bit(MD_CHANGE_DEVS, &mddev->flags);
md_wakeup_thread(mddev->thread);
@@ -4129,7 +4129,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
static void raid5d(mddev_t *mddev)
{
struct stripe_head *sh;
- raid5_conf_t *conf = mddev_to_conf(mddev);
+ raid5_conf_t *conf = mddev->private;
int handled;
pr_debug("+++ raid5d active\n");
@@ -4185,7 +4185,7 @@ static void raid5d(mddev_t *mddev)
static ssize_t
raid5_show_stripe_cache_size(mddev_t *mddev, char *page)
{
- raid5_conf_t *conf = mddev_to_conf(mddev);
+ raid5_conf_t *conf = mddev->private;
if (conf)
return sprintf(page, "%d\n", conf->max_nr_stripes);
else
@@ -4195,7 +4195,7 @@ raid5_show_stripe_cache_size(mddev_t *mddev, char *page)
static ssize_t
raid5_store_stripe_cache_size(mddev_t *mddev, const char *page, size_t len)
{
- raid5_conf_t *conf = mddev_to_conf(mddev);
+ raid5_conf_t *conf = mddev->private;
unsigned long new;
int err;
@@ -4233,7 +4233,7 @@ raid5_stripecache_size = __ATTR(stripe_cache_size, S_IRUGO | S_IWUSR,
static ssize_t
raid5_show_preread_threshold(mddev_t *mddev, char *page)
{
- raid5_conf_t *conf = mddev_to_conf(mddev);
+ raid5_conf_t *conf = mddev->private;
if (conf)
return sprintf(page, "%d\n", conf->bypass_threshold);
else
@@ -4243,7 +4243,7 @@ raid5_show_preread_threshold(mddev_t *mddev, char *page)
static ssize_t
raid5_store_preread_threshold(mddev_t *mddev, const char *page, size_t len)
{
- raid5_conf_t *conf = mddev_to_conf(mddev);
+ raid5_conf_t *conf = mddev->private;
unsigned long new;
if (len >= PAGE_SIZE)
return -EINVAL;
@@ -4267,7 +4267,7 @@ raid5_preread_bypass_threshold = __ATTR(preread_bypass_threshold,
static ssize_t
stripe_cache_active_show(mddev_t *mddev, char *page)
{
- raid5_conf_t *conf = mddev_to_conf(mddev);
+ raid5_conf_t *conf = mddev->private;
if (conf)
return sprintf(page, "%d\n", atomic_read(&conf->active_stripes));
else
@@ -4291,7 +4291,7 @@ static struct attribute_group raid5_attrs_group = {
static sector_t
raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks)
{
- raid5_conf_t *conf = mddev_to_conf(mddev);
+ raid5_conf_t *conf = mddev->private;
if (!sectors)
sectors = mddev->dev_sectors;
@@ -4303,8 +4303,8 @@ raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks)
raid_disks = conf->previous_raid_disks;
}
- sectors &= ~((sector_t)mddev->chunk_size/512 - 1);
- sectors &= ~((sector_t)mddev->new_chunk/512 - 1);
+ sectors &= ~((sector_t)mddev->chunk_sectors - 1);
+ sectors &= ~((sector_t)mddev->new_chunk_sectors - 1);
return sectors * (raid_disks - conf->max_degraded);
}
@@ -4336,9 +4336,11 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
return ERR_PTR(-EINVAL);
}
- if (!mddev->new_chunk || mddev->new_chunk % PAGE_SIZE) {
+ if (!mddev->new_chunk_sectors ||
+ (mddev->new_chunk_sectors << 9) % PAGE_SIZE ||
+ !is_power_of_2(mddev->new_chunk_sectors)) {
printk(KERN_ERR "raid5: invalid chunk size %d for %s\n",
- mddev->new_chunk, mdname(mddev));
+ mddev->new_chunk_sectors << 9, mdname(mddev));
return ERR_PTR(-EINVAL);
}
@@ -4401,7 +4403,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
conf->fullsync = 1;
}
- conf->chunk_size = mddev->new_chunk;
+ conf->chunk_sectors = mddev->new_chunk_sectors;
conf->level = mddev->new_level;
if (conf->level == 6)
conf->max_degraded = 2;
@@ -4411,7 +4413,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
conf->max_nr_stripes = NR_STRIPES;
conf->reshape_progress = mddev->reshape_position;
if (conf->reshape_progress != MaxSector) {
- conf->prev_chunk = mddev->chunk_size;
+ conf->prev_chunk_sectors = mddev->chunk_sectors;
conf->prev_algo = mddev->layout;
}
@@ -4453,6 +4455,10 @@ static int run(mddev_t *mddev)
int working_disks = 0;
mdk_rdev_t *rdev;
+ if (mddev->recovery_cp != MaxSector)
+ printk(KERN_NOTICE "raid5: %s is not clean"
+ " -- starting background reconstruction\n",
+ mdname(mddev));
if (mddev->reshape_position != MaxSector) {
/* Check that we can continue the reshape.
* Currently only disks can change, it must
@@ -4475,7 +4481,7 @@ static int run(mddev_t *mddev)
* geometry.
*/
here_new = mddev->reshape_position;
- if (sector_div(here_new, (mddev->new_chunk>>9)*
+ if (sector_div(here_new, mddev->new_chunk_sectors *
(mddev->raid_disks - max_degraded))) {
printk(KERN_ERR "raid5: reshape_position not "
"on a stripe boundary\n");
@@ -4483,7 +4489,7 @@ static int run(mddev_t *mddev)
}
/* here_new is the stripe we will write to */
here_old = mddev->reshape_position;
- sector_div(here_old, (mddev->chunk_size>>9)*
+ sector_div(here_old, mddev->chunk_sectors *
(old_disks-max_degraded));
/* here_old is the first stripe that we might need to read
* from */
@@ -4498,7 +4504,7 @@ static int run(mddev_t *mddev)
} else {
BUG_ON(mddev->level != mddev->new_level);
BUG_ON(mddev->layout != mddev->new_layout);
- BUG_ON(mddev->chunk_size != mddev->new_chunk);
+ BUG_ON(mddev->chunk_sectors != mddev->new_chunk_sectors);
BUG_ON(mddev->delta_disks != 0);
}
@@ -4532,7 +4538,7 @@ static int run(mddev_t *mddev)
}
/* device size must be a multiple of chunk size */
- mddev->dev_sectors &= ~(mddev->chunk_size / 512 - 1);
+ mddev->dev_sectors &= ~(mddev->chunk_sectors - 1);
mddev->resync_max_sectors = mddev->dev_sectors;
if (mddev->degraded > 0 &&
@@ -4581,7 +4587,7 @@ static int run(mddev_t *mddev)
{
int data_disks = conf->previous_raid_disks - conf->max_degraded;
int stripe = data_disks *
- (mddev->chunk_size / PAGE_SIZE);
+ ((mddev->chunk_sectors << 9) / PAGE_SIZE);
if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
mddev->queue->backing_dev_info.ra_pages = 2 * stripe;
}
@@ -4678,7 +4684,8 @@ static void status(struct seq_file *seq, mddev_t *mddev)
raid5_conf_t *conf = (raid5_conf_t *) mddev->private;
int i;
- seq_printf (seq, " level %d, %dk chunk, algorithm %d", mddev->level, mddev->chunk_size >> 10, mddev->layout);
+ seq_printf(seq, " level %d, %dk chunk, algorithm %d", mddev->level,
+ mddev->chunk_sectors / 2, mddev->layout);
seq_printf (seq, " [%d/%d] [", conf->raid_disks, conf->raid_disks - mddev->degraded);
for (i = 0; i < conf->raid_disks; i++)
seq_printf (seq, "%s",
@@ -4826,7 +4833,7 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors)
* any io in the removed space completes, but it hardly seems
* worth it.
*/
- sectors &= ~((sector_t)mddev->chunk_size/512 - 1);
+ sectors &= ~((sector_t)mddev->chunk_sectors - 1);
md_set_array_sectors(mddev, raid5_size(mddev, sectors,
mddev->raid_disks));
if (mddev->array_sectors >
@@ -4843,14 +4850,37 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors)
return 0;
}
-static int raid5_check_reshape(mddev_t *mddev)
+static int check_stripe_cache(mddev_t *mddev)
{
- raid5_conf_t *conf = mddev_to_conf(mddev);
+ /* Can only proceed if there are plenty of stripe_heads.
+ * We need a minimum of one full stripe,, and for sensible progress
+ * it is best to have about 4 times that.
+ * If we require 4 times, then the default 256 4K stripe_heads will
+ * allow for chunk sizes up to 256K, which is probably OK.
+ * If the chunk size is greater, user-space should request more
+ * stripe_heads first.
+ */
+ raid5_conf_t *conf = mddev->private;
+ if (((mddev->chunk_sectors << 9) / STRIPE_SIZE) * 4
+ > conf->max_nr_stripes ||
+ ((mddev->new_chunk_sectors << 9) / STRIPE_SIZE) * 4
+ > conf->max_nr_stripes) {
+ printk(KERN_WARNING "raid5: reshape: not enough stripes. Needed %lu\n",
+ ((max(mddev->chunk_sectors, mddev->new_chunk_sectors) << 9)
+ / STRIPE_SIZE)*4);
+ return 0;
+ }
+ return 1;
+}
+
+static int check_reshape(mddev_t *mddev)
+{
+ raid5_conf_t *conf = mddev->private;
if (mddev->delta_disks == 0 &&
mddev->new_layout == mddev->layout &&
- mddev->new_chunk == mddev->chunk_size)
- return -EINVAL; /* nothing to do */
+ mddev->new_chunk_sectors == mddev->chunk_sectors)
+ return 0; /* nothing to do */
if (mddev->bitmap)
/* Cannot grow a bitmap yet */
return -EBUSY;
@@ -4869,28 +4899,15 @@ static int raid5_check_reshape(mddev_t *mddev)
return -EINVAL;
}
- /* Can only proceed if there are plenty of stripe_heads.
- * We need a minimum of one full stripe,, and for sensible progress
- * it is best to have about 4 times that.
- * If we require 4 times, then the default 256 4K stripe_heads will
- * allow for chunk sizes up to 256K, which is probably OK.
- * If the chunk size is greater, user-space should request more
- * stripe_heads first.
- */
- if ((mddev->chunk_size / STRIPE_SIZE) * 4 > conf->max_nr_stripes ||
- (mddev->new_chunk / STRIPE_SIZE) * 4 > conf->max_nr_stripes) {
- printk(KERN_WARNING "raid5: reshape: not enough stripes. Needed %lu\n",
- (max(mddev->chunk_size, mddev->new_chunk)
- / STRIPE_SIZE)*4);
+ if (!check_stripe_cache(mddev))
return -ENOSPC;
- }
return resize_stripes(conf, conf->raid_disks + mddev->delta_disks);
}
static int raid5_start_reshape(mddev_t *mddev)
{
- raid5_conf_t *conf = mddev_to_conf(mddev);
+ raid5_conf_t *conf = mddev->private;
mdk_rdev_t *rdev;
int spares = 0;
int added_devices = 0;
@@ -4899,6 +4916,9 @@ static int raid5_start_reshape(mddev_t *mddev)
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
return -EBUSY;
+ if (!check_stripe_cache(mddev))
+ return -ENOSPC;
+
list_for_each_entry(rdev, &mddev->disks, same_set)
if (rdev->raid_disk < 0 &&
!test_bit(Faulty, &rdev->flags))
@@ -4925,8 +4945,8 @@ static int raid5_start_reshape(mddev_t *mddev)
spin_lock_irq(&conf->device_lock);
conf->previous_raid_disks = conf->raid_disks;
conf->raid_disks += mddev->delta_disks;
- conf->prev_chunk = conf->chunk_size;
- conf->chunk_size = mddev->new_chunk;
+ conf->prev_chunk_sectors = conf->chunk_sectors;
+ conf->chunk_sectors = mddev->new_chunk_sectors;
conf->prev_algo = conf->algorithm;
conf->algorithm = mddev->new_layout;
if (mddev->delta_disks < 0)
@@ -5008,7 +5028,7 @@ static void end_reshape(raid5_conf_t *conf)
*/
{
int data_disks = conf->raid_disks - conf->max_degraded;
- int stripe = data_disks * (conf->chunk_size
+ int stripe = data_disks * ((conf->chunk_sectors << 9)
/ PAGE_SIZE);
if (conf->mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
conf->mddev->queue->backing_dev_info.ra_pages = 2 * stripe;
@@ -5022,7 +5042,7 @@ static void end_reshape(raid5_conf_t *conf)
static void raid5_finish_reshape(mddev_t *mddev)
{
struct block_device *bdev;
- raid5_conf_t *conf = mddev_to_conf(mddev);
+ raid5_conf_t *conf = mddev->private;
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
@@ -5053,7 +5073,7 @@ static void raid5_finish_reshape(mddev_t *mddev)
raid5_remove_disk(mddev, d);
}
mddev->layout = conf->algorithm;
- mddev->chunk_size = conf->chunk_size;
+ mddev->chunk_sectors = conf->chunk_sectors;
mddev->reshape_position = MaxSector;
mddev->delta_disks = 0;
}
@@ -5061,7 +5081,7 @@ static void raid5_finish_reshape(mddev_t *mddev)
static void raid5_quiesce(mddev_t *mddev, int state)
{
- raid5_conf_t *conf = mddev_to_conf(mddev);
+ raid5_conf_t *conf = mddev->private;
switch(state) {
case 2: /* resume for a suspend */
@@ -5111,7 +5131,7 @@ static void *raid5_takeover_raid1(mddev_t *mddev)
mddev->new_level = 5;
mddev->new_layout = ALGORITHM_LEFT_SYMMETRIC;
- mddev->new_chunk = chunksect << 9;
+ mddev->new_chunk_sectors = chunksect;
return setup_conf(mddev);
}
@@ -5150,24 +5170,24 @@ static void *raid5_takeover_raid6(mddev_t *mddev)
}
-static int raid5_reconfig(mddev_t *mddev, int new_layout, int new_chunk)
+static int raid5_check_reshape(mddev_t *mddev)
{
/* For a 2-drive array, the layout and chunk size can be changed
* immediately as not restriping is needed.
* For larger arrays we record the new value - after validation
* to be used by a reshape pass.
*/
- raid5_conf_t *conf = mddev_to_conf(mddev);
+ raid5_conf_t *conf = mddev->private;
+ int new_chunk = mddev->new_chunk_sectors;
- if (new_layout >= 0 && !algorithm_valid_raid5(new_layout))
+ if (mddev->new_layout >= 0 && !algorithm_valid_raid5(mddev->new_layout))
return -EINVAL;
if (new_chunk > 0) {
- if (new_chunk & (new_chunk-1))
- /* not a power of 2 */
+ if (!is_power_of_2(new_chunk))
return -EINVAL;
- if (new_chunk < PAGE_SIZE)
+ if (new_chunk < (PAGE_SIZE>>9))
return -EINVAL;
- if (mddev->array_sectors & ((new_chunk>>9)-1))
+ if (mddev->array_sectors & (new_chunk-1))
/* not factor of array size */
return -EINVAL;
}
@@ -5175,49 +5195,39 @@ static int raid5_reconfig(mddev_t *mddev, int new_layout, int new_chunk)
/* They look valid */
if (mddev->raid_disks == 2) {
-
- if (new_layout >= 0) {
- conf->algorithm = new_layout;
- mddev->layout = mddev->new_layout = new_layout;
+ /* can make the change immediately */
+ if (mddev->new_layout >= 0) {
+ conf->algorithm = mddev->new_layout;
+ mddev->layout = mddev->new_layout;
}
if (new_chunk > 0) {
- conf->chunk_size = new_chunk;
- mddev->chunk_size = mddev->new_chunk = new_chunk;
+ conf->chunk_sectors = new_chunk ;
+ mddev->chunk_sectors = new_chunk;
}
set_bit(MD_CHANGE_DEVS, &mddev->flags);
md_wakeup_thread(mddev->thread);
- } else {
- if (new_layout >= 0)
- mddev->new_layout = new_layout;
- if (new_chunk > 0)
- mddev->new_chunk = new_chunk;
}
- return 0;
+ return check_reshape(mddev);
}
-static int raid6_reconfig(mddev_t *mddev, int new_layout, int new_chunk)
+static int raid6_check_reshape(mddev_t *mddev)
{
- if (new_layout >= 0 && !algorithm_valid_raid6(new_layout))
+ int new_chunk = mddev->new_chunk_sectors;
+
+ if (mddev->new_layout >= 0 && !algorithm_valid_raid6(mddev->new_layout))
return -EINVAL;
if (new_chunk > 0) {
- if (new_chunk & (new_chunk-1))
- /* not a power of 2 */
+ if (!is_power_of_2(new_chunk))
return -EINVAL;
- if (new_chunk < PAGE_SIZE)
+ if (new_chunk < (PAGE_SIZE >> 9))
return -EINVAL;
- if (mddev->array_sectors & ((new_chunk>>9)-1))
+ if (mddev->array_sectors & (new_chunk-1))
/* not factor of array size */
return -EINVAL;
}
/* They look valid */
-
- if (new_layout >= 0)
- mddev->new_layout = new_layout;
- if (new_chunk > 0)
- mddev->new_chunk = new_chunk;
-
- return 0;
+ return check_reshape(mddev);
}
static void *raid5_takeover(mddev_t *mddev)
@@ -5227,8 +5237,6 @@ static void *raid5_takeover(mddev_t *mddev)
* raid1 - if there are two drives. We need to know the chunk size
* raid4 - trivial - just use a raid4 layout.
* raid6 - Providing it is a *_6 layout
- *
- * For now, just do raid1
*/
if (mddev->level == 1)
@@ -5310,12 +5318,11 @@ static struct mdk_personality raid6_personality =
.sync_request = sync_request,
.resize = raid5_resize,
.size = raid5_size,
- .check_reshape = raid5_check_reshape,
+ .check_reshape = raid6_check_reshape,
.start_reshape = raid5_start_reshape,
.finish_reshape = raid5_finish_reshape,
.quiesce = raid5_quiesce,
.takeover = raid6_takeover,
- .reconfig = raid6_reconfig,
};
static struct mdk_personality raid5_personality =
{
@@ -5338,7 +5345,6 @@ static struct mdk_personality raid5_personality =
.finish_reshape = raid5_finish_reshape,
.quiesce = raid5_quiesce,
.takeover = raid5_takeover,
- .reconfig = raid5_reconfig,
};
static struct mdk_personality raid4_personality =
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 52ba99954de..9459689c4ea 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -334,7 +334,8 @@ struct raid5_private_data {
struct hlist_head *stripe_hashtbl;
mddev_t *mddev;
struct disk_info *spare;
- int chunk_size, level, algorithm;
+ int chunk_sectors;
+ int level, algorithm;
int max_degraded;
int raid_disks;
int max_nr_stripes;
@@ -350,7 +351,8 @@ struct raid5_private_data {
*/
sector_t reshape_safe;
int previous_raid_disks;
- int prev_chunk, prev_algo;
+ int prev_chunk_sectors;
+ int prev_algo;
short generation; /* increments with every reshape */
unsigned long reshape_checkpoint; /* Time we last updated
* metadata */
@@ -408,8 +410,6 @@ struct raid5_private_data {
typedef struct raid5_private_data raid5_conf_t;
-#define mddev_to_conf(mddev) ((raid5_conf_t *) mddev->private)
-
/*
* Our supported algorithms
*/
diff --git a/drivers/misc/sgi-gru/Makefile b/drivers/misc/sgi-gru/Makefile
index bcd8136d2f9..7c4c306dfa8 100644
--- a/drivers/misc/sgi-gru/Makefile
+++ b/drivers/misc/sgi-gru/Makefile
@@ -3,5 +3,5 @@ ifdef CONFIG_SGI_GRU_DEBUG
endif
obj-$(CONFIG_SGI_GRU) := gru.o
-gru-y := grufile.o grumain.o grufault.o grutlbpurge.o gruprocfs.o grukservices.o gruhandles.o
+gru-y := grufile.o grumain.o grufault.o grutlbpurge.o gruprocfs.o grukservices.o gruhandles.o grukdump.o
diff --git a/drivers/misc/sgi-gru/gru_instructions.h b/drivers/misc/sgi-gru/gru_instructions.h
index 3fde33c1e8f..3c9c06618e6 100644
--- a/drivers/misc/sgi-gru/gru_instructions.h
+++ b/drivers/misc/sgi-gru/gru_instructions.h
@@ -81,6 +81,8 @@ struct control_block_extended_exc_detail {
int exopc;
long exceptdet0;
int exceptdet1;
+ int cbrstate;
+ int cbrexecstatus;
};
/*
@@ -107,7 +109,8 @@ struct gru_instruction_bits {
unsigned char reserved2: 2;
unsigned char istatus: 2;
unsigned char isubstatus:4;
- unsigned char reserved3: 2;
+ unsigned char reserved3: 1;
+ unsigned char tlb_fault_color: 1;
/* DW 1 */
unsigned long idef4; /* 42 bits: TRi1, BufSize */
/* DW 2-6 */
@@ -250,17 +253,37 @@ struct gru_instruction {
#define CBE_CAUSE_HA_RESPONSE_FATAL (1 << 13)
#define CBE_CAUSE_HA_RESPONSE_NON_FATAL (1 << 14)
#define CBE_CAUSE_ADDRESS_SPACE_DECODE_ERROR (1 << 15)
-#define CBE_CAUSE_RESPONSE_DATA_ERROR (1 << 16)
-#define CBE_CAUSE_PROTOCOL_STATE_DATA_ERROR (1 << 17)
+#define CBE_CAUSE_PROTOCOL_STATE_DATA_ERROR (1 << 16)
+#define CBE_CAUSE_RA_RESPONSE_DATA_ERROR (1 << 17)
+#define CBE_CAUSE_HA_RESPONSE_DATA_ERROR (1 << 18)
+
+/* CBE cbrexecstatus bits */
+#define CBR_EXS_ABORT_OCC_BIT 0
+#define CBR_EXS_INT_OCC_BIT 1
+#define CBR_EXS_PENDING_BIT 2
+#define CBR_EXS_QUEUED_BIT 3
+#define CBR_EXS_TLB_INVAL_BIT 4
+#define CBR_EXS_EXCEPTION_BIT 5
+
+#define CBR_EXS_ABORT_OCC (1 << CBR_EXS_ABORT_OCC_BIT)
+#define CBR_EXS_INT_OCC (1 << CBR_EXS_INT_OCC_BIT)
+#define CBR_EXS_PENDING (1 << CBR_EXS_PENDING_BIT)
+#define CBR_EXS_QUEUED (1 << CBR_EXS_QUEUED_BIT)
+#define CBR_TLB_INVAL (1 << CBR_EXS_TLB_INVAL_BIT)
+#define CBR_EXS_EXCEPTION (1 << CBR_EXS_EXCEPTION_BIT)
/*
* Exceptions are retried for the following cases. If any OTHER bits are set
* in ecause, the exception is not retryable.
*/
-#define EXCEPTION_RETRY_BITS (CBE_CAUSE_RESPONSE_DATA_ERROR | \
- CBE_CAUSE_RA_REQUEST_TIMEOUT | \
+#define EXCEPTION_RETRY_BITS (CBE_CAUSE_EXECUTION_HW_ERROR | \
CBE_CAUSE_TLBHW_ERROR | \
- CBE_CAUSE_HA_REQUEST_TIMEOUT)
+ CBE_CAUSE_RA_REQUEST_TIMEOUT | \
+ CBE_CAUSE_RA_RESPONSE_NON_FATAL | \
+ CBE_CAUSE_HA_RESPONSE_NON_FATAL | \
+ CBE_CAUSE_RA_RESPONSE_DATA_ERROR | \
+ CBE_CAUSE_HA_RESPONSE_DATA_ERROR \
+ )
/* Message queue head structure */
union gru_mesqhead {
@@ -600,9 +623,11 @@ static inline int gru_get_cb_substatus(void *cb)
return cbs->isubstatus;
}
-/* Check the status of a CB. If the CB is in UPM mode, call the
- * OS to handle the UPM status.
- * Returns the CB status field value (0 for normal completion)
+/*
+ * User interface to check an instruction status. UPM and exceptions
+ * are handled automatically. However, this function does NOT wait
+ * for an active instruction to complete.
+ *
*/
static inline int gru_check_status(void *cb)
{
@@ -610,34 +635,31 @@ static inline int gru_check_status(void *cb)
int ret;
ret = cbs->istatus;
- if (ret == CBS_CALL_OS)
+ if (ret != CBS_ACTIVE)
ret = gru_check_status_proc(cb);
return ret;
}
-/* Wait for CB to complete.
- * Returns the CB status field value (0 for normal completion)
+/*
+ * User interface (via inline function) to wait for an instruction
+ * to complete. Completion status (IDLE or EXCEPTION is returned
+ * to the user. Exception due to hardware errors are automatically
+ * retried before returning an exception.
+ *
*/
static inline int gru_wait(void *cb)
{
- struct gru_control_block_status *cbs = (void *)cb;
- int ret = cbs->istatus;
-
- if (ret != CBS_IDLE)
- ret = gru_wait_proc(cb);
- return ret;
+ return gru_wait_proc(cb);
}
-/* Wait for CB to complete. Aborts program if error. (Note: error does NOT
+/*
+ * Wait for CB to complete. Aborts program if error. (Note: error does NOT
* mean TLB mis - only fatal errors such as memory parity error or user
* bugs will cause termination.
*/
static inline void gru_wait_abort(void *cb)
{
- struct gru_control_block_status *cbs = (void *)cb;
-
- if (cbs->istatus != CBS_IDLE)
- gru_wait_abort_proc(cb);
+ gru_wait_abort_proc(cb);
}
diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c
index ab118558552..679e0177828 100644
--- a/drivers/misc/sgi-gru/grufault.c
+++ b/drivers/misc/sgi-gru/grufault.c
@@ -166,7 +166,8 @@ static inline struct gru_state *irq_to_gru(int irq)
* the GRU, atomic operations must be used to clear bits.
*/
static void get_clear_fault_map(struct gru_state *gru,
- struct gru_tlb_fault_map *map)
+ struct gru_tlb_fault_map *imap,
+ struct gru_tlb_fault_map *dmap)
{
unsigned long i, k;
struct gru_tlb_fault_map *tfm;
@@ -177,7 +178,11 @@ static void get_clear_fault_map(struct gru_state *gru,
k = tfm->fault_bits[i];
if (k)
k = xchg(&tfm->fault_bits[i], 0UL);
- map->fault_bits[i] = k;
+ imap->fault_bits[i] = k;
+ k = tfm->done_bits[i];
+ if (k)
+ k = xchg(&tfm->done_bits[i], 0UL);
+ dmap->fault_bits[i] = k;
}
/*
@@ -334,6 +339,12 @@ static int gru_try_dropin(struct gru_thread_state *gts,
* Might be a hardware race OR a stupid user. Ignore FMM because FMM
* is a transient state.
*/
+ if (tfh->status != TFHSTATUS_EXCEPTION) {
+ gru_flush_cache(tfh);
+ if (tfh->status != TFHSTATUS_EXCEPTION)
+ goto failnoexception;
+ STAT(tfh_stale_on_fault);
+ }
if (tfh->state == TFHSTATE_IDLE)
goto failidle;
if (tfh->state == TFHSTATE_MISS_FMM && cb)
@@ -401,8 +412,17 @@ failfmm:
gru_dbg(grudev, "FAILED fmm tfh: 0x%p, state %d\n", tfh, tfh->state);
return 0;
+failnoexception:
+ /* TFH status did not show exception pending */
+ gru_flush_cache(tfh);
+ if (cb)
+ gru_flush_cache(cb);
+ STAT(tlb_dropin_fail_no_exception);
+ gru_dbg(grudev, "FAILED non-exception tfh: 0x%p, status %d, state %d\n", tfh, tfh->status, tfh->state);
+ return 0;
+
failidle:
- /* TFH was idle - no miss pending */
+ /* TFH state was idle - no miss pending */
gru_flush_cache(tfh);
if (cb)
gru_flush_cache(cb);
@@ -438,7 +458,7 @@ failactive:
irqreturn_t gru_intr(int irq, void *dev_id)
{
struct gru_state *gru;
- struct gru_tlb_fault_map map;
+ struct gru_tlb_fault_map imap, dmap;
struct gru_thread_state *gts;
struct gru_tlb_fault_handle *tfh = NULL;
int cbrnum, ctxnum;
@@ -451,11 +471,15 @@ irqreturn_t gru_intr(int irq, void *dev_id)
raw_smp_processor_id(), irq);
return IRQ_NONE;
}
- get_clear_fault_map(gru, &map);
- gru_dbg(grudev, "irq %d, gru %x, map 0x%lx\n", irq, gru->gs_gid,
- map.fault_bits[0]);
+ get_clear_fault_map(gru, &imap, &dmap);
+
+ for_each_cbr_in_tfm(cbrnum, dmap.fault_bits) {
+ complete(gru->gs_blade->bs_async_wq);
+ gru_dbg(grudev, "gid %d, cbr_done %d, done %d\n",
+ gru->gs_gid, cbrnum, gru->gs_blade->bs_async_wq->done);
+ }
- for_each_cbr_in_tfm(cbrnum, map.fault_bits) {
+ for_each_cbr_in_tfm(cbrnum, imap.fault_bits) {
tfh = get_tfh_by_index(gru, cbrnum);
prefetchw(tfh); /* Helps on hdw, required for emulator */
@@ -472,7 +496,9 @@ irqreturn_t gru_intr(int irq, void *dev_id)
* This is running in interrupt context. Trylock the mmap_sem.
* If it fails, retry the fault in user context.
*/
- if (down_read_trylock(&gts->ts_mm->mmap_sem)) {
+ if (!gts->ts_force_cch_reload &&
+ down_read_trylock(&gts->ts_mm->mmap_sem)) {
+ gts->ustats.fmm_tlbdropin++;
gru_try_dropin(gts, tfh, NULL);
up_read(&gts->ts_mm->mmap_sem);
} else {
@@ -491,6 +517,7 @@ static int gru_user_dropin(struct gru_thread_state *gts,
struct gru_mm_struct *gms = gts->ts_gms;
int ret;
+ gts->ustats.upm_tlbdropin++;
while (1) {
wait_event(gms->ms_wait_queue,
atomic_read(&gms->ms_range_active) == 0);
@@ -546,8 +573,8 @@ int gru_handle_user_call_os(unsigned long cb)
* CCH may contain stale data if ts_force_cch_reload is set.
*/
if (gts->ts_gru && gts->ts_force_cch_reload) {
- gru_update_cch(gts, 0);
gts->ts_force_cch_reload = 0;
+ gru_update_cch(gts, 0);
}
ret = -EAGAIN;
@@ -589,20 +616,26 @@ int gru_get_exception_detail(unsigned long arg)
} else if (gts->ts_gru) {
cbrnum = thread_cbr_number(gts, ucbnum);
cbe = get_cbe_by_index(gts->ts_gru, cbrnum);
- prefetchw(cbe);/* Harmless on hardware, required for emulator */
+ gru_flush_cache(cbe); /* CBE not coherent */
excdet.opc = cbe->opccpy;
excdet.exopc = cbe->exopccpy;
excdet.ecause = cbe->ecause;
excdet.exceptdet0 = cbe->idef1upd;
excdet.exceptdet1 = cbe->idef3upd;
+ excdet.cbrstate = cbe->cbrstate;
+ excdet.cbrexecstatus = cbe->cbrexecstatus;
+ gru_flush_cache(cbe);
ret = 0;
} else {
ret = -EAGAIN;
}
gru_unlock_gts(gts);
- gru_dbg(grudev, "address 0x%lx, ecause 0x%x\n", excdet.cb,
- excdet.ecause);
+ gru_dbg(grudev,
+ "cb 0x%lx, op %d, exopc %d, cbrstate %d, cbrexecstatus 0x%x, ecause 0x%x, "
+ "exdet0 0x%lx, exdet1 0x%x\n",
+ excdet.cb, excdet.opc, excdet.exopc, excdet.cbrstate, excdet.cbrexecstatus,
+ excdet.ecause, excdet.exceptdet0, excdet.exceptdet1);
if (!ret && copy_to_user((void __user *)arg, &excdet, sizeof(excdet)))
ret = -EFAULT;
return ret;
@@ -627,7 +660,7 @@ static int gru_unload_all_contexts(void)
if (gts && mutex_trylock(&gts->ts_ctxlock)) {
spin_unlock(&gru->gs_lock);
gru_unload_context(gts, 1);
- gru_unlock_gts(gts);
+ mutex_unlock(&gts->ts_ctxlock);
spin_lock(&gru->gs_lock);
}
}
@@ -669,6 +702,7 @@ int gru_user_flush_tlb(unsigned long arg)
{
struct gru_thread_state *gts;
struct gru_flush_tlb_req req;
+ struct gru_mm_struct *gms;
STAT(user_flush_tlb);
if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
@@ -681,8 +715,34 @@ int gru_user_flush_tlb(unsigned long arg)
if (!gts)
return -EINVAL;
- gru_flush_tlb_range(gts->ts_gms, req.vaddr, req.len);
+ gms = gts->ts_gms;
gru_unlock_gts(gts);
+ gru_flush_tlb_range(gms, req.vaddr, req.len);
+
+ return 0;
+}
+
+/*
+ * Fetch GSEG statisticss
+ */
+long gru_get_gseg_statistics(unsigned long arg)
+{
+ struct gru_thread_state *gts;
+ struct gru_get_gseg_statistics_req req;
+
+ if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
+ return -EFAULT;
+
+ gts = gru_find_lock_gts(req.gseg);
+ if (gts) {
+ memcpy(&req.stats, &gts->ustats, sizeof(gts->ustats));
+ gru_unlock_gts(gts);
+ } else {
+ memset(&req.stats, 0, sizeof(gts->ustats));
+ }
+
+ if (copy_to_user((void __user *)arg, &req, sizeof(req)))
+ return -EFAULT;
return 0;
}
@@ -691,18 +751,34 @@ int gru_user_flush_tlb(unsigned long arg)
* Register the current task as the user of the GSEG slice.
* Needed for TLB fault interrupt targeting.
*/
-int gru_set_task_slice(long address)
+int gru_set_context_option(unsigned long arg)
{
struct gru_thread_state *gts;
+ struct gru_set_context_option_req req;
+ int ret = 0;
+
+ STAT(set_context_option);
+ if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
+ return -EFAULT;
+ gru_dbg(grudev, "op %d, gseg 0x%lx, value1 0x%lx\n", req.op, req.gseg, req.val1);
- STAT(set_task_slice);
- gru_dbg(grudev, "address 0x%lx\n", address);
- gts = gru_alloc_locked_gts(address);
+ gts = gru_alloc_locked_gts(req.gseg);
if (!gts)
return -EINVAL;
- gts->ts_tgid_owner = current->tgid;
+ switch (req.op) {
+ case sco_gseg_owner:
+ /* Register the current task as the GSEG owner */
+ gts->ts_tgid_owner = current->tgid;
+ break;
+ case sco_cch_req_slice:
+ /* Set the CCH slice option */
+ gts->ts_cch_req_slice = req.val1 & 3;
+ break;
+ default:
+ ret = -EINVAL;
+ }
gru_unlock_gts(gts);
- return 0;
+ return ret;
}
diff --git a/drivers/misc/sgi-gru/grufile.c b/drivers/misc/sgi-gru/grufile.c
index 3ce2920e2bf..fa2d93a9fb8 100644
--- a/drivers/misc/sgi-gru/grufile.c
+++ b/drivers/misc/sgi-gru/grufile.c
@@ -46,6 +46,7 @@
struct gru_blade_state *gru_base[GRU_MAX_BLADES] __read_mostly;
unsigned long gru_start_paddr __read_mostly;
+void *gru_start_vaddr __read_mostly;
unsigned long gru_end_paddr __read_mostly;
unsigned int gru_max_gids __read_mostly;
struct gru_stats_s gru_stats;
@@ -135,11 +136,9 @@ static int gru_create_new_context(unsigned long arg)
if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
return -EFAULT;
- if (req.data_segment_bytes == 0 ||
- req.data_segment_bytes > max_user_dsr_bytes)
+ if (req.data_segment_bytes > max_user_dsr_bytes)
return -EINVAL;
- if (!req.control_blocks || !req.maximum_thread_count ||
- req.control_blocks > max_user_cbrs)
+ if (req.control_blocks > max_user_cbrs || !req.maximum_thread_count)
return -EINVAL;
if (!(req.options & GRU_OPT_MISS_MASK))
@@ -184,41 +183,6 @@ static long gru_get_config_info(unsigned long arg)
}
/*
- * Get GRU chiplet status
- */
-static long gru_get_chiplet_status(unsigned long arg)
-{
- struct gru_state *gru;
- struct gru_chiplet_info info;
-
- if (copy_from_user(&info, (void __user *)arg, sizeof(info)))
- return -EFAULT;
-
- if (info.node == -1)
- info.node = numa_node_id();
- if (info.node >= num_possible_nodes() ||
- info.chiplet >= GRU_CHIPLETS_PER_HUB ||
- info.node < 0 || info.chiplet < 0)
- return -EINVAL;
-
- info.blade = uv_node_to_blade_id(info.node);
- gru = get_gru(info.blade, info.chiplet);
-
- info.total_dsr_bytes = GRU_NUM_DSR_BYTES;
- info.total_cbr = GRU_NUM_CB;
- info.total_user_dsr_bytes = GRU_NUM_DSR_BYTES -
- gru->gs_reserved_dsr_bytes;
- info.total_user_cbr = GRU_NUM_CB - gru->gs_reserved_cbrs;
- info.free_user_dsr_bytes = hweight64(gru->gs_dsr_map) *
- GRU_DSR_AU_BYTES;
- info.free_user_cbr = hweight64(gru->gs_cbr_map) * GRU_CBR_AU_SIZE;
-
- if (copy_to_user((void __user *)arg, &info, sizeof(info)))
- return -EFAULT;
- return 0;
-}
-
-/*
* gru_file_unlocked_ioctl
*
* Called to update file attributes via IOCTL calls.
@@ -234,8 +198,8 @@ static long gru_file_unlocked_ioctl(struct file *file, unsigned int req,
case GRU_CREATE_CONTEXT:
err = gru_create_new_context(arg);
break;
- case GRU_SET_TASK_SLICE:
- err = gru_set_task_slice(arg);
+ case GRU_SET_CONTEXT_OPTION:
+ err = gru_set_context_option(arg);
break;
case GRU_USER_GET_EXCEPTION_DETAIL:
err = gru_get_exception_detail(arg);
@@ -243,18 +207,24 @@ static long gru_file_unlocked_ioctl(struct file *file, unsigned int req,
case GRU_USER_UNLOAD_CONTEXT:
err = gru_user_unload_context(arg);
break;
- case GRU_GET_CHIPLET_STATUS:
- err = gru_get_chiplet_status(arg);
- break;
case GRU_USER_FLUSH_TLB:
err = gru_user_flush_tlb(arg);
break;
case GRU_USER_CALL_OS:
err = gru_handle_user_call_os(arg);
break;
+ case GRU_GET_GSEG_STATISTICS:
+ err = gru_get_gseg_statistics(arg);
+ break;
+ case GRU_KTEST:
+ err = gru_ktest(arg);
+ break;
case GRU_GET_CONFIG_INFO:
err = gru_get_config_info(arg);
break;
+ case GRU_DUMP_CHIPLET_STATE:
+ err = gru_dump_chiplet_request(arg);
+ break;
}
return err;
}
@@ -282,7 +252,6 @@ static void gru_init_chiplet(struct gru_state *gru, unsigned long paddr,
gru_dbg(grudev, "bid %d, nid %d, gid %d, vaddr %p (0x%lx)\n",
bid, nid, gru->gs_gid, gru->gs_gru_base_vaddr,
gru->gs_gru_base_paddr);
- gru_kservices_init(gru);
}
static int gru_init_tables(unsigned long gru_base_paddr, void *gru_base_vaddr)
@@ -309,6 +278,7 @@ static int gru_init_tables(unsigned long gru_base_paddr, void *gru_base_vaddr)
memset(gru_base[bid], 0, sizeof(struct gru_blade_state));
gru_base[bid]->bs_lru_gru = &gru_base[bid]->bs_grus[0];
spin_lock_init(&gru_base[bid]->bs_lock);
+ init_rwsem(&gru_base[bid]->bs_kgts_sema);
dsrbytes = 0;
cbrs = 0;
@@ -372,7 +342,6 @@ static int __init gru_init(void)
{
int ret, irq, chip;
char id[10];
- void *gru_start_vaddr;
if (!is_uv_system())
return 0;
@@ -422,6 +391,7 @@ static int __init gru_init(void)
printk(KERN_ERR "%s: init tables failed\n", GRU_DRIVER_ID_STR);
goto exit3;
}
+ gru_kservices_init();
printk(KERN_INFO "%s: v%s\n", GRU_DRIVER_ID_STR,
GRU_DRIVER_VERSION_STR);
@@ -440,7 +410,7 @@ exit1:
static void __exit gru_exit(void)
{
- int i, bid, gid;
+ int i, bid;
int order = get_order(sizeof(struct gru_state) *
GRU_CHIPLETS_PER_BLADE);
@@ -449,10 +419,7 @@ static void __exit gru_exit(void)
for (i = 0; i < GRU_CHIPLETS_PER_BLADE; i++)
free_irq(IRQ_GRU + i, NULL);
-
- foreach_gid(gid)
- gru_kservices_exit(GID_TO_GRU(gid));
-
+ gru_kservices_exit();
for (bid = 0; bid < GRU_MAX_BLADES; bid++)
free_pages((unsigned long)gru_base[bid], order);
diff --git a/drivers/misc/sgi-gru/gruhandles.c b/drivers/misc/sgi-gru/gruhandles.c
index 9b7ccb32869..37e7cfc53b9 100644
--- a/drivers/misc/sgi-gru/gruhandles.c
+++ b/drivers/misc/sgi-gru/gruhandles.c
@@ -57,7 +57,7 @@ static void start_instruction(void *h)
static int wait_instruction_complete(void *h, enum mcs_op opc)
{
int status;
- cycles_t start_time = get_cycles();
+ unsigned long start_time = get_cycles();
while (1) {
cpu_relax();
@@ -65,25 +65,16 @@ static int wait_instruction_complete(void *h, enum mcs_op opc)
if (status != CCHSTATUS_ACTIVE)
break;
if (GRU_OPERATION_TIMEOUT < (get_cycles() - start_time))
- panic("GRU %p is malfunctioning\n", h);
+ panic("GRU %p is malfunctioning: start %ld, end %ld\n",
+ h, start_time, (unsigned long)get_cycles());
}
if (gru_options & OPT_STATS)
update_mcs_stats(opc, get_cycles() - start_time);
return status;
}
-int cch_allocate(struct gru_context_configuration_handle *cch,
- int asidval, int sizeavail, unsigned long cbrmap,
- unsigned long dsrmap)
+int cch_allocate(struct gru_context_configuration_handle *cch)
{
- int i;
-
- for (i = 0; i < 8; i++) {
- cch->asid[i] = (asidval++);
- cch->sizeavail[i] = sizeavail;
- }
- cch->dsr_allocation_map = dsrmap;
- cch->cbr_allocation_map = cbrmap;
cch->opc = CCHOP_ALLOCATE;
start_instruction(cch);
return wait_instruction_complete(cch, cchop_allocate);
diff --git a/drivers/misc/sgi-gru/gruhandles.h b/drivers/misc/sgi-gru/gruhandles.h
index 1ed74d7508c..f44112242d0 100644
--- a/drivers/misc/sgi-gru/gruhandles.h
+++ b/drivers/misc/sgi-gru/gruhandles.h
@@ -39,7 +39,6 @@
#define GRU_NUM_CBE 128
#define GRU_NUM_TFH 128
#define GRU_NUM_CCH 16
-#define GRU_NUM_GSH 1
/* Maximum resource counts that can be reserved by user programs */
#define GRU_NUM_USER_CBR GRU_NUM_CBE
@@ -56,7 +55,6 @@
#define GRU_CBE_BASE (GRU_MCS_BASE + 0x10000)
#define GRU_TFH_BASE (GRU_MCS_BASE + 0x18000)
#define GRU_CCH_BASE (GRU_MCS_BASE + 0x20000)
-#define GRU_GSH_BASE (GRU_MCS_BASE + 0x30000)
/* User gseg constants */
#define GRU_GSEG_STRIDE (4 * 1024 * 1024)
@@ -251,15 +249,15 @@ struct gru_tlb_fault_handle {
unsigned int fill1:9;
unsigned int status:2;
- unsigned int fill2:1;
- unsigned int color:1;
+ unsigned int fill2:2;
unsigned int state:3;
unsigned int fill3:1;
- unsigned int cause:7; /* DW 0 - high 32 */
+ unsigned int cause:6;
+ unsigned int cb_int:1;
unsigned int fill4:1;
- unsigned int indexway:12;
+ unsigned int indexway:12; /* DW 0 - high 32 */
unsigned int fill5:4;
unsigned int ctxnum:4;
@@ -457,21 +455,7 @@ enum gru_cbr_state {
CBRSTATE_BUSY_INTERRUPT,
};
-/* CBE cbrexecstatus bits */
-#define CBR_EXS_ABORT_OCC_BIT 0
-#define CBR_EXS_INT_OCC_BIT 1
-#define CBR_EXS_PENDING_BIT 2
-#define CBR_EXS_QUEUED_BIT 3
-#define CBR_EXS_TLBHW_BIT 4
-#define CBR_EXS_EXCEPTION_BIT 5
-
-#define CBR_EXS_ABORT_OCC (1 << CBR_EXS_ABORT_OCC_BIT)
-#define CBR_EXS_INT_OCC (1 << CBR_EXS_INT_OCC_BIT)
-#define CBR_EXS_PENDING (1 << CBR_EXS_PENDING_BIT)
-#define CBR_EXS_QUEUED (1 << CBR_EXS_QUEUED_BIT)
-#define CBR_EXS_TLBHW (1 << CBR_EXS_TLBHW_BIT)
-#define CBR_EXS_EXCEPTION (1 << CBR_EXS_EXCEPTION_BIT)
-
+/* CBE cbrexecstatus bits - defined in gru_instructions.h*/
/* CBE ecause bits - defined in gru_instructions.h */
/*
@@ -495,9 +479,7 @@ enum gru_cbr_state {
/* minimum TLB purge count to ensure a full purge */
#define GRUMAXINVAL 1024UL
-int cch_allocate(struct gru_context_configuration_handle *cch,
- int asidval, int sizeavail, unsigned long cbrmap, unsigned long dsrmap);
-
+int cch_allocate(struct gru_context_configuration_handle *cch);
int cch_start(struct gru_context_configuration_handle *cch);
int cch_interrupt(struct gru_context_configuration_handle *cch);
int cch_deallocate(struct gru_context_configuration_handle *cch);
diff --git a/drivers/misc/sgi-gru/grukdump.c b/drivers/misc/sgi-gru/grukdump.c
new file mode 100644
index 00000000000..55eabfa8558
--- /dev/null
+++ b/drivers/misc/sgi-gru/grukdump.c
@@ -0,0 +1,232 @@
+/*
+ * SN Platform GRU Driver
+ *
+ * Dump GRU State
+ *
+ * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/spinlock.h>
+#include <linux/uaccess.h>
+#include <linux/delay.h>
+#include <linux/bitops.h>
+#include <asm/uv/uv_hub.h>
+#include "gru.h"
+#include "grutables.h"
+#include "gruhandles.h"
+#include "grulib.h"
+
+#define CCH_LOCK_ATTEMPTS 10
+
+static int gru_user_copy_handle(void __user **dp, void *s)
+{
+ if (copy_to_user(*dp, s, GRU_HANDLE_BYTES))
+ return -1;
+ *dp += GRU_HANDLE_BYTES;
+ return 0;
+}
+
+static int gru_dump_context_data(void *grubase,
+ struct gru_context_configuration_handle *cch,
+ void __user *ubuf, int ctxnum, int dsrcnt)
+{
+ void *cb, *cbe, *tfh, *gseg;
+ int i, scr;
+
+ gseg = grubase + ctxnum * GRU_GSEG_STRIDE;
+ cb = gseg + GRU_CB_BASE;
+ cbe = grubase + GRU_CBE_BASE;
+ tfh = grubase + GRU_TFH_BASE;
+
+ for_each_cbr_in_allocation_map(i, &cch->cbr_allocation_map, scr) {
+ if (gru_user_copy_handle(&ubuf, cb))
+ goto fail;
+ if (gru_user_copy_handle(&ubuf, tfh + i * GRU_HANDLE_STRIDE))
+ goto fail;
+ if (gru_user_copy_handle(&ubuf, cbe + i * GRU_HANDLE_STRIDE))
+ goto fail;
+ cb += GRU_HANDLE_STRIDE;
+ }
+ if (dsrcnt)
+ memcpy(ubuf, gseg + GRU_DS_BASE, dsrcnt * GRU_HANDLE_STRIDE);
+ return 0;
+
+fail:
+ return -EFAULT;
+}
+
+static int gru_dump_tfm(struct gru_state *gru,
+ void __user *ubuf, void __user *ubufend)
+{
+ struct gru_tlb_fault_map *tfm;
+ int i, ret, bytes;
+
+ bytes = GRU_NUM_TFM * GRU_CACHE_LINE_BYTES;
+ if (bytes > ubufend - ubuf)
+ ret = -EFBIG;
+
+ for (i = 0; i < GRU_NUM_TFM; i++) {
+ tfm = get_tfm(gru->gs_gru_base_vaddr, i);
+ if (gru_user_copy_handle(&ubuf, tfm))
+ goto fail;
+ }
+ return GRU_NUM_TFM * GRU_CACHE_LINE_BYTES;
+
+fail:
+ return -EFAULT;
+}
+
+static int gru_dump_tgh(struct gru_state *gru,
+ void __user *ubuf, void __user *ubufend)
+{
+ struct gru_tlb_global_handle *tgh;
+ int i, ret, bytes;
+
+ bytes = GRU_NUM_TGH * GRU_CACHE_LINE_BYTES;
+ if (bytes > ubufend - ubuf)
+ ret = -EFBIG;
+
+ for (i = 0; i < GRU_NUM_TGH; i++) {
+ tgh = get_tgh(gru->gs_gru_base_vaddr, i);
+ if (gru_user_copy_handle(&ubuf, tgh))
+ goto fail;
+ }
+ return GRU_NUM_TGH * GRU_CACHE_LINE_BYTES;
+
+fail:
+ return -EFAULT;
+}
+
+static int gru_dump_context(struct gru_state *gru, int ctxnum,
+ void __user *ubuf, void __user *ubufend, char data_opt,
+ char lock_cch)
+{
+ struct gru_dump_context_header hdr;
+ struct gru_dump_context_header __user *uhdr = ubuf;
+ struct gru_context_configuration_handle *cch, *ubufcch;
+ struct gru_thread_state *gts;
+ int try, cch_locked, cbrcnt = 0, dsrcnt = 0, bytes = 0, ret = 0;
+ void *grubase;
+
+ memset(&hdr, 0, sizeof(hdr));
+ grubase = gru->gs_gru_base_vaddr;
+ cch = get_cch(grubase, ctxnum);
+ for (try = 0; try < CCH_LOCK_ATTEMPTS; try++) {
+ cch_locked = trylock_cch_handle(cch);
+ if (cch_locked)
+ break;
+ msleep(1);
+ }
+
+ ubuf += sizeof(hdr);
+ ubufcch = ubuf;
+ if (gru_user_copy_handle(&ubuf, cch))
+ goto fail;
+ if (cch_locked)
+ ubufcch->delresp = 0;
+ bytes = sizeof(hdr) + GRU_CACHE_LINE_BYTES;
+
+ if (cch_locked || !lock_cch) {
+ gts = gru->gs_gts[ctxnum];
+ if (gts && gts->ts_vma) {
+ hdr.pid = gts->ts_tgid_owner;
+ hdr.vaddr = gts->ts_vma->vm_start;
+ }
+ if (cch->state != CCHSTATE_INACTIVE) {
+ cbrcnt = hweight64(cch->cbr_allocation_map) *
+ GRU_CBR_AU_SIZE;
+ dsrcnt = data_opt ? hweight32(cch->dsr_allocation_map) *
+ GRU_DSR_AU_CL : 0;
+ }
+ bytes += (3 * cbrcnt + dsrcnt) * GRU_CACHE_LINE_BYTES;
+ if (bytes > ubufend - ubuf)
+ ret = -EFBIG;
+ else
+ ret = gru_dump_context_data(grubase, cch, ubuf, ctxnum,
+ dsrcnt);
+
+ }
+ if (cch_locked)
+ unlock_cch_handle(cch);
+ if (ret)
+ return ret;
+
+ hdr.magic = GRU_DUMP_MAGIC;
+ hdr.gid = gru->gs_gid;
+ hdr.ctxnum = ctxnum;
+ hdr.cbrcnt = cbrcnt;
+ hdr.dsrcnt = dsrcnt;
+ hdr.cch_locked = cch_locked;
+ if (!ret && copy_to_user((void __user *)uhdr, &hdr, sizeof(hdr)))
+ ret = -EFAULT;
+
+ return ret ? ret : bytes;
+
+fail:
+ unlock_cch_handle(cch);
+ return -EFAULT;
+}
+
+int gru_dump_chiplet_request(unsigned long arg)
+{
+ struct gru_state *gru;
+ struct gru_dump_chiplet_state_req req;
+ void __user *ubuf;
+ void __user *ubufend;
+ int ctxnum, ret, cnt = 0;
+
+ if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
+ return -EFAULT;
+
+ /* Currently, only dump by gid is implemented */
+ if (req.gid >= gru_max_gids || req.gid < 0)
+ return -EINVAL;
+
+ gru = GID_TO_GRU(req.gid);
+ ubuf = req.buf;
+ ubufend = req.buf + req.buflen;
+
+ ret = gru_dump_tfm(gru, ubuf, ubufend);
+ if (ret < 0)
+ goto fail;
+ ubuf += ret;
+
+ ret = gru_dump_tgh(gru, ubuf, ubufend);
+ if (ret < 0)
+ goto fail;
+ ubuf += ret;
+
+ for (ctxnum = 0; ctxnum < GRU_NUM_CCH; ctxnum++) {
+ if (req.ctxnum == ctxnum || req.ctxnum < 0) {
+ ret = gru_dump_context(gru, ctxnum, ubuf, ubufend,
+ req.data_opt, req.lock_cch);
+ if (ret < 0)
+ goto fail;
+ ubuf += ret;
+ cnt++;
+ }
+ }
+
+ if (copy_to_user((void __user *)arg, &req, sizeof(req)))
+ return -EFAULT;
+ return cnt;
+
+fail:
+ return ret;
+}
diff --git a/drivers/misc/sgi-gru/grukservices.c b/drivers/misc/sgi-gru/grukservices.c
index d8bd7d84a7c..eedbf9c3276 100644
--- a/drivers/misc/sgi-gru/grukservices.c
+++ b/drivers/misc/sgi-gru/grukservices.c
@@ -31,6 +31,7 @@
#include <linux/proc_fs.h>
#include <linux/interrupt.h>
#include <linux/uaccess.h>
+#include <linux/delay.h>
#include "gru.h"
#include "grulib.h"
#include "grutables.h"
@@ -45,18 +46,66 @@
* resources. This will likely be replaced when we better understand the
* kernel/user requirements.
*
- * At boot time, the kernel permanently reserves a fixed number of
- * CBRs/DSRs for each cpu to use. The resources are all taken from
- * the GRU chiplet 1 on the blade. This leaves the full set of resources
- * of chiplet 0 available to be allocated to a single user.
+ * Blade percpu resources reserved for kernel use. These resources are
+ * reserved whenever the the kernel context for the blade is loaded. Note
+ * that the kernel context is not guaranteed to be always available. It is
+ * loaded on demand & can be stolen by a user if the user demand exceeds the
+ * kernel demand. The kernel can always reload the kernel context but
+ * a SLEEP may be required!!!.
+ *
+ * Async Overview:
+ *
+ * Each blade has one "kernel context" that owns GRU kernel resources
+ * located on the blade. Kernel drivers use GRU resources in this context
+ * for sending messages, zeroing memory, etc.
+ *
+ * The kernel context is dynamically loaded on demand. If it is not in
+ * use by the kernel, the kernel context can be unloaded & given to a user.
+ * The kernel context will be reloaded when needed. This may require that
+ * a context be stolen from a user.
+ * NOTE: frequent unloading/reloading of the kernel context is
+ * expensive. We are depending on batch schedulers, cpusets, sane
+ * drivers or some other mechanism to prevent the need for frequent
+ * stealing/reloading.
+ *
+ * The kernel context consists of two parts:
+ * - 1 CB & a few DSRs that are reserved for each cpu on the blade.
+ * Each cpu has it's own private resources & does not share them
+ * with other cpus. These resources are used serially, ie,
+ * locked, used & unlocked on each call to a function in
+ * grukservices.
+ * (Now that we have dynamic loading of kernel contexts, I
+ * may rethink this & allow sharing between cpus....)
+ *
+ * - Additional resources can be reserved long term & used directly
+ * by UV drivers located in the kernel. Drivers using these GRU
+ * resources can use asynchronous GRU instructions that send
+ * interrupts on completion.
+ * - these resources must be explicitly locked/unlocked
+ * - locked resources prevent (obviously) the kernel
+ * context from being unloaded.
+ * - drivers using these resource directly issue their own
+ * GRU instruction and must wait/check completion.
+ *
+ * When these resources are reserved, the caller can optionally
+ * associate a wait_queue with the resources and use asynchronous
+ * GRU instructions. When an async GRU instruction completes, the
+ * driver will do a wakeup on the event.
+ *
*/
-/* Blade percpu resources PERMANENTLY reserved for kernel use */
+
+#define ASYNC_HAN_TO_BID(h) ((h) - 1)
+#define ASYNC_BID_TO_HAN(b) ((b) + 1)
+#define ASYNC_HAN_TO_BS(h) gru_base[ASYNC_HAN_TO_BID(h)]
+#define KCB_TO_GID(cb) ((cb - gru_start_vaddr) / \
+ (GRU_SIZE * GRU_CHIPLETS_PER_BLADE))
+#define KCB_TO_BS(cb) gru_base[KCB_TO_GID(cb)]
+
#define GRU_NUM_KERNEL_CBR 1
#define GRU_NUM_KERNEL_DSR_BYTES 256
#define GRU_NUM_KERNEL_DSR_CL (GRU_NUM_KERNEL_DSR_BYTES / \
GRU_CACHE_LINE_BYTES)
-#define KERNEL_CTXNUM 15
/* GRU instruction attributes for all instructions */
#define IMA IMA_CB_DELAY
@@ -98,6 +147,108 @@ struct message_header {
#define HSTATUS(mq, h) ((mq) + offsetof(struct message_queue, hstatus[h]))
+/*
+ * Reload the blade's kernel context into a GRU chiplet. Called holding
+ * the bs_kgts_sema for READ. Will steal user contexts if necessary.
+ */
+static void gru_load_kernel_context(struct gru_blade_state *bs, int blade_id)
+{
+ struct gru_state *gru;
+ struct gru_thread_state *kgts;
+ void *vaddr;
+ int ctxnum, ncpus;
+
+ up_read(&bs->bs_kgts_sema);
+ down_write(&bs->bs_kgts_sema);
+
+ if (!bs->bs_kgts)
+ bs->bs_kgts = gru_alloc_gts(NULL, 0, 0, 0, 0);
+ kgts = bs->bs_kgts;
+
+ if (!kgts->ts_gru) {
+ STAT(load_kernel_context);
+ ncpus = uv_blade_nr_possible_cpus(blade_id);
+ kgts->ts_cbr_au_count = GRU_CB_COUNT_TO_AU(
+ GRU_NUM_KERNEL_CBR * ncpus + bs->bs_async_cbrs);
+ kgts->ts_dsr_au_count = GRU_DS_BYTES_TO_AU(
+ GRU_NUM_KERNEL_DSR_BYTES * ncpus +
+ bs->bs_async_dsr_bytes);
+ while (!gru_assign_gru_context(kgts, blade_id)) {
+ msleep(1);
+ gru_steal_context(kgts, blade_id);
+ }
+ gru_load_context(kgts);
+ gru = bs->bs_kgts->ts_gru;
+ vaddr = gru->gs_gru_base_vaddr;
+ ctxnum = kgts->ts_ctxnum;
+ bs->kernel_cb = get_gseg_base_address_cb(vaddr, ctxnum, 0);
+ bs->kernel_dsr = get_gseg_base_address_ds(vaddr, ctxnum, 0);
+ }
+ downgrade_write(&bs->bs_kgts_sema);
+}
+
+/*
+ * Free all kernel contexts that are not currently in use.
+ * Returns 0 if all freed, else number of inuse context.
+ */
+static int gru_free_kernel_contexts(void)
+{
+ struct gru_blade_state *bs;
+ struct gru_thread_state *kgts;
+ int bid, ret = 0;
+
+ for (bid = 0; bid < GRU_MAX_BLADES; bid++) {
+ bs = gru_base[bid];
+ if (!bs)
+ continue;
+ if (down_write_trylock(&bs->bs_kgts_sema)) {
+ kgts = bs->bs_kgts;
+ if (kgts && kgts->ts_gru)
+ gru_unload_context(kgts, 0);
+ kfree(kgts);
+ bs->bs_kgts = NULL;
+ up_write(&bs->bs_kgts_sema);
+ } else {
+ ret++;
+ }
+ }
+ return ret;
+}
+
+/*
+ * Lock & load the kernel context for the specified blade.
+ */
+static struct gru_blade_state *gru_lock_kernel_context(int blade_id)
+{
+ struct gru_blade_state *bs;
+
+ STAT(lock_kernel_context);
+ bs = gru_base[blade_id];
+
+ down_read(&bs->bs_kgts_sema);
+ if (!bs->bs_kgts || !bs->bs_kgts->ts_gru)
+ gru_load_kernel_context(bs, blade_id);
+ return bs;
+
+}
+
+/*
+ * Unlock the kernel context for the specified blade. Context is not
+ * unloaded but may be stolen before next use.
+ */
+static void gru_unlock_kernel_context(int blade_id)
+{
+ struct gru_blade_state *bs;
+
+ bs = gru_base[blade_id];
+ up_read(&bs->bs_kgts_sema);
+ STAT(unlock_kernel_context);
+}
+
+/*
+ * Reserve & get pointers to the DSR/CBRs reserved for the current cpu.
+ * - returns with preemption disabled
+ */
static int gru_get_cpu_resources(int dsr_bytes, void **cb, void **dsr)
{
struct gru_blade_state *bs;
@@ -105,30 +256,148 @@ static int gru_get_cpu_resources(int dsr_bytes, void **cb, void **dsr)
BUG_ON(dsr_bytes > GRU_NUM_KERNEL_DSR_BYTES);
preempt_disable();
- bs = gru_base[uv_numa_blade_id()];
+ bs = gru_lock_kernel_context(uv_numa_blade_id());
lcpu = uv_blade_processor_id();
*cb = bs->kernel_cb + lcpu * GRU_HANDLE_STRIDE;
*dsr = bs->kernel_dsr + lcpu * GRU_NUM_KERNEL_DSR_BYTES;
return 0;
}
+/*
+ * Free the current cpus reserved DSR/CBR resources.
+ */
static void gru_free_cpu_resources(void *cb, void *dsr)
{
+ gru_unlock_kernel_context(uv_numa_blade_id());
preempt_enable();
}
+/*
+ * Reserve GRU resources to be used asynchronously.
+ * Note: currently supports only 1 reservation per blade.
+ *
+ * input:
+ * blade_id - blade on which resources should be reserved
+ * cbrs - number of CBRs
+ * dsr_bytes - number of DSR bytes needed
+ * output:
+ * handle to identify resource
+ * (0 = async resources already reserved)
+ */
+unsigned long gru_reserve_async_resources(int blade_id, int cbrs, int dsr_bytes,
+ struct completion *cmp)
+{
+ struct gru_blade_state *bs;
+ struct gru_thread_state *kgts;
+ int ret = 0;
+
+ bs = gru_base[blade_id];
+
+ down_write(&bs->bs_kgts_sema);
+
+ /* Verify no resources already reserved */
+ if (bs->bs_async_dsr_bytes + bs->bs_async_cbrs)
+ goto done;
+ bs->bs_async_dsr_bytes = dsr_bytes;
+ bs->bs_async_cbrs = cbrs;
+ bs->bs_async_wq = cmp;
+ kgts = bs->bs_kgts;
+
+ /* Resources changed. Unload context if already loaded */
+ if (kgts && kgts->ts_gru)
+ gru_unload_context(kgts, 0);
+ ret = ASYNC_BID_TO_HAN(blade_id);
+
+done:
+ up_write(&bs->bs_kgts_sema);
+ return ret;
+}
+
+/*
+ * Release async resources previously reserved.
+ *
+ * input:
+ * han - handle to identify resources
+ */
+void gru_release_async_resources(unsigned long han)
+{
+ struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han);
+
+ down_write(&bs->bs_kgts_sema);
+ bs->bs_async_dsr_bytes = 0;
+ bs->bs_async_cbrs = 0;
+ bs->bs_async_wq = NULL;
+ up_write(&bs->bs_kgts_sema);
+}
+
+/*
+ * Wait for async GRU instructions to complete.
+ *
+ * input:
+ * han - handle to identify resources
+ */
+void gru_wait_async_cbr(unsigned long han)
+{
+ struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han);
+
+ wait_for_completion(bs->bs_async_wq);
+ mb();
+}
+
+/*
+ * Lock previous reserved async GRU resources
+ *
+ * input:
+ * han - handle to identify resources
+ * output:
+ * cb - pointer to first CBR
+ * dsr - pointer to first DSR
+ */
+void gru_lock_async_resource(unsigned long han, void **cb, void **dsr)
+{
+ struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han);
+ int blade_id = ASYNC_HAN_TO_BID(han);
+ int ncpus;
+
+ gru_lock_kernel_context(blade_id);
+ ncpus = uv_blade_nr_possible_cpus(blade_id);
+ if (cb)
+ *cb = bs->kernel_cb + ncpus * GRU_HANDLE_STRIDE;
+ if (dsr)
+ *dsr = bs->kernel_dsr + ncpus * GRU_NUM_KERNEL_DSR_BYTES;
+}
+
+/*
+ * Unlock previous reserved async GRU resources
+ *
+ * input:
+ * han - handle to identify resources
+ */
+void gru_unlock_async_resource(unsigned long han)
+{
+ int blade_id = ASYNC_HAN_TO_BID(han);
+
+ gru_unlock_kernel_context(blade_id);
+}
+
+/*----------------------------------------------------------------------*/
int gru_get_cb_exception_detail(void *cb,
struct control_block_extended_exc_detail *excdet)
{
struct gru_control_block_extended *cbe;
+ struct gru_blade_state *bs;
+ int cbrnum;
- cbe = get_cbe(GRUBASE(cb), get_cb_number(cb));
- prefetchw(cbe); /* Harmless on hardware, required for emulator */
+ bs = KCB_TO_BS(cb);
+ cbrnum = thread_cbr_number(bs->bs_kgts, get_cb_number(cb));
+ cbe = get_cbe(GRUBASE(cb), cbrnum);
+ gru_flush_cache(cbe); /* CBE not coherent */
excdet->opc = cbe->opccpy;
excdet->exopc = cbe->exopccpy;
excdet->ecause = cbe->ecause;
excdet->exceptdet0 = cbe->idef1upd;
excdet->exceptdet1 = cbe->idef3upd;
+ gru_flush_cache(cbe);
return 0;
}
@@ -167,13 +436,13 @@ static int gru_retry_exception(void *cb)
int retry = EXCEPTION_RETRY_LIMIT;
while (1) {
- if (gru_get_cb_message_queue_substatus(cb))
- break;
if (gru_wait_idle_or_exception(gen) == CBS_IDLE)
return CBS_IDLE;
-
+ if (gru_get_cb_message_queue_substatus(cb))
+ return CBS_EXCEPTION;
gru_get_cb_exception_detail(cb, &excdet);
- if (excdet.ecause & ~EXCEPTION_RETRY_BITS)
+ if ((excdet.ecause & ~EXCEPTION_RETRY_BITS) ||
+ (excdet.cbrexecstatus & CBR_EXS_ABORT_OCC))
break;
if (retry-- == 0)
break;
@@ -416,6 +685,29 @@ static void send_message_queue_interrupt(struct gru_message_queue_desc *mqd)
mqd->interrupt_vector);
}
+/*
+ * Handle a PUT failure. Note: if message was a 2-line message, one of the
+ * lines might have successfully have been written. Before sending the
+ * message, "present" must be cleared in BOTH lines to prevent the receiver
+ * from prematurely seeing the full message.
+ */
+static int send_message_put_nacked(void *cb, struct gru_message_queue_desc *mqd,
+ void *mesg, int lines)
+{
+ unsigned long m;
+
+ m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6);
+ if (lines == 2) {
+ gru_vset(cb, m, 0, XTYPE_CL, lines, 1, IMA);
+ if (gru_wait(cb) != CBS_IDLE)
+ return MQE_UNEXPECTED_CB_ERR;
+ }
+ gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, lines, 1, IMA);
+ if (gru_wait(cb) != CBS_IDLE)
+ return MQE_UNEXPECTED_CB_ERR;
+ send_message_queue_interrupt(mqd);
+ return MQE_OK;
+}
/*
* Handle a gru_mesq failure. Some of these failures are software recoverable
@@ -425,7 +717,6 @@ static int send_message_failure(void *cb, struct gru_message_queue_desc *mqd,
void *mesg, int lines)
{
int substatus, ret = 0;
- unsigned long m;
substatus = gru_get_cb_message_queue_substatus(cb);
switch (substatus) {
@@ -447,14 +738,7 @@ static int send_message_failure(void *cb, struct gru_message_queue_desc *mqd,
break;
case CBSS_PUT_NACKED:
STAT(mesq_send_put_nacked);
- m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6);
- gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, lines, 1, IMA);
- if (gru_wait(cb) == CBS_IDLE) {
- ret = MQE_OK;
- send_message_queue_interrupt(mqd);
- } else {
- ret = MQE_UNEXPECTED_CB_ERR;
- }
+ ret = send_message_put_nacked(cb, mqd, mesg, lines);
break;
default:
BUG();
@@ -597,115 +881,177 @@ EXPORT_SYMBOL_GPL(gru_copy_gpa);
/* ------------------- KERNEL QUICKTESTS RUN AT STARTUP ----------------*/
/* Temp - will delete after we gain confidence in the GRU */
-static __cacheline_aligned unsigned long word0;
-static __cacheline_aligned unsigned long word1;
-static int quicktest(struct gru_state *gru)
+static int quicktest0(unsigned long arg)
{
+ unsigned long word0;
+ unsigned long word1;
void *cb;
- void *ds;
+ void *dsr;
unsigned long *p;
+ int ret = -EIO;
- cb = get_gseg_base_address_cb(gru->gs_gru_base_vaddr, KERNEL_CTXNUM, 0);
- ds = get_gseg_base_address_ds(gru->gs_gru_base_vaddr, KERNEL_CTXNUM, 0);
- p = ds;
+ if (gru_get_cpu_resources(GRU_CACHE_LINE_BYTES, &cb, &dsr))
+ return MQE_BUG_NO_RESOURCES;
+ p = dsr;
word0 = MAGIC;
+ word1 = 0;
- gru_vload(cb, uv_gpa(&word0), 0, XTYPE_DW, 1, 1, IMA);
- if (gru_wait(cb) != CBS_IDLE)
- BUG();
+ gru_vload(cb, uv_gpa(&word0), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA);
+ if (gru_wait(cb) != CBS_IDLE) {
+ printk(KERN_DEBUG "GRU quicktest0: CBR failure 1\n");
+ goto done;
+ }
- if (*(unsigned long *)ds != MAGIC)
- BUG();
- gru_vstore(cb, uv_gpa(&word1), 0, XTYPE_DW, 1, 1, IMA);
- if (gru_wait(cb) != CBS_IDLE)
- BUG();
+ if (*p != MAGIC) {
+ printk(KERN_DEBUG "GRU: quicktest0 bad magic 0x%lx\n", *p);
+ goto done;
+ }
+ gru_vstore(cb, uv_gpa(&word1), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA);
+ if (gru_wait(cb) != CBS_IDLE) {
+ printk(KERN_DEBUG "GRU quicktest0: CBR failure 2\n");
+ goto done;
+ }
- if (word0 != word1 || word0 != MAGIC) {
- printk
- ("GRU quicktest err: gid %d, found 0x%lx, expected 0x%lx\n",
- gru->gs_gid, word1, MAGIC);
- BUG(); /* ZZZ should not be fatal */
+ if (word0 != word1 || word1 != MAGIC) {
+ printk(KERN_DEBUG
+ "GRU quicktest0 err: found 0x%lx, expected 0x%lx\n",
+ word1, MAGIC);
+ goto done;
}
+ ret = 0;
- return 0;
+done:
+ gru_free_cpu_resources(cb, dsr);
+ return ret;
}
+#define ALIGNUP(p, q) ((void *)(((unsigned long)(p) + (q) - 1) & ~(q - 1)))
-int gru_kservices_init(struct gru_state *gru)
+static int quicktest1(unsigned long arg)
{
- struct gru_blade_state *bs;
- struct gru_context_configuration_handle *cch;
- unsigned long cbr_map, dsr_map;
- int err, num, cpus_possible;
-
- /*
- * Currently, resources are reserved ONLY on the second chiplet
- * on each blade. This leaves ALL resources on chiplet 0 available
- * for user code.
- */
- bs = gru->gs_blade;
- if (gru != &bs->bs_grus[1])
- return 0;
-
- cpus_possible = uv_blade_nr_possible_cpus(gru->gs_blade_id);
-
- num = GRU_NUM_KERNEL_CBR * cpus_possible;
- cbr_map = gru_reserve_cb_resources(gru, GRU_CB_COUNT_TO_AU(num), NULL);
- gru->gs_reserved_cbrs += num;
-
- num = GRU_NUM_KERNEL_DSR_BYTES * cpus_possible;
- dsr_map = gru_reserve_ds_resources(gru, GRU_DS_BYTES_TO_AU(num), NULL);
- gru->gs_reserved_dsr_bytes += num;
-
- gru->gs_active_contexts++;
- __set_bit(KERNEL_CTXNUM, &gru->gs_context_map);
- cch = get_cch(gru->gs_gru_base_vaddr, KERNEL_CTXNUM);
-
- bs->kernel_cb = get_gseg_base_address_cb(gru->gs_gru_base_vaddr,
- KERNEL_CTXNUM, 0);
- bs->kernel_dsr = get_gseg_base_address_ds(gru->gs_gru_base_vaddr,
- KERNEL_CTXNUM, 0);
-
- lock_cch_handle(cch);
- cch->tfm_fault_bit_enable = 0;
- cch->tlb_int_enable = 0;
- cch->tfm_done_bit_enable = 0;
- cch->unmap_enable = 1;
- err = cch_allocate(cch, 0, 0, cbr_map, dsr_map);
- if (err) {
- gru_dbg(grudev,
- "Unable to allocate kernel CCH: gid %d, err %d\n",
- gru->gs_gid, err);
- BUG();
+ struct gru_message_queue_desc mqd;
+ void *p, *mq;
+ unsigned long *dw;
+ int i, ret = -EIO;
+ char mes[GRU_CACHE_LINE_BYTES], *m;
+
+ /* Need 1K cacheline aligned that does not cross page boundary */
+ p = kmalloc(4096, 0);
+ mq = ALIGNUP(p, 1024);
+ memset(mes, 0xee, sizeof(mes));
+ dw = mq;
+
+ gru_create_message_queue(&mqd, mq, 8 * GRU_CACHE_LINE_BYTES, 0, 0, 0);
+ for (i = 0; i < 6; i++) {
+ mes[8] = i;
+ do {
+ ret = gru_send_message_gpa(&mqd, mes, sizeof(mes));
+ } while (ret == MQE_CONGESTION);
+ if (ret)
+ break;
}
- if (cch_start(cch)) {
- gru_dbg(grudev, "Unable to start kernel CCH: gid %d, err %d\n",
- gru->gs_gid, err);
- BUG();
+ if (ret != MQE_QUEUE_FULL || i != 4)
+ goto done;
+
+ for (i = 0; i < 6; i++) {
+ m = gru_get_next_message(&mqd);
+ if (!m || m[8] != i)
+ break;
+ gru_free_message(&mqd, m);
}
- unlock_cch_handle(cch);
+ ret = (i == 4) ? 0 : -EIO;
- if (gru_options & GRU_QUICKLOOK)
- quicktest(gru);
- return 0;
+done:
+ kfree(p);
+ return ret;
}
-void gru_kservices_exit(struct gru_state *gru)
+static int quicktest2(unsigned long arg)
{
- struct gru_context_configuration_handle *cch;
- struct gru_blade_state *bs;
+ static DECLARE_COMPLETION(cmp);
+ unsigned long han;
+ int blade_id = 0;
+ int numcb = 4;
+ int ret = 0;
+ unsigned long *buf;
+ void *cb0, *cb;
+ int i, k, istatus, bytes;
+
+ bytes = numcb * 4 * 8;
+ buf = kmalloc(bytes, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ ret = -EBUSY;
+ han = gru_reserve_async_resources(blade_id, numcb, 0, &cmp);
+ if (!han)
+ goto done;
+
+ gru_lock_async_resource(han, &cb0, NULL);
+ memset(buf, 0xee, bytes);
+ for (i = 0; i < numcb; i++)
+ gru_vset(cb0 + i * GRU_HANDLE_STRIDE, uv_gpa(&buf[i * 4]), 0,
+ XTYPE_DW, 4, 1, IMA_INTERRUPT);
+
+ ret = 0;
+ for (k = 0; k < numcb; k++) {
+ gru_wait_async_cbr(han);
+ for (i = 0; i < numcb; i++) {
+ cb = cb0 + i * GRU_HANDLE_STRIDE;
+ istatus = gru_check_status(cb);
+ if (istatus == CBS_ACTIVE)
+ continue;
+ if (istatus == CBS_EXCEPTION)
+ ret = -EFAULT;
+ else if (buf[i] || buf[i + 1] || buf[i + 2] ||
+ buf[i + 3])
+ ret = -EIO;
+ }
+ }
+ BUG_ON(cmp.done);
- bs = gru->gs_blade;
- if (gru != &bs->bs_grus[1])
- return;
+ gru_unlock_async_resource(han);
+ gru_release_async_resources(han);
+done:
+ kfree(buf);
+ return ret;
+}
- cch = get_cch(gru->gs_gru_base_vaddr, KERNEL_CTXNUM);
- lock_cch_handle(cch);
- if (cch_interrupt_sync(cch))
- BUG();
- if (cch_deallocate(cch))
+/*
+ * Debugging only. User hook for various kernel tests
+ * of driver & gru.
+ */
+int gru_ktest(unsigned long arg)
+{
+ int ret = -EINVAL;
+
+ switch (arg & 0xff) {
+ case 0:
+ ret = quicktest0(arg);
+ break;
+ case 1:
+ ret = quicktest1(arg);
+ break;
+ case 2:
+ ret = quicktest2(arg);
+ break;
+ case 99:
+ ret = gru_free_kernel_contexts();
+ break;
+ }
+ return ret;
+
+}
+
+int gru_kservices_init(void)
+{
+ return 0;
+}
+
+void gru_kservices_exit(void)
+{
+ if (gru_free_kernel_contexts())
BUG();
- unlock_cch_handle(cch);
}
diff --git a/drivers/misc/sgi-gru/grukservices.h b/drivers/misc/sgi-gru/grukservices.h
index 747ed315d56..d60d34bca44 100644
--- a/drivers/misc/sgi-gru/grukservices.h
+++ b/drivers/misc/sgi-gru/grukservices.h
@@ -146,4 +146,55 @@ extern void *gru_get_next_message(struct gru_message_queue_desc *mqd);
extern int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa,
unsigned int bytes);
+/*
+ * Reserve GRU resources to be used asynchronously.
+ *
+ * input:
+ * blade_id - blade on which resources should be reserved
+ * cbrs - number of CBRs
+ * dsr_bytes - number of DSR bytes needed
+ * cmp - completion structure for waiting for
+ * async completions
+ * output:
+ * handle to identify resource
+ * (0 = no resources)
+ */
+extern unsigned long gru_reserve_async_resources(int blade_id, int cbrs, int dsr_bytes,
+ struct completion *cmp);
+
+/*
+ * Release async resources previously reserved.
+ *
+ * input:
+ * han - handle to identify resources
+ */
+extern void gru_release_async_resources(unsigned long han);
+
+/*
+ * Wait for async GRU instructions to complete.
+ *
+ * input:
+ * han - handle to identify resources
+ */
+extern void gru_wait_async_cbr(unsigned long han);
+
+/*
+ * Lock previous reserved async GRU resources
+ *
+ * input:
+ * han - handle to identify resources
+ * output:
+ * cb - pointer to first CBR
+ * dsr - pointer to first DSR
+ */
+extern void gru_lock_async_resource(unsigned long han, void **cb, void **dsr);
+
+/*
+ * Unlock previous reserved async GRU resources
+ *
+ * input:
+ * han - handle to identify resources
+ */
+extern void gru_unlock_async_resource(unsigned long han);
+
#endif /* __GRU_KSERVICES_H_ */
diff --git a/drivers/misc/sgi-gru/grulib.h b/drivers/misc/sgi-gru/grulib.h
index e56e196a699..889bc442a3e 100644
--- a/drivers/misc/sgi-gru/grulib.h
+++ b/drivers/misc/sgi-gru/grulib.h
@@ -32,8 +32,8 @@
/* Set Number of Request Blocks */
#define GRU_CREATE_CONTEXT _IOWR(GRU_IOCTL_NUM, 1, void *)
-/* Register task as using the slice */
-#define GRU_SET_TASK_SLICE _IOWR(GRU_IOCTL_NUM, 5, void *)
+/* Set Context Options */
+#define GRU_SET_CONTEXT_OPTION _IOWR(GRU_IOCTL_NUM, 4, void *)
/* Fetch exception detail */
#define GRU_USER_GET_EXCEPTION_DETAIL _IOWR(GRU_IOCTL_NUM, 6, void *)
@@ -44,8 +44,11 @@
/* For user unload context */
#define GRU_USER_UNLOAD_CONTEXT _IOWR(GRU_IOCTL_NUM, 9, void *)
-/* For fetching GRU chiplet status */
-#define GRU_GET_CHIPLET_STATUS _IOWR(GRU_IOCTL_NUM, 10, void *)
+/* For dumpping GRU chiplet state */
+#define GRU_DUMP_CHIPLET_STATE _IOWR(GRU_IOCTL_NUM, 11, void *)
+
+/* For getting gseg statistics */
+#define GRU_GET_GSEG_STATISTICS _IOWR(GRU_IOCTL_NUM, 12, void *)
/* For user TLB flushing (primarily for tests) */
#define GRU_USER_FLUSH_TLB _IOWR(GRU_IOCTL_NUM, 50, void *)
@@ -53,8 +56,26 @@
/* Get some config options (primarily for tests & emulator) */
#define GRU_GET_CONFIG_INFO _IOWR(GRU_IOCTL_NUM, 51, void *)
+/* Various kernel self-tests */
+#define GRU_KTEST _IOWR(GRU_IOCTL_NUM, 52, void *)
+
#define CONTEXT_WINDOW_BYTES(th) (GRU_GSEG_PAGESIZE * (th))
#define THREAD_POINTER(p, th) (p + GRU_GSEG_PAGESIZE * (th))
+#define GSEG_START(cb) ((void *)((unsigned long)(cb) & ~(GRU_GSEG_PAGESIZE - 1)))
+
+/*
+ * Statictics kept on a per-GTS basis.
+ */
+struct gts_statistics {
+ unsigned long fmm_tlbdropin;
+ unsigned long upm_tlbdropin;
+ unsigned long context_stolen;
+};
+
+struct gru_get_gseg_statistics_req {
+ unsigned long gseg;
+ struct gts_statistics stats;
+};
/*
* Structure used to pass TLB flush parameters to the driver
@@ -75,6 +96,16 @@ struct gru_unload_context_req {
};
/*
+ * Structure used to set context options
+ */
+enum {sco_gseg_owner, sco_cch_req_slice};
+struct gru_set_context_option_req {
+ unsigned long gseg;
+ int op;
+ unsigned long val1;
+};
+
+/*
* Structure used to pass TLB flush parameters to the driver
*/
struct gru_flush_tlb_req {
@@ -84,6 +115,36 @@ struct gru_flush_tlb_req {
};
/*
+ * Structure used to pass TLB flush parameters to the driver
+ */
+enum {dcs_pid, dcs_gid};
+struct gru_dump_chiplet_state_req {
+ unsigned int op;
+ unsigned int gid;
+ int ctxnum;
+ char data_opt;
+ char lock_cch;
+ pid_t pid;
+ void *buf;
+ size_t buflen;
+ /* ---- output --- */
+ unsigned int num_contexts;
+};
+
+#define GRU_DUMP_MAGIC 0x3474ab6c
+struct gru_dump_context_header {
+ unsigned int magic;
+ unsigned int gid;
+ unsigned char ctxnum;
+ unsigned char cbrcnt;
+ unsigned char dsrcnt;
+ pid_t pid;
+ unsigned long vaddr;
+ int cch_locked;
+ unsigned long data[0];
+};
+
+/*
* GRU configuration info (temp - for testing)
*/
struct gru_config_info {
diff --git a/drivers/misc/sgi-gru/grumain.c b/drivers/misc/sgi-gru/grumain.c
index ec3f7a17d22..3bc643dad60 100644
--- a/drivers/misc/sgi-gru/grumain.c
+++ b/drivers/misc/sgi-gru/grumain.c
@@ -3,11 +3,21 @@
*
* DRIVER TABLE MANAGER + GRU CONTEXT LOAD/UNLOAD
*
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
+ * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
*
- * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/kernel.h>
@@ -96,7 +106,7 @@ static int gru_reset_asid_limit(struct gru_state *gru, int asid)
gid = gru->gs_gid;
again:
for (i = 0; i < GRU_NUM_CCH; i++) {
- if (!gru->gs_gts[i])
+ if (!gru->gs_gts[i] || is_kernel_context(gru->gs_gts[i]))
continue;
inuse_asid = gru->gs_gts[i]->ts_gms->ms_asids[gid].mt_asid;
gru_dbg(grudev, "gid %d, gts %p, gms %p, inuse 0x%x, cxt %d\n",
@@ -150,7 +160,7 @@ static unsigned long reserve_resources(unsigned long *p, int n, int mmax,
unsigned long bits = 0;
int i;
- do {
+ while (n--) {
i = find_first_bit(p, mmax);
if (i == mmax)
BUG();
@@ -158,7 +168,7 @@ static unsigned long reserve_resources(unsigned long *p, int n, int mmax,
__set_bit(i, &bits);
if (idx)
*idx++ = i;
- } while (--n);
+ }
return bits;
}
@@ -299,38 +309,39 @@ static struct gru_thread_state *gru_find_current_gts_nolock(struct gru_vma_data
/*
* Allocate a thread state structure.
*/
-static struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma,
- struct gru_vma_data *vdata,
- int tsid)
+struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma,
+ int cbr_au_count, int dsr_au_count, int options, int tsid)
{
struct gru_thread_state *gts;
int bytes;
- bytes = DSR_BYTES(vdata->vd_dsr_au_count) +
- CBR_BYTES(vdata->vd_cbr_au_count);
+ bytes = DSR_BYTES(dsr_au_count) + CBR_BYTES(cbr_au_count);
bytes += sizeof(struct gru_thread_state);
- gts = kzalloc(bytes, GFP_KERNEL);
+ gts = kmalloc(bytes, GFP_KERNEL);
if (!gts)
return NULL;
STAT(gts_alloc);
+ memset(gts, 0, sizeof(struct gru_thread_state)); /* zero out header */
atomic_set(&gts->ts_refcnt, 1);
mutex_init(&gts->ts_ctxlock);
- gts->ts_cbr_au_count = vdata->vd_cbr_au_count;
- gts->ts_dsr_au_count = vdata->vd_dsr_au_count;
- gts->ts_user_options = vdata->vd_user_options;
+ gts->ts_cbr_au_count = cbr_au_count;
+ gts->ts_dsr_au_count = dsr_au_count;
+ gts->ts_user_options = options;
gts->ts_tsid = tsid;
- gts->ts_user_options = vdata->vd_user_options;
gts->ts_ctxnum = NULLCTX;
- gts->ts_mm = current->mm;
- gts->ts_vma = vma;
gts->ts_tlb_int_select = -1;
- gts->ts_gms = gru_register_mmu_notifier();
+ gts->ts_cch_req_slice = -1;
gts->ts_sizeavail = GRU_SIZEAVAIL(PAGE_SHIFT);
- if (!gts->ts_gms)
- goto err;
+ if (vma) {
+ gts->ts_mm = current->mm;
+ gts->ts_vma = vma;
+ gts->ts_gms = gru_register_mmu_notifier();
+ if (!gts->ts_gms)
+ goto err;
+ }
- gru_dbg(grudev, "alloc vdata %p, new gts %p\n", vdata, gts);
+ gru_dbg(grudev, "alloc gts %p\n", gts);
return gts;
err:
@@ -381,7 +392,8 @@ struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct *vma,
struct gru_vma_data *vdata = vma->vm_private_data;
struct gru_thread_state *gts, *ngts;
- gts = gru_alloc_gts(vma, vdata, tsid);
+ gts = gru_alloc_gts(vma, vdata->vd_cbr_au_count, vdata->vd_dsr_au_count,
+ vdata->vd_user_options, tsid);
if (!gts)
return NULL;
@@ -458,7 +470,8 @@ static void gru_prefetch_context(void *gseg, void *cb, void *cbe,
}
static void gru_load_context_data(void *save, void *grubase, int ctxnum,
- unsigned long cbrmap, unsigned long dsrmap)
+ unsigned long cbrmap, unsigned long dsrmap,
+ int data_valid)
{
void *gseg, *cb, *cbe;
unsigned long length;
@@ -471,12 +484,22 @@ static void gru_load_context_data(void *save, void *grubase, int ctxnum,
gru_prefetch_context(gseg, cb, cbe, cbrmap, length);
for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
- save += gru_copy_handle(cb, save);
- save += gru_copy_handle(cbe + i * GRU_HANDLE_STRIDE, save);
+ if (data_valid) {
+ save += gru_copy_handle(cb, save);
+ save += gru_copy_handle(cbe + i * GRU_HANDLE_STRIDE,
+ save);
+ } else {
+ memset(cb, 0, GRU_CACHE_LINE_BYTES);
+ memset(cbe + i * GRU_HANDLE_STRIDE, 0,
+ GRU_CACHE_LINE_BYTES);
+ }
cb += GRU_HANDLE_STRIDE;
}
- memcpy(gseg + GRU_DS_BASE, save, length);
+ if (data_valid)
+ memcpy(gseg + GRU_DS_BASE, save, length);
+ else
+ memset(gseg + GRU_DS_BASE, 0, length);
}
static void gru_unload_context_data(void *save, void *grubase, int ctxnum,
@@ -506,7 +529,8 @@ void gru_unload_context(struct gru_thread_state *gts, int savestate)
struct gru_context_configuration_handle *cch;
int ctxnum = gts->ts_ctxnum;
- zap_vma_ptes(gts->ts_vma, UGRUADDR(gts), GRU_GSEG_PAGESIZE);
+ if (!is_kernel_context(gts))
+ zap_vma_ptes(gts->ts_vma, UGRUADDR(gts), GRU_GSEG_PAGESIZE);
cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);
gru_dbg(grudev, "gts %p\n", gts);
@@ -514,11 +538,14 @@ void gru_unload_context(struct gru_thread_state *gts, int savestate)
if (cch_interrupt_sync(cch))
BUG();
- gru_unload_mm_tracker(gru, gts);
- if (savestate)
+ if (!is_kernel_context(gts))
+ gru_unload_mm_tracker(gru, gts);
+ if (savestate) {
gru_unload_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr,
ctxnum, gts->ts_cbr_map,
gts->ts_dsr_map);
+ gts->ts_data_valid = 1;
+ }
if (cch_deallocate(cch))
BUG();
@@ -526,24 +553,22 @@ void gru_unload_context(struct gru_thread_state *gts, int savestate)
unlock_cch_handle(cch);
gru_free_gru_context(gts);
- STAT(unload_context);
}
/*
* Load a GRU context by copying it from the thread data structure in memory
* to the GRU.
*/
-static void gru_load_context(struct gru_thread_state *gts)
+void gru_load_context(struct gru_thread_state *gts)
{
struct gru_state *gru = gts->ts_gru;
struct gru_context_configuration_handle *cch;
- int err, asid, ctxnum = gts->ts_ctxnum;
+ int i, err, asid, ctxnum = gts->ts_ctxnum;
gru_dbg(grudev, "gts %p\n", gts);
cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);
lock_cch_handle(cch);
- asid = gru_load_mm_tracker(gru, gts);
cch->tfm_fault_bit_enable =
(gts->ts_user_options == GRU_OPT_MISS_FMM_POLL
|| gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
@@ -552,9 +577,32 @@ static void gru_load_context(struct gru_thread_state *gts)
gts->ts_tlb_int_select = gru_cpu_fault_map_id();
cch->tlb_int_select = gts->ts_tlb_int_select;
}
+ if (gts->ts_cch_req_slice >= 0) {
+ cch->req_slice_set_enable = 1;
+ cch->req_slice = gts->ts_cch_req_slice;
+ } else {
+ cch->req_slice_set_enable =0;
+ }
cch->tfm_done_bit_enable = 0;
- err = cch_allocate(cch, asid, gts->ts_sizeavail, gts->ts_cbr_map,
- gts->ts_dsr_map);
+ cch->dsr_allocation_map = gts->ts_dsr_map;
+ cch->cbr_allocation_map = gts->ts_cbr_map;
+
+ if (is_kernel_context(gts)) {
+ cch->unmap_enable = 1;
+ cch->tfm_done_bit_enable = 1;
+ cch->cb_int_enable = 1;
+ } else {
+ cch->unmap_enable = 0;
+ cch->tfm_done_bit_enable = 0;
+ cch->cb_int_enable = 0;
+ asid = gru_load_mm_tracker(gru, gts);
+ for (i = 0; i < 8; i++) {
+ cch->asid[i] = asid + i;
+ cch->sizeavail[i] = gts->ts_sizeavail;
+ }
+ }
+
+ err = cch_allocate(cch);
if (err) {
gru_dbg(grudev,
"err %d: cch %p, gts %p, cbr 0x%lx, dsr 0x%lx\n",
@@ -563,13 +611,11 @@ static void gru_load_context(struct gru_thread_state *gts)
}
gru_load_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr, ctxnum,
- gts->ts_cbr_map, gts->ts_dsr_map);
+ gts->ts_cbr_map, gts->ts_dsr_map, gts->ts_data_valid);
if (cch_start(cch))
BUG();
unlock_cch_handle(cch);
-
- STAT(load_context);
}
/*
@@ -599,6 +645,9 @@ int gru_update_cch(struct gru_thread_state *gts, int force_unload)
cch->sizeavail[i] = gts->ts_sizeavail;
gts->ts_tlb_int_select = gru_cpu_fault_map_id();
cch->tlb_int_select = gru_cpu_fault_map_id();
+ cch->tfm_fault_bit_enable =
+ (gts->ts_user_options == GRU_OPT_MISS_FMM_POLL
+ || gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
} else {
for (i = 0; i < 8; i++)
cch->asid[i] = 0;
@@ -642,7 +691,28 @@ static int gru_retarget_intr(struct gru_thread_state *gts)
#define next_gru(b, g) (((g) < &(b)->bs_grus[GRU_CHIPLETS_PER_BLADE - 1]) ? \
((g)+1) : &(b)->bs_grus[0])
-static void gru_steal_context(struct gru_thread_state *gts)
+static int is_gts_stealable(struct gru_thread_state *gts,
+ struct gru_blade_state *bs)
+{
+ if (is_kernel_context(gts))
+ return down_write_trylock(&bs->bs_kgts_sema);
+ else
+ return mutex_trylock(&gts->ts_ctxlock);
+}
+
+static void gts_stolen(struct gru_thread_state *gts,
+ struct gru_blade_state *bs)
+{
+ if (is_kernel_context(gts)) {
+ up_write(&bs->bs_kgts_sema);
+ STAT(steal_kernel_context);
+ } else {
+ mutex_unlock(&gts->ts_ctxlock);
+ STAT(steal_user_context);
+ }
+}
+
+void gru_steal_context(struct gru_thread_state *gts, int blade_id)
{
struct gru_blade_state *blade;
struct gru_state *gru, *gru0;
@@ -652,8 +722,7 @@ static void gru_steal_context(struct gru_thread_state *gts)
cbr = gts->ts_cbr_au_count;
dsr = gts->ts_dsr_au_count;
- preempt_disable();
- blade = gru_base[uv_numa_blade_id()];
+ blade = gru_base[blade_id];
spin_lock(&blade->bs_lock);
ctxnum = next_ctxnum(blade->bs_lru_ctxnum);
@@ -676,7 +745,7 @@ static void gru_steal_context(struct gru_thread_state *gts)
* success are high. If trylock fails, try to steal a
* different GSEG.
*/
- if (ngts && mutex_trylock(&ngts->ts_ctxlock))
+ if (ngts && is_gts_stealable(ngts, blade))
break;
ngts = NULL;
flag = 1;
@@ -690,13 +759,12 @@ static void gru_steal_context(struct gru_thread_state *gts)
blade->bs_lru_gru = gru;
blade->bs_lru_ctxnum = ctxnum;
spin_unlock(&blade->bs_lock);
- preempt_enable();
if (ngts) {
- STAT(steal_context);
+ gts->ustats.context_stolen++;
ngts->ts_steal_jiffies = jiffies;
- gru_unload_context(ngts, 1);
- mutex_unlock(&ngts->ts_ctxlock);
+ gru_unload_context(ngts, is_kernel_context(ngts) ? 0 : 1);
+ gts_stolen(ngts, blade);
} else {
STAT(steal_context_failed);
}
@@ -710,17 +778,17 @@ static void gru_steal_context(struct gru_thread_state *gts)
/*
* Scan the GRUs on the local blade & assign a GRU context.
*/
-static struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts)
+struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts,
+ int blade)
{
struct gru_state *gru, *grux;
int i, max_active_contexts;
- preempt_disable();
again:
gru = NULL;
max_active_contexts = GRU_NUM_CCH;
- for_each_gru_on_blade(grux, uv_numa_blade_id(), i) {
+ for_each_gru_on_blade(grux, blade, i) {
if (check_gru_resources(grux, gts->ts_cbr_au_count,
gts->ts_dsr_au_count,
max_active_contexts)) {
@@ -760,7 +828,6 @@ again:
STAT(assign_context_failed);
}
- preempt_enable();
return gru;
}
@@ -775,6 +842,7 @@ int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
struct gru_thread_state *gts;
unsigned long paddr, vaddr;
+ int blade_id;
vaddr = (unsigned long)vmf->virtual_address;
gru_dbg(grudev, "vma %p, vaddr 0x%lx (0x%lx)\n",
@@ -789,8 +857,10 @@ int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
again:
mutex_lock(&gts->ts_ctxlock);
preempt_disable();
+ blade_id = uv_numa_blade_id();
+
if (gts->ts_gru) {
- if (gts->ts_gru->gs_blade_id != uv_numa_blade_id()) {
+ if (gts->ts_gru->gs_blade_id != blade_id) {
STAT(migrated_nopfn_unload);
gru_unload_context(gts, 1);
} else {
@@ -800,12 +870,15 @@ again:
}
if (!gts->ts_gru) {
- if (!gru_assign_gru_context(gts)) {
- mutex_unlock(&gts->ts_ctxlock);
+ STAT(load_user_context);
+ if (!gru_assign_gru_context(gts, blade_id)) {
preempt_enable();
+ mutex_unlock(&gts->ts_ctxlock);
+ set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(GRU_ASSIGN_DELAY); /* true hack ZZZ */
+ blade_id = uv_numa_blade_id();
if (gts->ts_steal_jiffies + GRU_STEAL_DELAY < jiffies)
- gru_steal_context(gts);
+ gru_steal_context(gts, blade_id);
goto again;
}
gru_load_context(gts);
@@ -815,8 +888,8 @@ again:
vma->vm_page_prot);
}
- mutex_unlock(&gts->ts_ctxlock);
preempt_enable();
+ mutex_unlock(&gts->ts_ctxlock);
return VM_FAULT_NOPAGE;
}
diff --git a/drivers/misc/sgi-gru/gruprocfs.c b/drivers/misc/sgi-gru/gruprocfs.c
index ee74821b171..9cbf95bedce 100644
--- a/drivers/misc/sgi-gru/gruprocfs.c
+++ b/drivers/misc/sgi-gru/gruprocfs.c
@@ -51,9 +51,12 @@ static int statistics_show(struct seq_file *s, void *p)
printstat(s, assign_context);
printstat(s, assign_context_failed);
printstat(s, free_context);
- printstat(s, load_context);
- printstat(s, unload_context);
- printstat(s, steal_context);
+ printstat(s, load_user_context);
+ printstat(s, load_kernel_context);
+ printstat(s, lock_kernel_context);
+ printstat(s, unlock_kernel_context);
+ printstat(s, steal_user_context);
+ printstat(s, steal_kernel_context);
printstat(s, steal_context_failed);
printstat(s, nopfn);
printstat(s, break_cow);
@@ -70,7 +73,7 @@ static int statistics_show(struct seq_file *s, void *p)
printstat(s, user_flush_tlb);
printstat(s, user_unload_context);
printstat(s, user_exception);
- printstat(s, set_task_slice);
+ printstat(s, set_context_option);
printstat(s, migrate_check);
printstat(s, migrated_retarget);
printstat(s, migrated_unload);
@@ -84,6 +87,9 @@ static int statistics_show(struct seq_file *s, void *p)
printstat(s, tlb_dropin_fail_range_active);
printstat(s, tlb_dropin_fail_idle);
printstat(s, tlb_dropin_fail_fmm);
+ printstat(s, tlb_dropin_fail_no_exception);
+ printstat(s, tlb_dropin_fail_no_exception_war);
+ printstat(s, tfh_stale_on_fault);
printstat(s, mmu_invalidate_range);
printstat(s, mmu_invalidate_page);
printstat(s, mmu_clear_flush_young);
@@ -158,8 +164,7 @@ static ssize_t options_write(struct file *file, const char __user *userbuf,
unsigned long val;
char buf[80];
- if (copy_from_user
- (buf, userbuf, count < sizeof(buf) ? count : sizeof(buf)))
+ if (strncpy_from_user(buf, userbuf, sizeof(buf) - 1) < 0)
return -EFAULT;
buf[count - 1] = '\0';
if (!strict_strtoul(buf, 10, &val))
diff --git a/drivers/misc/sgi-gru/grutables.h b/drivers/misc/sgi-gru/grutables.h
index bf1eeb7553e..34ab3d45391 100644
--- a/drivers/misc/sgi-gru/grutables.h
+++ b/drivers/misc/sgi-gru/grutables.h
@@ -148,11 +148,13 @@
#include <linux/wait.h>
#include <linux/mmu_notifier.h>
#include "gru.h"
+#include "grulib.h"
#include "gruhandles.h"
extern struct gru_stats_s gru_stats;
extern struct gru_blade_state *gru_base[];
extern unsigned long gru_start_paddr, gru_end_paddr;
+extern void *gru_start_vaddr;
extern unsigned int gru_max_gids;
#define GRU_MAX_BLADES MAX_NUMNODES
@@ -174,9 +176,12 @@ struct gru_stats_s {
atomic_long_t assign_context;
atomic_long_t assign_context_failed;
atomic_long_t free_context;
- atomic_long_t load_context;
- atomic_long_t unload_context;
- atomic_long_t steal_context;
+ atomic_long_t load_user_context;
+ atomic_long_t load_kernel_context;
+ atomic_long_t lock_kernel_context;
+ atomic_long_t unlock_kernel_context;
+ atomic_long_t steal_user_context;
+ atomic_long_t steal_kernel_context;
atomic_long_t steal_context_failed;
atomic_long_t nopfn;
atomic_long_t break_cow;
@@ -193,7 +198,7 @@ struct gru_stats_s {
atomic_long_t user_flush_tlb;
atomic_long_t user_unload_context;
atomic_long_t user_exception;
- atomic_long_t set_task_slice;
+ atomic_long_t set_context_option;
atomic_long_t migrate_check;
atomic_long_t migrated_retarget;
atomic_long_t migrated_unload;
@@ -207,6 +212,9 @@ struct gru_stats_s {
atomic_long_t tlb_dropin_fail_range_active;
atomic_long_t tlb_dropin_fail_idle;
atomic_long_t tlb_dropin_fail_fmm;
+ atomic_long_t tlb_dropin_fail_no_exception;
+ atomic_long_t tlb_dropin_fail_no_exception_war;
+ atomic_long_t tfh_stale_on_fault;
atomic_long_t mmu_invalidate_range;
atomic_long_t mmu_invalidate_page;
atomic_long_t mmu_clear_flush_young;
@@ -253,7 +261,6 @@ extern struct mcs_op_statistic mcs_op_statistics[mcsop_last];
#define OPT_DPRINT 1
#define OPT_STATS 2
-#define GRU_QUICKLOOK 4
#define IRQ_GRU 110 /* Starting IRQ number for interrupts */
@@ -373,6 +380,7 @@ struct gru_thread_state {
required for contest */
unsigned char ts_cbr_au_count;/* Number of CBR resources
required for contest */
+ char ts_cch_req_slice;/* CCH packet slice */
char ts_blade; /* If >= 0, migrate context if
ref from diferent blade */
char ts_force_cch_reload;
@@ -380,6 +388,9 @@ struct gru_thread_state {
after migration */
char ts_cbr_idx[GRU_CBR_AU];/* CBR numbers of each
allocated CB */
+ int ts_data_valid; /* Indicates if ts_gdata has
+ valid data */
+ struct gts_statistics ustats; /* User statistics */
unsigned long ts_gdata[0]; /* save area for GRU data (CB,
DS, CBE) */
};
@@ -452,6 +463,14 @@ struct gru_blade_state {
reserved cb */
void *kernel_dsr; /* First kernel
reserved DSR */
+ struct rw_semaphore bs_kgts_sema; /* lock for kgts */
+ struct gru_thread_state *bs_kgts; /* GTS for kernel use */
+
+ /* ---- the following are used for managing kernel async GRU CBRs --- */
+ int bs_async_dsr_bytes; /* DSRs for async */
+ int bs_async_cbrs; /* CBRs AU for async */
+ struct completion *bs_async_wq;
+
/* ---- the following are protected by the bs_lock spinlock ---- */
spinlock_t bs_lock; /* lock used for
stealing contexts */
@@ -552,6 +571,12 @@ struct gru_blade_state {
/* Lock hierarchy checking enabled only in emulator */
+/* 0 = lock failed, 1 = locked */
+static inline int __trylock_handle(void *h)
+{
+ return !test_and_set_bit(1, h);
+}
+
static inline void __lock_handle(void *h)
{
while (test_and_set_bit(1, h))
@@ -563,6 +588,11 @@ static inline void __unlock_handle(void *h)
clear_bit(1, h);
}
+static inline int trylock_cch_handle(struct gru_context_configuration_handle *cch)
+{
+ return __trylock_handle(cch);
+}
+
static inline void lock_cch_handle(struct gru_context_configuration_handle *cch)
{
__lock_handle(cch);
@@ -584,6 +614,11 @@ static inline void unlock_tgh_handle(struct gru_tlb_global_handle *tgh)
__unlock_handle(tgh);
}
+static inline int is_kernel_context(struct gru_thread_state *gts)
+{
+ return !gts->ts_mm;
+}
+
/*-----------------------------------------------------------------------------
* Function prototypes & externs
*/
@@ -598,24 +633,32 @@ extern struct gru_thread_state *gru_find_thread_state(struct vm_area_struct
*vma, int tsid);
extern struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct
*vma, int tsid);
+extern struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts,
+ int blade);
+extern void gru_load_context(struct gru_thread_state *gts);
+extern void gru_steal_context(struct gru_thread_state *gts, int blade_id);
extern void gru_unload_context(struct gru_thread_state *gts, int savestate);
extern int gru_update_cch(struct gru_thread_state *gts, int force_unload);
extern void gts_drop(struct gru_thread_state *gts);
extern void gru_tgh_flush_init(struct gru_state *gru);
-extern int gru_kservices_init(struct gru_state *gru);
-extern void gru_kservices_exit(struct gru_state *gru);
+extern int gru_kservices_init(void);
+extern void gru_kservices_exit(void);
+extern int gru_dump_chiplet_request(unsigned long arg);
+extern long gru_get_gseg_statistics(unsigned long arg);
extern irqreturn_t gru_intr(int irq, void *dev_id);
extern int gru_handle_user_call_os(unsigned long address);
extern int gru_user_flush_tlb(unsigned long arg);
extern int gru_user_unload_context(unsigned long arg);
extern int gru_get_exception_detail(unsigned long arg);
-extern int gru_set_task_slice(long address);
+extern int gru_set_context_option(unsigned long address);
extern int gru_cpu_fault_map_id(void);
extern struct vm_area_struct *gru_find_vma(unsigned long vaddr);
extern void gru_flush_all_tlb(struct gru_state *gru);
extern int gru_proc_init(void);
extern void gru_proc_exit(void);
+extern struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma,
+ int cbr_au_count, int dsr_au_count, int options, int tsid);
extern unsigned long gru_reserve_cb_resources(struct gru_state *gru,
int cbr_au_count, char *cbmap);
extern unsigned long gru_reserve_ds_resources(struct gru_state *gru,
@@ -624,6 +667,7 @@ extern int gru_fault(struct vm_area_struct *, struct vm_fault *vmf);
extern struct gru_mm_struct *gru_register_mmu_notifier(void);
extern void gru_drop_mmu_notifier(struct gru_mm_struct *gms);
+extern int gru_ktest(unsigned long arg);
extern void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start,
unsigned long len);
diff --git a/drivers/pps/Kconfig b/drivers/pps/Kconfig
new file mode 100644
index 00000000000..cc2eb8edb51
--- /dev/null
+++ b/drivers/pps/Kconfig
@@ -0,0 +1,33 @@
+#
+# PPS support configuration
+#
+
+menu "PPS support"
+
+config PPS
+ tristate "PPS support"
+ depends on EXPERIMENTAL
+ ---help---
+ PPS (Pulse Per Second) is a special pulse provided by some GPS
+ antennae. Userland can use it to get a high-precision time
+ reference.
+
+ Some antennae's PPS signals are connected with the CD (Carrier
+ Detect) pin of the serial line they use to communicate with the
+ host. In this case use the SERIAL_LINE client support.
+
+ Some antennae's PPS signals are connected with some special host
+ inputs so you have to enable the corresponding client support.
+
+ To compile this driver as a module, choose M here: the module
+ will be called pps_core.ko.
+
+config PPS_DEBUG
+ bool "PPS debugging messages"
+ depends on PPS
+ help
+ Say Y here if you want the PPS support to produce a bunch of debug
+ messages to the system log. Select this if you are having a
+ problem with PPS support and want to see more of what is going on.
+
+endmenu
diff --git a/drivers/pps/Makefile b/drivers/pps/Makefile
new file mode 100644
index 00000000000..19ea582f431
--- /dev/null
+++ b/drivers/pps/Makefile
@@ -0,0 +1,8 @@
+#
+# Makefile for the PPS core.
+#
+
+pps_core-y := pps.o kapi.o sysfs.o
+obj-$(CONFIG_PPS) := pps_core.o
+
+ccflags-$(CONFIG_PPS_DEBUG) := -DDEBUG
diff --git a/drivers/pps/kapi.c b/drivers/pps/kapi.c
new file mode 100644
index 00000000000..35a0b192d76
--- /dev/null
+++ b/drivers/pps/kapi.c
@@ -0,0 +1,329 @@
+/*
+ * kernel API
+ *
+ *
+ * Copyright (C) 2005-2009 Rodolfo Giometti <giometti@linux.it>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/time.h>
+#include <linux/spinlock.h>
+#include <linux/idr.h>
+#include <linux/fs.h>
+#include <linux/pps_kernel.h>
+
+/*
+ * Global variables
+ */
+
+DEFINE_SPINLOCK(pps_idr_lock);
+DEFINE_IDR(pps_idr);
+
+/*
+ * Local functions
+ */
+
+static void pps_add_offset(struct pps_ktime *ts, struct pps_ktime *offset)
+{
+ ts->nsec += offset->nsec;
+ while (ts->nsec >= NSEC_PER_SEC) {
+ ts->nsec -= NSEC_PER_SEC;
+ ts->sec++;
+ }
+ while (ts->nsec < 0) {
+ ts->nsec += NSEC_PER_SEC;
+ ts->sec--;
+ }
+ ts->sec += offset->sec;
+}
+
+/*
+ * Exported functions
+ */
+
+/* pps_get_source - find a PPS source
+ * @source: the PPS source ID.
+ *
+ * This function is used to find an already registered PPS source into the
+ * system.
+ *
+ * The function returns NULL if found nothing, otherwise it returns a pointer
+ * to the PPS source data struct (the refcounter is incremented by 1).
+ */
+
+struct pps_device *pps_get_source(int source)
+{
+ struct pps_device *pps;
+ unsigned long flags;
+
+ spin_lock_irqsave(&pps_idr_lock, flags);
+
+ pps = idr_find(&pps_idr, source);
+ if (pps != NULL)
+ atomic_inc(&pps->usage);
+
+ spin_unlock_irqrestore(&pps_idr_lock, flags);
+
+ return pps;
+}
+
+/* pps_put_source - free the PPS source data
+ * @pps: a pointer to the PPS source.
+ *
+ * This function is used to free a PPS data struct if its refcount is 0.
+ */
+
+void pps_put_source(struct pps_device *pps)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&pps_idr_lock, flags);
+ BUG_ON(atomic_read(&pps->usage) == 0);
+
+ if (!atomic_dec_and_test(&pps->usage)) {
+ pps = NULL;
+ goto exit;
+ }
+
+ /* No more reference to the PPS source. We can safely remove the
+ * PPS data struct.
+ */
+ idr_remove(&pps_idr, pps->id);
+
+exit:
+ spin_unlock_irqrestore(&pps_idr_lock, flags);
+ kfree(pps);
+}
+
+/* pps_register_source - add a PPS source in the system
+ * @info: the PPS info struct
+ * @default_params: the default PPS parameters of the new source
+ *
+ * This function is used to add a new PPS source in the system. The new
+ * source is described by info's fields and it will have, as default PPS
+ * parameters, the ones specified into default_params.
+ *
+ * The function returns, in case of success, the PPS source ID.
+ */
+
+int pps_register_source(struct pps_source_info *info, int default_params)
+{
+ struct pps_device *pps;
+ int id;
+ int err;
+
+ /* Sanity checks */
+ if ((info->mode & default_params) != default_params) {
+ printk(KERN_ERR "pps: %s: unsupported default parameters\n",
+ info->name);
+ err = -EINVAL;
+ goto pps_register_source_exit;
+ }
+ if ((info->mode & (PPS_ECHOASSERT | PPS_ECHOCLEAR)) != 0 &&
+ info->echo == NULL) {
+ printk(KERN_ERR "pps: %s: echo function is not defined\n",
+ info->name);
+ err = -EINVAL;
+ goto pps_register_source_exit;
+ }
+ if ((info->mode & (PPS_TSFMT_TSPEC | PPS_TSFMT_NTPFP)) == 0) {
+ printk(KERN_ERR "pps: %s: unspecified time format\n",
+ info->name);
+ err = -EINVAL;
+ goto pps_register_source_exit;
+ }
+
+ /* Allocate memory for the new PPS source struct */
+ pps = kzalloc(sizeof(struct pps_device), GFP_KERNEL);
+ if (pps == NULL) {
+ err = -ENOMEM;
+ goto pps_register_source_exit;
+ }
+
+ /* These initializations must be done before calling idr_get_new()
+ * in order to avoid reces into pps_event().
+ */
+ pps->params.api_version = PPS_API_VERS;
+ pps->params.mode = default_params;
+ pps->info = *info;
+
+ init_waitqueue_head(&pps->queue);
+ spin_lock_init(&pps->lock);
+ atomic_set(&pps->usage, 1);
+
+ /* Get new ID for the new PPS source */
+ if (idr_pre_get(&pps_idr, GFP_KERNEL) == 0) {
+ err = -ENOMEM;
+ goto kfree_pps;
+ }
+
+ spin_lock_irq(&pps_idr_lock);
+
+ /* Now really allocate the PPS source.
+ * After idr_get_new() calling the new source will be freely available
+ * into the kernel.
+ */
+ err = idr_get_new(&pps_idr, pps, &id);
+ if (err < 0) {
+ spin_unlock_irq(&pps_idr_lock);
+ goto kfree_pps;
+ }
+
+ id = id & MAX_ID_MASK;
+ if (id >= PPS_MAX_SOURCES) {
+ spin_unlock_irq(&pps_idr_lock);
+
+ printk(KERN_ERR "pps: %s: too many PPS sources in the system\n",
+ info->name);
+ err = -EBUSY;
+ goto free_idr;
+ }
+ pps->id = id;
+
+ spin_unlock_irq(&pps_idr_lock);
+
+ /* Create the char device */
+ err = pps_register_cdev(pps);
+ if (err < 0) {
+ printk(KERN_ERR "pps: %s: unable to create char device\n",
+ info->name);
+ goto free_idr;
+ }
+
+ pr_info("new PPS source %s at ID %d\n", info->name, id);
+
+ return id;
+
+free_idr:
+ spin_lock_irq(&pps_idr_lock);
+ idr_remove(&pps_idr, id);
+ spin_unlock_irq(&pps_idr_lock);
+
+kfree_pps:
+ kfree(pps);
+
+pps_register_source_exit:
+ printk(KERN_ERR "pps: %s: unable to register source\n", info->name);
+
+ return err;
+}
+EXPORT_SYMBOL(pps_register_source);
+
+/* pps_unregister_source - remove a PPS source from the system
+ * @source: the PPS source ID
+ *
+ * This function is used to remove a previously registered PPS source from
+ * the system.
+ */
+
+void pps_unregister_source(int source)
+{
+ struct pps_device *pps;
+
+ spin_lock_irq(&pps_idr_lock);
+ pps = idr_find(&pps_idr, source);
+
+ if (!pps) {
+ BUG();
+ spin_unlock_irq(&pps_idr_lock);
+ return;
+ }
+ spin_unlock_irq(&pps_idr_lock);
+
+ pps_unregister_cdev(pps);
+ pps_put_source(pps);
+}
+EXPORT_SYMBOL(pps_unregister_source);
+
+/* pps_event - register a PPS event into the system
+ * @source: the PPS source ID
+ * @ts: the event timestamp
+ * @event: the event type
+ * @data: userdef pointer
+ *
+ * This function is used by each PPS client in order to register a new
+ * PPS event into the system (it's usually called inside an IRQ handler).
+ *
+ * If an echo function is associated with the PPS source it will be called
+ * as:
+ * pps->info.echo(source, event, data);
+ */
+
+void pps_event(int source, struct pps_ktime *ts, int event, void *data)
+{
+ struct pps_device *pps;
+ unsigned long flags;
+
+ if ((event & (PPS_CAPTUREASSERT | PPS_CAPTURECLEAR)) == 0) {
+ printk(KERN_ERR "pps: unknown event (%x) for source %d\n",
+ event, source);
+ return;
+ }
+
+ pps = pps_get_source(source);
+ if (!pps)
+ return;
+
+ pr_debug("PPS event on source %d at %llu.%06u\n",
+ pps->id, (unsigned long long) ts->sec, ts->nsec);
+
+ spin_lock_irqsave(&pps->lock, flags);
+
+ /* Must call the echo function? */
+ if ((pps->params.mode & (PPS_ECHOASSERT | PPS_ECHOCLEAR)))
+ pps->info.echo(source, event, data);
+
+ /* Check the event */
+ pps->current_mode = pps->params.mode;
+ if (event & PPS_CAPTUREASSERT) {
+ /* We have to add an offset? */
+ if (pps->params.mode & PPS_OFFSETASSERT)
+ pps_add_offset(ts, &pps->params.assert_off_tu);
+
+ /* Save the time stamp */
+ pps->assert_tu = *ts;
+ pps->assert_sequence++;
+ pr_debug("capture assert seq #%u for source %d\n",
+ pps->assert_sequence, source);
+ }
+ if (event & PPS_CAPTURECLEAR) {
+ /* We have to add an offset? */
+ if (pps->params.mode & PPS_OFFSETCLEAR)
+ pps_add_offset(ts, &pps->params.clear_off_tu);
+
+ /* Save the time stamp */
+ pps->clear_tu = *ts;
+ pps->clear_sequence++;
+ pr_debug("capture clear seq #%u for source %d\n",
+ pps->clear_sequence, source);
+ }
+
+ pps->go = ~0;
+ wake_up_interruptible(&pps->queue);
+
+ kill_fasync(&pps->async_queue, SIGIO, POLL_IN);
+
+ spin_unlock_irqrestore(&pps->lock, flags);
+
+ /* Now we can release the PPS source for (possible) deregistration */
+ pps_put_source(pps);
+}
+EXPORT_SYMBOL(pps_event);
diff --git a/drivers/pps/pps.c b/drivers/pps/pps.c
new file mode 100644
index 00000000000..ac8cc8cea1e
--- /dev/null
+++ b/drivers/pps/pps.c
@@ -0,0 +1,312 @@
+/*
+ * PPS core file
+ *
+ *
+ * Copyright (C) 2005-2009 Rodolfo Giometti <giometti@linux.it>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/uaccess.h>
+#include <linux/idr.h>
+#include <linux/cdev.h>
+#include <linux/poll.h>
+#include <linux/pps_kernel.h>
+
+/*
+ * Local variables
+ */
+
+static dev_t pps_devt;
+static struct class *pps_class;
+
+/*
+ * Char device methods
+ */
+
+static unsigned int pps_cdev_poll(struct file *file, poll_table *wait)
+{
+ struct pps_device *pps = file->private_data;
+
+ poll_wait(file, &pps->queue, wait);
+
+ return POLLIN | POLLRDNORM;
+}
+
+static int pps_cdev_fasync(int fd, struct file *file, int on)
+{
+ struct pps_device *pps = file->private_data;
+ return fasync_helper(fd, file, on, &pps->async_queue);
+}
+
+static long pps_cdev_ioctl(struct file *file,
+ unsigned int cmd, unsigned long arg)
+{
+ struct pps_device *pps = file->private_data;
+ struct pps_kparams params;
+ struct pps_fdata fdata;
+ unsigned long ticks;
+ void __user *uarg = (void __user *) arg;
+ int __user *iuarg = (int __user *) arg;
+ int err;
+
+ switch (cmd) {
+ case PPS_GETPARAMS:
+ pr_debug("PPS_GETPARAMS: source %d\n", pps->id);
+
+ /* Return current parameters */
+ err = copy_to_user(uarg, &pps->params,
+ sizeof(struct pps_kparams));
+ if (err)
+ return -EFAULT;
+
+ break;
+
+ case PPS_SETPARAMS:
+ pr_debug("PPS_SETPARAMS: source %d\n", pps->id);
+
+ /* Check the capabilities */
+ if (!capable(CAP_SYS_TIME))
+ return -EPERM;
+
+ err = copy_from_user(&params, uarg, sizeof(struct pps_kparams));
+ if (err)
+ return -EFAULT;
+ if (!(params.mode & (PPS_CAPTUREASSERT | PPS_CAPTURECLEAR))) {
+ pr_debug("capture mode unspecified (%x)\n",
+ params.mode);
+ return -EINVAL;
+ }
+
+ /* Check for supported capabilities */
+ if ((params.mode & ~pps->info.mode) != 0) {
+ pr_debug("unsupported capabilities (%x)\n",
+ params.mode);
+ return -EINVAL;
+ }
+
+ spin_lock_irq(&pps->lock);
+
+ /* Save the new parameters */
+ pps->params = params;
+
+ /* Restore the read only parameters */
+ if ((params.mode & (PPS_TSFMT_TSPEC | PPS_TSFMT_NTPFP)) == 0) {
+ /* section 3.3 of RFC 2783 interpreted */
+ pr_debug("time format unspecified (%x)\n",
+ params.mode);
+ pps->params.mode |= PPS_TSFMT_TSPEC;
+ }
+ if (pps->info.mode & PPS_CANWAIT)
+ pps->params.mode |= PPS_CANWAIT;
+ pps->params.api_version = PPS_API_VERS;
+
+ spin_unlock_irq(&pps->lock);
+
+ break;
+
+ case PPS_GETCAP:
+ pr_debug("PPS_GETCAP: source %d\n", pps->id);
+
+ err = put_user(pps->info.mode, iuarg);
+ if (err)
+ return -EFAULT;
+
+ break;
+
+ case PPS_FETCH:
+ pr_debug("PPS_FETCH: source %d\n", pps->id);
+
+ err = copy_from_user(&fdata, uarg, sizeof(struct pps_fdata));
+ if (err)
+ return -EFAULT;
+
+ pps->go = 0;
+
+ /* Manage the timeout */
+ if (fdata.timeout.flags & PPS_TIME_INVALID)
+ err = wait_event_interruptible(pps->queue, pps->go);
+ else {
+ pr_debug("timeout %lld.%09d\n",
+ (long long) fdata.timeout.sec,
+ fdata.timeout.nsec);
+ ticks = fdata.timeout.sec * HZ;
+ ticks += fdata.timeout.nsec / (NSEC_PER_SEC / HZ);
+
+ if (ticks != 0) {
+ err = wait_event_interruptible_timeout(
+ pps->queue, pps->go, ticks);
+ if (err == 0)
+ return -ETIMEDOUT;
+ }
+ }
+
+ /* Check for pending signals */
+ if (err == -ERESTARTSYS) {
+ pr_debug("pending signal caught\n");
+ return -EINTR;
+ }
+
+ /* Return the fetched timestamp */
+ spin_lock_irq(&pps->lock);
+
+ fdata.info.assert_sequence = pps->assert_sequence;
+ fdata.info.clear_sequence = pps->clear_sequence;
+ fdata.info.assert_tu = pps->assert_tu;
+ fdata.info.clear_tu = pps->clear_tu;
+ fdata.info.current_mode = pps->current_mode;
+
+ spin_unlock_irq(&pps->lock);
+
+ err = copy_to_user(uarg, &fdata, sizeof(struct pps_fdata));
+ if (err)
+ return -EFAULT;
+
+ break;
+
+ default:
+ return -ENOTTY;
+ break;
+ }
+
+ return 0;
+}
+
+static int pps_cdev_open(struct inode *inode, struct file *file)
+{
+ struct pps_device *pps = container_of(inode->i_cdev,
+ struct pps_device, cdev);
+ int found;
+
+ found = pps_get_source(pps->id) != 0;
+ if (!found)
+ return -ENODEV;
+
+ file->private_data = pps;
+
+ return 0;
+}
+
+static int pps_cdev_release(struct inode *inode, struct file *file)
+{
+ struct pps_device *pps = file->private_data;
+
+ /* Free the PPS source and wake up (possible) deregistration */
+ pps_put_source(pps);
+
+ return 0;
+}
+
+/*
+ * Char device stuff
+ */
+
+static const struct file_operations pps_cdev_fops = {
+ .owner = THIS_MODULE,
+ .llseek = no_llseek,
+ .poll = pps_cdev_poll,
+ .fasync = pps_cdev_fasync,
+ .unlocked_ioctl = pps_cdev_ioctl,
+ .open = pps_cdev_open,
+ .release = pps_cdev_release,
+};
+
+int pps_register_cdev(struct pps_device *pps)
+{
+ int err;
+
+ pps->devno = MKDEV(MAJOR(pps_devt), pps->id);
+ cdev_init(&pps->cdev, &pps_cdev_fops);
+ pps->cdev.owner = pps->info.owner;
+
+ err = cdev_add(&pps->cdev, pps->devno, 1);
+ if (err) {
+ printk(KERN_ERR "pps: %s: failed to add char device %d:%d\n",
+ pps->info.name, MAJOR(pps_devt), pps->id);
+ return err;
+ }
+ pps->dev = device_create(pps_class, pps->info.dev, pps->devno, NULL,
+ "pps%d", pps->id);
+ if (err)
+ goto del_cdev;
+ dev_set_drvdata(pps->dev, pps);
+
+ pr_debug("source %s got cdev (%d:%d)\n", pps->info.name,
+ MAJOR(pps_devt), pps->id);
+
+ return 0;
+
+del_cdev:
+ cdev_del(&pps->cdev);
+
+ return err;
+}
+
+void pps_unregister_cdev(struct pps_device *pps)
+{
+ device_destroy(pps_class, pps->devno);
+ cdev_del(&pps->cdev);
+}
+
+/*
+ * Module stuff
+ */
+
+static void __exit pps_exit(void)
+{
+ class_destroy(pps_class);
+ unregister_chrdev_region(pps_devt, PPS_MAX_SOURCES);
+}
+
+static int __init pps_init(void)
+{
+ int err;
+
+ pps_class = class_create(THIS_MODULE, "pps");
+ if (!pps_class) {
+ printk(KERN_ERR "pps: failed to allocate class\n");
+ return -ENOMEM;
+ }
+ pps_class->dev_attrs = pps_attrs;
+
+ err = alloc_chrdev_region(&pps_devt, 0, PPS_MAX_SOURCES, "pps");
+ if (err < 0) {
+ printk(KERN_ERR "pps: failed to allocate char device region\n");
+ goto remove_class;
+ }
+
+ pr_info("LinuxPPS API ver. %d registered\n", PPS_API_VERS);
+ pr_info("Software ver. %s - Copyright 2005-2007 Rodolfo Giometti "
+ "<giometti@linux.it>\n", PPS_VERSION);
+
+ return 0;
+
+remove_class:
+ class_destroy(pps_class);
+
+ return err;
+}
+
+subsys_initcall(pps_init);
+module_exit(pps_exit);
+
+MODULE_AUTHOR("Rodolfo Giometti <giometti@linux.it>");
+MODULE_DESCRIPTION("LinuxPPS support (RFC 2783) - ver. " PPS_VERSION);
+MODULE_LICENSE("GPL");
diff --git a/drivers/pps/sysfs.c b/drivers/pps/sysfs.c
new file mode 100644
index 00000000000..ef0978c71ee
--- /dev/null
+++ b/drivers/pps/sysfs.c
@@ -0,0 +1,98 @@
+/*
+ * PPS sysfs support
+ *
+ *
+ * Copyright (C) 2007-2009 Rodolfo Giometti <giometti@linux.it>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/pps_kernel.h>
+
+/*
+ * Attribute functions
+ */
+
+static ssize_t pps_show_assert(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct pps_device *pps = dev_get_drvdata(dev);
+
+ if (!(pps->info.mode & PPS_CAPTUREASSERT))
+ return 0;
+
+ return sprintf(buf, "%lld.%09d#%d\n",
+ (long long) pps->assert_tu.sec, pps->assert_tu.nsec,
+ pps->assert_sequence);
+}
+
+static ssize_t pps_show_clear(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct pps_device *pps = dev_get_drvdata(dev);
+
+ if (!(pps->info.mode & PPS_CAPTURECLEAR))
+ return 0;
+
+ return sprintf(buf, "%lld.%09d#%d\n",
+ (long long) pps->clear_tu.sec, pps->clear_tu.nsec,
+ pps->clear_sequence);
+}
+
+static ssize_t pps_show_mode(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct pps_device *pps = dev_get_drvdata(dev);
+
+ return sprintf(buf, "%4x\n", pps->info.mode);
+}
+
+static ssize_t pps_show_echo(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct pps_device *pps = dev_get_drvdata(dev);
+
+ return sprintf(buf, "%d\n", !!pps->info.echo);
+}
+
+static ssize_t pps_show_name(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct pps_device *pps = dev_get_drvdata(dev);
+
+ return sprintf(buf, "%s\n", pps->info.name);
+}
+
+static ssize_t pps_show_path(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct pps_device *pps = dev_get_drvdata(dev);
+
+ return sprintf(buf, "%s\n", pps->info.path);
+}
+
+struct device_attribute pps_attrs[] = {
+ __ATTR(assert, S_IRUGO, pps_show_assert, NULL),
+ __ATTR(clear, S_IRUGO, pps_show_clear, NULL),
+ __ATTR(mode, S_IRUGO, pps_show_mode, NULL),
+ __ATTR(echo, S_IRUGO, pps_show_echo, NULL),
+ __ATTR(name, S_IRUGO, pps_show_name, NULL),
+ __ATTR(path, S_IRUGO, pps_show_path, NULL),
+ __ATTR_NULL,
+};
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 277d35d232f..81adbdbd504 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -296,6 +296,15 @@ config RTC_DRV_RX8581
This driver can also be built as a module. If so the module
will be called rtc-rx8581.
+config RTC_DRV_RX8025
+ tristate "Epson RX-8025SA/NB"
+ help
+ If you say yes here you get support for the Epson
+ RX-8025SA/NB RTC chips.
+
+ This driver can also be built as a module. If so, the module
+ will be called rtc-rx8025.
+
endif # I2C
comment "SPI RTC drivers"
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index 6c0639a14f0..3c0f2b2ac92 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -62,6 +62,7 @@ obj-$(CONFIG_RTC_DRV_R9701) += rtc-r9701.o
obj-$(CONFIG_RTC_DRV_RS5C313) += rtc-rs5c313.o
obj-$(CONFIG_RTC_DRV_RS5C348) += rtc-rs5c348.o
obj-$(CONFIG_RTC_DRV_RS5C372) += rtc-rs5c372.o
+obj-$(CONFIG_RTC_DRV_RX8025) += rtc-rx8025.o
obj-$(CONFIG_RTC_DRV_RX8581) += rtc-rx8581.o
obj-$(CONFIG_RTC_DRV_S35390A) += rtc-s35390a.o
obj-$(CONFIG_RTC_DRV_S3C) += rtc-s3c.o
diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c
index 2c4a65302a9..8a6f9a9f9cb 100644
--- a/drivers/rtc/rtc-ds1307.c
+++ b/drivers/rtc/rtc-ds1307.c
@@ -31,6 +31,8 @@ enum ds_type {
ds_1338,
ds_1339,
ds_1340,
+ ds_1388,
+ ds_3231,
m41t00,
rx_8025,
// rs5c372 too? different address...
@@ -66,6 +68,7 @@ enum ds_type {
#define DS1337_REG_CONTROL 0x0e
# define DS1337_BIT_nEOSC 0x80
# define DS1339_BIT_BBSQI 0x20
+# define DS3231_BIT_BBSQW 0x40 /* same as BBSQI */
# define DS1337_BIT_RS2 0x10
# define DS1337_BIT_RS1 0x08
# define DS1337_BIT_INTCN 0x04
@@ -94,6 +97,7 @@ enum ds_type {
struct ds1307 {
+ u8 offset; /* register's offset */
u8 regs[11];
enum ds_type type;
unsigned long flags;
@@ -128,6 +132,9 @@ static const struct chip_desc chips[] = {
},
[ds_1340] = {
},
+[ds_3231] = {
+ .alarm = 1,
+},
[m41t00] = {
},
[rx_8025] = {
@@ -138,7 +145,9 @@ static const struct i2c_device_id ds1307_id[] = {
{ "ds1337", ds_1337 },
{ "ds1338", ds_1338 },
{ "ds1339", ds_1339 },
+ { "ds1388", ds_1388 },
{ "ds1340", ds_1340 },
+ { "ds3231", ds_3231 },
{ "m41t00", m41t00 },
{ "rx8025", rx_8025 },
{ }
@@ -291,7 +300,7 @@ static int ds1307_get_time(struct device *dev, struct rtc_time *t)
/* read the RTC date and time registers all at once */
tmp = ds1307->read_block_data(ds1307->client,
- DS1307_REG_SECS, 7, ds1307->regs);
+ ds1307->offset, 7, ds1307->regs);
if (tmp != 7) {
dev_err(dev, "%s error %d\n", "read", tmp);
return -EIO;
@@ -353,6 +362,7 @@ static int ds1307_set_time(struct device *dev, struct rtc_time *t)
switch (ds1307->type) {
case ds_1337:
case ds_1339:
+ case ds_3231:
buf[DS1307_REG_MONTH] |= DS1337_BIT_CENTURY;
break;
case ds_1340:
@@ -367,7 +377,8 @@ static int ds1307_set_time(struct device *dev, struct rtc_time *t)
"write", buf[0], buf[1], buf[2], buf[3],
buf[4], buf[5], buf[6]);
- result = ds1307->write_block_data(ds1307->client, 0, 7, buf);
+ result = ds1307->write_block_data(ds1307->client,
+ ds1307->offset, 7, buf);
if (result < 0) {
dev_err(dev, "%s error %d\n", "write", result);
return result;
@@ -624,6 +635,11 @@ static int __devinit ds1307_probe(struct i2c_client *client,
struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent);
int want_irq = false;
unsigned char *buf;
+ static const int bbsqi_bitpos[] = {
+ [ds_1337] = 0,
+ [ds_1339] = DS1339_BIT_BBSQI,
+ [ds_3231] = DS3231_BIT_BBSQW,
+ };
if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)
&& !i2c_check_functionality(adapter, I2C_FUNC_SMBUS_I2C_BLOCK))
@@ -632,9 +648,12 @@ static int __devinit ds1307_probe(struct i2c_client *client,
if (!(ds1307 = kzalloc(sizeof(struct ds1307), GFP_KERNEL)))
return -ENOMEM;
- ds1307->client = client;
i2c_set_clientdata(client, ds1307);
- ds1307->type = id->driver_data;
+
+ ds1307->client = client;
+ ds1307->type = id->driver_data;
+ ds1307->offset = 0;
+
buf = ds1307->regs;
if (i2c_check_functionality(adapter, I2C_FUNC_SMBUS_I2C_BLOCK)) {
ds1307->read_block_data = i2c_smbus_read_i2c_block_data;
@@ -647,6 +666,7 @@ static int __devinit ds1307_probe(struct i2c_client *client,
switch (ds1307->type) {
case ds_1337:
case ds_1339:
+ case ds_3231:
/* has IRQ? */
if (ds1307->client->irq > 0 && chip->alarm) {
INIT_WORK(&ds1307->work, ds1307_work);
@@ -666,12 +686,12 @@ static int __devinit ds1307_probe(struct i2c_client *client,
ds1307->regs[0] &= ~DS1337_BIT_nEOSC;
/* Using IRQ? Disable the square wave and both alarms.
- * For ds1339, be sure alarms can trigger when we're
- * running on Vbackup (BBSQI); we assume ds1337 will
- * ignore that bit
+ * For some variants, be sure alarms can trigger when we're
+ * running on Vbackup (BBSQI/BBSQW)
*/
if (want_irq) {
- ds1307->regs[0] |= DS1337_BIT_INTCN | DS1339_BIT_BBSQI;
+ ds1307->regs[0] |= DS1337_BIT_INTCN
+ | bbsqi_bitpos[ds1307->type];
ds1307->regs[0] &= ~(DS1337_BIT_A2IE | DS1337_BIT_A1IE);
}
@@ -751,6 +771,9 @@ static int __devinit ds1307_probe(struct i2c_client *client,
hour);
}
break;
+ case ds_1388:
+ ds1307->offset = 1; /* Seconds starts at 1 */
+ break;
default:
break;
}
@@ -814,6 +837,8 @@ read_rtc:
case rx_8025:
case ds_1337:
case ds_1339:
+ case ds_1388:
+ case ds_3231:
break;
}
diff --git a/drivers/rtc/rtc-ds1553.c b/drivers/rtc/rtc-ds1553.c
index 38d472b6340..717288527c6 100644
--- a/drivers/rtc/rtc-ds1553.c
+++ b/drivers/rtc/rtc-ds1553.c
@@ -329,8 +329,7 @@ static int __devinit ds1553_rtc_probe(struct platform_device *pdev)
if (pdata->irq > 0) {
writeb(0, ioaddr + RTC_INTERRUPTS);
if (request_irq(pdata->irq, ds1553_rtc_interrupt,
- IRQF_DISABLED | IRQF_SHARED,
- pdev->name, pdev) < 0) {
+ IRQF_DISABLED, pdev->name, pdev) < 0) {
dev_warn(&pdev->dev, "interrupt not available.\n");
pdata->irq = 0;
}
diff --git a/drivers/rtc/rtc-ds1742.c b/drivers/rtc/rtc-ds1742.c
index 8bc8501bffc..09249459e9a 100644
--- a/drivers/rtc/rtc-ds1742.c
+++ b/drivers/rtc/rtc-ds1742.c
@@ -57,6 +57,7 @@ struct rtc_plat_data {
size_t size;
resource_size_t baseaddr;
unsigned long last_jiffies;
+ struct bin_attribute nvram_attr;
};
static int ds1742_rtc_set_time(struct device *dev, struct rtc_time *tm)
@@ -157,18 +158,6 @@ static ssize_t ds1742_nvram_write(struct kobject *kobj,
return count;
}
-static struct bin_attribute ds1742_nvram_attr = {
- .attr = {
- .name = "nvram",
- .mode = S_IRUGO | S_IWUSR,
- },
- .read = ds1742_nvram_read,
- .write = ds1742_nvram_write,
- /* REVISIT: size in sysfs won't match actual size... if it's
- * not a constant, each RTC should have its own attribute.
- */
-};
-
static int __devinit ds1742_rtc_probe(struct platform_device *pdev)
{
struct rtc_device *rtc;
@@ -199,6 +188,12 @@ static int __devinit ds1742_rtc_probe(struct platform_device *pdev)
pdata->size_nvram = pdata->size - RTC_SIZE;
pdata->ioaddr_rtc = ioaddr + pdata->size_nvram;
+ pdata->nvram_attr.attr.name = "nvram";
+ pdata->nvram_attr.attr.mode = S_IRUGO | S_IWUSR;
+ pdata->nvram_attr.read = ds1742_nvram_read;
+ pdata->nvram_attr.write = ds1742_nvram_write;
+ pdata->nvram_attr.size = pdata->size_nvram;
+
/* turn RTC on if it was not on */
ioaddr = pdata->ioaddr_rtc;
sec = readb(ioaddr + RTC_SECONDS);
@@ -221,11 +216,13 @@ static int __devinit ds1742_rtc_probe(struct platform_device *pdev)
pdata->rtc = rtc;
pdata->last_jiffies = jiffies;
platform_set_drvdata(pdev, pdata);
- ds1742_nvram_attr.size = max(ds1742_nvram_attr.size,
- pdata->size_nvram);
- ret = sysfs_create_bin_file(&pdev->dev.kobj, &ds1742_nvram_attr);
- if (ret)
+
+ ret = sysfs_create_bin_file(&pdev->dev.kobj, &pdata->nvram_attr);
+ if (ret) {
+ dev_err(&pdev->dev, "creating nvram file in sysfs failed\n");
goto out;
+ }
+
return 0;
out:
if (pdata->rtc)
@@ -242,7 +239,7 @@ static int __devexit ds1742_rtc_remove(struct platform_device *pdev)
{
struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
- sysfs_remove_bin_file(&pdev->dev.kobj, &ds1742_nvram_attr);
+ sysfs_remove_bin_file(&pdev->dev.kobj, &pdata->nvram_attr);
rtc_device_unregister(pdata->rtc);
iounmap(pdata->ioaddr_nvram);
release_mem_region(pdata->baseaddr, pdata->size);
diff --git a/drivers/rtc/rtc-rx8025.c b/drivers/rtc/rtc-rx8025.c
new file mode 100644
index 00000000000..b1a29bcfdf1
--- /dev/null
+++ b/drivers/rtc/rtc-rx8025.c
@@ -0,0 +1,688 @@
+/*
+ * Driver for Epson's RTC module RX-8025 SA/NB
+ *
+ * Copyright (C) 2009 Wolfgang Grandegger <wg@grandegger.com>
+ *
+ * Copyright (C) 2005 by Digi International Inc.
+ * All rights reserved.
+ *
+ * Modified by fengjh at rising.com.cn
+ * <http://lists.lm-sensors.org/mailman/listinfo/lm-sensors>
+ * 2006.11
+ *
+ * Code cleanup by Sergei Poselenov, <sposelenov@emcraft.com>
+ * Converted to new style by Wolfgang Grandegger <wg@grandegger.com>
+ * Alarm and periodic interrupt added by Dmitry Rakhchev <rda@emcraft.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/bcd.h>
+#include <linux/i2c.h>
+#include <linux/list.h>
+#include <linux/rtc.h>
+
+/* Register definitions */
+#define RX8025_REG_SEC 0x00
+#define RX8025_REG_MIN 0x01
+#define RX8025_REG_HOUR 0x02
+#define RX8025_REG_WDAY 0x03
+#define RX8025_REG_MDAY 0x04
+#define RX8025_REG_MONTH 0x05
+#define RX8025_REG_YEAR 0x06
+#define RX8025_REG_DIGOFF 0x07
+#define RX8025_REG_ALWMIN 0x08
+#define RX8025_REG_ALWHOUR 0x09
+#define RX8025_REG_ALWWDAY 0x0a
+#define RX8025_REG_ALDMIN 0x0b
+#define RX8025_REG_ALDHOUR 0x0c
+/* 0x0d is reserved */
+#define RX8025_REG_CTRL1 0x0e
+#define RX8025_REG_CTRL2 0x0f
+
+#define RX8025_BIT_CTRL1_CT (7 << 0)
+/* 1 Hz periodic level irq */
+#define RX8025_BIT_CTRL1_CT_1HZ 4
+#define RX8025_BIT_CTRL1_TEST (1 << 3)
+#define RX8025_BIT_CTRL1_1224 (1 << 5)
+#define RX8025_BIT_CTRL1_DALE (1 << 6)
+#define RX8025_BIT_CTRL1_WALE (1 << 7)
+
+#define RX8025_BIT_CTRL2_DAFG (1 << 0)
+#define RX8025_BIT_CTRL2_WAFG (1 << 1)
+#define RX8025_BIT_CTRL2_CTFG (1 << 2)
+#define RX8025_BIT_CTRL2_PON (1 << 4)
+#define RX8025_BIT_CTRL2_XST (1 << 5)
+#define RX8025_BIT_CTRL2_VDET (1 << 6)
+
+/* Clock precision adjustment */
+#define RX8025_ADJ_RESOLUTION 3050 /* in ppb */
+#define RX8025_ADJ_DATA_MAX 62
+#define RX8025_ADJ_DATA_MIN -62
+
+static const struct i2c_device_id rx8025_id[] = {
+ { "rx8025", 0 },
+ { }
+};
+MODULE_DEVICE_TABLE(i2c, rx8025_id);
+
+struct rx8025_data {
+ struct i2c_client *client;
+ struct rtc_device *rtc;
+ struct work_struct work;
+ u8 ctrl1;
+ unsigned exiting:1;
+};
+
+static int rx8025_read_reg(struct i2c_client *client, int number, u8 *value)
+{
+ int ret = i2c_smbus_read_byte_data(client, (number << 4) | 0x08);
+
+ if (ret < 0) {
+ dev_err(&client->dev, "Unable to read register #%d\n", number);
+ return ret;
+ }
+
+ *value = ret;
+ return 0;
+}
+
+static int rx8025_read_regs(struct i2c_client *client,
+ int number, u8 length, u8 *values)
+{
+ int ret = i2c_smbus_read_i2c_block_data(client, (number << 4) | 0x08,
+ length, values);
+
+ if (ret != length) {
+ dev_err(&client->dev, "Unable to read registers #%d..#%d\n",
+ number, number + length - 1);
+ return ret < 0 ? ret : -EIO;
+ }
+
+ return 0;
+}
+
+static int rx8025_write_reg(struct i2c_client *client, int number, u8 value)
+{
+ int ret = i2c_smbus_write_byte_data(client, number << 4, value);
+
+ if (ret)
+ dev_err(&client->dev, "Unable to write register #%d\n",
+ number);
+
+ return ret;
+}
+
+static int rx8025_write_regs(struct i2c_client *client,
+ int number, u8 length, u8 *values)
+{
+ int ret = i2c_smbus_write_i2c_block_data(client, (number << 4) | 0x08,
+ length, values);
+
+ if (ret)
+ dev_err(&client->dev, "Unable to write registers #%d..#%d\n",
+ number, number + length - 1);
+
+ return ret;
+}
+
+static irqreturn_t rx8025_irq(int irq, void *dev_id)
+{
+ struct i2c_client *client = dev_id;
+ struct rx8025_data *rx8025 = i2c_get_clientdata(client);
+
+ disable_irq_nosync(irq);
+ schedule_work(&rx8025->work);
+ return IRQ_HANDLED;
+}
+
+static void rx8025_work(struct work_struct *work)
+{
+ struct rx8025_data *rx8025 = container_of(work, struct rx8025_data,
+ work);
+ struct i2c_client *client = rx8025->client;
+ struct mutex *lock = &rx8025->rtc->ops_lock;
+ u8 status;
+
+ mutex_lock(lock);
+
+ if (rx8025_read_reg(client, RX8025_REG_CTRL2, &status))
+ goto out;
+
+ if (!(status & RX8025_BIT_CTRL2_XST))
+ dev_warn(&client->dev, "Oscillation stop was detected,"
+ "you may have to readjust the clock\n");
+
+ if (status & RX8025_BIT_CTRL2_CTFG) {
+ /* periodic */
+ status &= ~RX8025_BIT_CTRL2_CTFG;
+ local_irq_disable();
+ rtc_update_irq(rx8025->rtc, 1, RTC_PF | RTC_IRQF);
+ local_irq_enable();
+ }
+
+ if (status & RX8025_BIT_CTRL2_DAFG) {
+ /* alarm */
+ status &= RX8025_BIT_CTRL2_DAFG;
+ if (rx8025_write_reg(client, RX8025_REG_CTRL1,
+ rx8025->ctrl1 & ~RX8025_BIT_CTRL1_DALE))
+ goto out;
+ local_irq_disable();
+ rtc_update_irq(rx8025->rtc, 1, RTC_AF | RTC_IRQF);
+ local_irq_enable();
+ }
+
+ /* acknowledge IRQ */
+ rx8025_write_reg(client, RX8025_REG_CTRL2,
+ status | RX8025_BIT_CTRL2_XST);
+
+out:
+ if (!rx8025->exiting)
+ enable_irq(client->irq);
+
+ mutex_unlock(lock);
+}
+
+static int rx8025_get_time(struct device *dev, struct rtc_time *dt)
+{
+ struct rx8025_data *rx8025 = dev_get_drvdata(dev);
+ u8 date[7];
+ int err;
+
+ err = rx8025_read_regs(rx8025->client, RX8025_REG_SEC, 7, date);
+ if (err)
+ return err;
+
+ dev_dbg(dev, "%s: read 0x%02x 0x%02x "
+ "0x%02x 0x%02x 0x%02x 0x%02x 0x%02x\n", __func__,
+ date[0], date[1], date[2], date[3], date[4],
+ date[5], date[6]);
+
+ dt->tm_sec = bcd2bin(date[RX8025_REG_SEC] & 0x7f);
+ dt->tm_min = bcd2bin(date[RX8025_REG_MIN] & 0x7f);
+ if (rx8025->ctrl1 & RX8025_BIT_CTRL1_1224)
+ dt->tm_hour = bcd2bin(date[RX8025_REG_HOUR] & 0x3f);
+ else
+ dt->tm_hour = bcd2bin(date[RX8025_REG_HOUR] & 0x1f) % 12
+ + (date[RX8025_REG_HOUR] & 0x20 ? 12 : 0);
+
+ dt->tm_mday = bcd2bin(date[RX8025_REG_MDAY] & 0x3f);
+ dt->tm_mon = bcd2bin(date[RX8025_REG_MONTH] & 0x1f) - 1;
+ dt->tm_year = bcd2bin(date[RX8025_REG_YEAR]);
+
+ if (dt->tm_year < 70)
+ dt->tm_year += 100;
+
+ dev_dbg(dev, "%s: date %ds %dm %dh %dmd %dm %dy\n", __func__,
+ dt->tm_sec, dt->tm_min, dt->tm_hour,
+ dt->tm_mday, dt->tm_mon, dt->tm_year);
+
+ return rtc_valid_tm(dt);
+}
+
+static int rx8025_set_time(struct device *dev, struct rtc_time *dt)
+{
+ struct rx8025_data *rx8025 = dev_get_drvdata(dev);
+ u8 date[7];
+
+ /*
+ * BUG: The HW assumes every year that is a multiple of 4 to be a leap
+ * year. Next time this is wrong is 2100, which will not be a leap
+ * year.
+ */
+
+ /*
+ * Here the read-only bits are written as "0". I'm not sure if that
+ * is sound.
+ */
+ date[RX8025_REG_SEC] = bin2bcd(dt->tm_sec);
+ date[RX8025_REG_MIN] = bin2bcd(dt->tm_min);
+ if (rx8025->ctrl1 & RX8025_BIT_CTRL1_1224)
+ date[RX8025_REG_HOUR] = bin2bcd(dt->tm_hour);
+ else
+ date[RX8025_REG_HOUR] = (dt->tm_hour >= 12 ? 0x20 : 0)
+ | bin2bcd((dt->tm_hour + 11) % 12 + 1);
+
+ date[RX8025_REG_WDAY] = bin2bcd(dt->tm_wday);
+ date[RX8025_REG_MDAY] = bin2bcd(dt->tm_mday);
+ date[RX8025_REG_MONTH] = bin2bcd(dt->tm_mon + 1);
+ date[RX8025_REG_YEAR] = bin2bcd(dt->tm_year % 100);
+
+ dev_dbg(dev,
+ "%s: write 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x\n",
+ __func__,
+ date[0], date[1], date[2], date[3], date[4], date[5], date[6]);
+
+ return rx8025_write_regs(rx8025->client, RX8025_REG_SEC, 7, date);
+}
+
+static int rx8025_init_client(struct i2c_client *client, int *need_reset)
+{
+ struct rx8025_data *rx8025 = i2c_get_clientdata(client);
+ u8 ctrl[2], ctrl2;
+ int need_clear = 0;
+ int err;
+
+ err = rx8025_read_regs(rx8025->client, RX8025_REG_CTRL1, 2, ctrl);
+ if (err)
+ goto out;
+
+ /* Keep test bit zero ! */
+ rx8025->ctrl1 = ctrl[0] & ~RX8025_BIT_CTRL1_TEST;
+
+ if (ctrl[1] & RX8025_BIT_CTRL2_PON) {
+ dev_warn(&client->dev, "power-on reset was detected, "
+ "you may have to readjust the clock\n");
+ *need_reset = 1;
+ }
+
+ if (ctrl[1] & RX8025_BIT_CTRL2_VDET) {
+ dev_warn(&client->dev, "a power voltage drop was detected, "
+ "you may have to readjust the clock\n");
+ *need_reset = 1;
+ }
+
+ if (!(ctrl[1] & RX8025_BIT_CTRL2_XST)) {
+ dev_warn(&client->dev, "Oscillation stop was detected,"
+ "you may have to readjust the clock\n");
+ *need_reset = 1;
+ }
+
+ if (ctrl[1] & (RX8025_BIT_CTRL2_DAFG | RX8025_BIT_CTRL2_WAFG)) {
+ dev_warn(&client->dev, "Alarm was detected\n");
+ need_clear = 1;
+ }
+
+ if (!(ctrl[1] & RX8025_BIT_CTRL2_CTFG))
+ need_clear = 1;
+
+ if (*need_reset || need_clear) {
+ ctrl2 = ctrl[0];
+ ctrl2 &= ~(RX8025_BIT_CTRL2_PON | RX8025_BIT_CTRL2_VDET |
+ RX8025_BIT_CTRL2_CTFG | RX8025_BIT_CTRL2_WAFG |
+ RX8025_BIT_CTRL2_DAFG);
+ ctrl2 |= RX8025_BIT_CTRL2_XST;
+
+ err = rx8025_write_reg(client, RX8025_REG_CTRL2, ctrl2);
+ }
+out:
+ return err;
+}
+
+/* Alarm support */
+static int rx8025_read_alarm(struct device *dev, struct rtc_wkalrm *t)
+{
+ struct rx8025_data *rx8025 = dev_get_drvdata(dev);
+ struct i2c_client *client = rx8025->client;
+ u8 ctrl2, ald[2];
+ int err;
+
+ if (client->irq <= 0)
+ return -EINVAL;
+
+ err = rx8025_read_regs(client, RX8025_REG_ALDMIN, 2, ald);
+ if (err)
+ return err;
+
+ err = rx8025_read_reg(client, RX8025_REG_CTRL2, &ctrl2);
+ if (err)
+ return err;
+
+ dev_dbg(dev, "%s: read alarm 0x%02x 0x%02x ctrl2 %02x\n",
+ __func__, ald[0], ald[1], ctrl2);
+
+ /* Hardware alarms precision is 1 minute! */
+ t->time.tm_sec = 0;
+ t->time.tm_min = bcd2bin(ald[0] & 0x7f);
+ if (rx8025->ctrl1 & RX8025_BIT_CTRL1_1224)
+ t->time.tm_hour = bcd2bin(ald[1] & 0x3f);
+ else
+ t->time.tm_hour = bcd2bin(ald[1] & 0x1f) % 12
+ + (ald[1] & 0x20 ? 12 : 0);
+
+ t->time.tm_wday = -1;
+ t->time.tm_mday = -1;
+ t->time.tm_mon = -1;
+ t->time.tm_year = -1;
+
+ dev_dbg(dev, "%s: date: %ds %dm %dh %dmd %dm %dy\n",
+ __func__,
+ t->time.tm_sec, t->time.tm_min, t->time.tm_hour,
+ t->time.tm_mday, t->time.tm_mon, t->time.tm_year);
+ t->enabled = !!(rx8025->ctrl1 & RX8025_BIT_CTRL1_DALE);
+ t->pending = (ctrl2 & RX8025_BIT_CTRL2_DAFG) && t->enabled;
+
+ return err;
+}
+
+static int rx8025_set_alarm(struct device *dev, struct rtc_wkalrm *t)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ struct rx8025_data *rx8025 = dev_get_drvdata(dev);
+ u8 ald[2];
+ int err;
+
+ if (client->irq <= 0)
+ return -EINVAL;
+
+ /* Hardware alarm precision is 1 minute! */
+ ald[0] = bin2bcd(t->time.tm_min);
+ if (rx8025->ctrl1 & RX8025_BIT_CTRL1_1224)
+ ald[1] = bin2bcd(t->time.tm_hour);
+ else
+ ald[1] = (t->time.tm_hour >= 12 ? 0x20 : 0)
+ | bin2bcd((t->time.tm_hour + 11) % 12 + 1);
+
+ dev_dbg(dev, "%s: write 0x%02x 0x%02x\n", __func__, ald[0], ald[1]);
+
+ if (rx8025->ctrl1 & RX8025_BIT_CTRL1_DALE) {
+ rx8025->ctrl1 &= ~RX8025_BIT_CTRL1_DALE;
+ err = rx8025_write_reg(rx8025->client, RX8025_REG_CTRL1,
+ rx8025->ctrl1);
+ if (err)
+ return err;
+ }
+ err = rx8025_write_regs(rx8025->client, RX8025_REG_ALDMIN, 2, ald);
+ if (err)
+ return err;
+
+ if (t->enabled) {
+ rx8025->ctrl1 |= RX8025_BIT_CTRL1_DALE;
+ err = rx8025_write_reg(rx8025->client, RX8025_REG_CTRL1,
+ rx8025->ctrl1);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int rx8025_alarm_irq_enable(struct device *dev, unsigned int enabled)
+{
+ struct rx8025_data *rx8025 = dev_get_drvdata(dev);
+ u8 ctrl1;
+ int err;
+
+ ctrl1 = rx8025->ctrl1;
+ if (enabled)
+ ctrl1 |= RX8025_BIT_CTRL1_DALE;
+ else
+ ctrl1 &= ~RX8025_BIT_CTRL1_DALE;
+
+ if (ctrl1 != rx8025->ctrl1) {
+ rx8025->ctrl1 = ctrl1;
+ err = rx8025_write_reg(rx8025->client, RX8025_REG_CTRL1,
+ rx8025->ctrl1);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+static int rx8025_irq_set_state(struct device *dev, int enabled)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ struct rx8025_data *rx8025 = i2c_get_clientdata(client);
+ int ctrl1;
+ int err;
+
+ if (client->irq <= 0)
+ return -ENXIO;
+
+ ctrl1 = rx8025->ctrl1 & ~RX8025_BIT_CTRL1_CT;
+ if (enabled)
+ ctrl1 |= RX8025_BIT_CTRL1_CT_1HZ;
+ if (ctrl1 != rx8025->ctrl1) {
+ rx8025->ctrl1 = ctrl1;
+ err = rx8025_write_reg(rx8025->client, RX8025_REG_CTRL1,
+ rx8025->ctrl1);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static struct rtc_class_ops rx8025_rtc_ops = {
+ .read_time = rx8025_get_time,
+ .set_time = rx8025_set_time,
+ .read_alarm = rx8025_read_alarm,
+ .set_alarm = rx8025_set_alarm,
+ .alarm_irq_enable = rx8025_alarm_irq_enable,
+ .irq_set_state = rx8025_irq_set_state,
+};
+
+/*
+ * Clock precision adjustment support
+ *
+ * According to the RX8025 SA/NB application manual the frequency and
+ * temperature charateristics can be approximated using the following
+ * equation:
+ *
+ * df = a * (ut - t)**2
+ *
+ * df: Frequency deviation in any temperature
+ * a : Coefficient = (-35 +-5) * 10**-9
+ * ut: Ultimate temperature in degree = +25 +-5 degree
+ * t : Any temperature in degree
+ *
+ * Note that the clock adjustment in ppb must be entered (which is
+ * the negative value of the deviation).
+ */
+static int rx8025_get_clock_adjust(struct device *dev, int *adj)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ u8 digoff;
+ int err;
+
+ err = rx8025_read_reg(client, RX8025_REG_DIGOFF, &digoff);
+ if (err)
+ return err;
+
+ *adj = digoff >= 64 ? digoff - 128 : digoff;
+ if (*adj > 0)
+ (*adj)--;
+ *adj *= -RX8025_ADJ_RESOLUTION;
+
+ return 0;
+}
+
+static int rx8025_set_clock_adjust(struct device *dev, int adj)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ u8 digoff;
+ int err;
+
+ adj /= -RX8025_ADJ_RESOLUTION;
+ if (adj > RX8025_ADJ_DATA_MAX)
+ adj = RX8025_ADJ_DATA_MAX;
+ else if (adj < RX8025_ADJ_DATA_MIN)
+ adj = RX8025_ADJ_DATA_MIN;
+ else if (adj > 0)
+ adj++;
+ else if (adj < 0)
+ adj += 128;
+ digoff = adj;
+
+ err = rx8025_write_reg(client, RX8025_REG_DIGOFF, digoff);
+ if (err)
+ return err;
+
+ dev_dbg(dev, "%s: write 0x%02x\n", __func__, digoff);
+
+ return 0;
+}
+
+static ssize_t rx8025_sysfs_show_clock_adjust(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ int err, adj;
+
+ err = rx8025_get_clock_adjust(dev, &adj);
+ if (err)
+ return err;
+
+ return sprintf(buf, "%d\n", adj);
+}
+
+static ssize_t rx8025_sysfs_store_clock_adjust(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ int adj, err;
+
+ if (sscanf(buf, "%i", &adj) != 1)
+ return -EINVAL;
+
+ err = rx8025_set_clock_adjust(dev, adj);
+
+ return err ? err : count;
+}
+
+static DEVICE_ATTR(clock_adjust_ppb, S_IRUGO | S_IWUSR,
+ rx8025_sysfs_show_clock_adjust,
+ rx8025_sysfs_store_clock_adjust);
+
+static int rx8025_sysfs_register(struct device *dev)
+{
+ return device_create_file(dev, &dev_attr_clock_adjust_ppb);
+}
+
+static void rx8025_sysfs_unregister(struct device *dev)
+{
+ device_remove_file(dev, &dev_attr_clock_adjust_ppb);
+}
+
+static int __devinit rx8025_probe(struct i2c_client *client,
+ const struct i2c_device_id *id)
+{
+ struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent);
+ struct rx8025_data *rx8025;
+ int err, need_reset = 0;
+
+ if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA
+ | I2C_FUNC_SMBUS_I2C_BLOCK)) {
+ dev_err(&adapter->dev,
+ "doesn't support required functionality\n");
+ err = -EIO;
+ goto errout;
+ }
+
+ rx8025 = kzalloc(sizeof(*rx8025), GFP_KERNEL);
+ if (!rx8025) {
+ dev_err(&adapter->dev, "failed to alloc memory\n");
+ err = -ENOMEM;
+ goto errout;
+ }
+
+ rx8025->client = client;
+ i2c_set_clientdata(client, rx8025);
+ INIT_WORK(&rx8025->work, rx8025_work);
+
+ err = rx8025_init_client(client, &need_reset);
+ if (err)
+ goto errout_free;
+
+ if (need_reset) {
+ struct rtc_time tm;
+ dev_info(&client->dev,
+ "bad conditions detected, resetting date\n");
+ rtc_time_to_tm(0, &tm); /* 1970/1/1 */
+ rx8025_set_time(&client->dev, &tm);
+ }
+
+ rx8025->rtc = rtc_device_register(client->name, &client->dev,
+ &rx8025_rtc_ops, THIS_MODULE);
+ if (IS_ERR(rx8025->rtc)) {
+ err = PTR_ERR(rx8025->rtc);
+ dev_err(&client->dev, "unable to register the class device\n");
+ goto errout_free;
+ }
+
+ if (client->irq > 0) {
+ dev_info(&client->dev, "IRQ %d supplied\n", client->irq);
+ err = request_irq(client->irq, rx8025_irq,
+ 0, "rx8025", client);
+ if (err) {
+ dev_err(&client->dev, "unable to request IRQ\n");
+ goto errout_reg;
+ }
+ }
+
+ rx8025->rtc->irq_freq = 1;
+ rx8025->rtc->max_user_freq = 1;
+
+ err = rx8025_sysfs_register(&client->dev);
+ if (err)
+ goto errout_irq;
+
+ return 0;
+
+errout_irq:
+ if (client->irq > 0)
+ free_irq(client->irq, client);
+
+errout_reg:
+ rtc_device_unregister(rx8025->rtc);
+
+errout_free:
+ i2c_set_clientdata(client, NULL);
+ kfree(rx8025);
+
+errout:
+ dev_err(&adapter->dev, "probing for rx8025 failed\n");
+ return err;
+}
+
+static int __devexit rx8025_remove(struct i2c_client *client)
+{
+ struct rx8025_data *rx8025 = i2c_get_clientdata(client);
+ struct mutex *lock = &rx8025->rtc->ops_lock;
+
+ if (client->irq > 0) {
+ mutex_lock(lock);
+ rx8025->exiting = 1;
+ mutex_unlock(lock);
+
+ free_irq(client->irq, client);
+ flush_scheduled_work();
+ }
+
+ rx8025_sysfs_unregister(&client->dev);
+ rtc_device_unregister(rx8025->rtc);
+ i2c_set_clientdata(client, NULL);
+ kfree(rx8025);
+ return 0;
+}
+
+static struct i2c_driver rx8025_driver = {
+ .driver = {
+ .name = "rtc-rx8025",
+ .owner = THIS_MODULE,
+ },
+ .probe = rx8025_probe,
+ .remove = __devexit_p(rx8025_remove),
+ .id_table = rx8025_id,
+};
+
+static int __init rx8025_init(void)
+{
+ return i2c_add_driver(&rx8025_driver);
+}
+
+static void __exit rx8025_exit(void)
+{
+ i2c_del_driver(&rx8025_driver);
+}
+
+MODULE_AUTHOR("Wolfgang Grandegger <wg@grandegger.com>");
+MODULE_DESCRIPTION("RX-8025 SA/NB RTC driver");
+MODULE_LICENSE("GPL");
+
+module_init(rx8025_init);
+module_exit(rx8025_exit);
diff --git a/drivers/rtc/rtc-tx4939.c b/drivers/rtc/rtc-tx4939.c
index 4ee4857ff20..4a6ed1104fb 100644
--- a/drivers/rtc/rtc-tx4939.c
+++ b/drivers/rtc/rtc-tx4939.c
@@ -261,10 +261,8 @@ static int __init tx4939_rtc_probe(struct platform_device *pdev)
tx4939_rtc_cmd(pdata->rtcreg, TX4939_RTCCTL_COMMAND_NOP);
if (devm_request_irq(&pdev->dev, irq, tx4939_rtc_interrupt,
- IRQF_DISABLED | IRQF_SHARED,
- pdev->name, &pdev->dev) < 0) {
+ IRQF_DISABLED, pdev->name, &pdev->dev) < 0)
return -EBUSY;
- }
rtc = rtc_device_register(pdev->name, &pdev->dev,
&tx4939_rtc_ops, THIS_MODULE);
if (IS_ERR(rtc))
diff --git a/drivers/spi/atmel_spi.c b/drivers/spi/atmel_spi.c
index 12e443cc4ac..f5b3fdbb1e2 100644
--- a/drivers/spi/atmel_spi.c
+++ b/drivers/spi/atmel_spi.c
@@ -530,9 +530,6 @@ atmel_spi_interrupt(int irq, void *dev_id)
return ret;
}
-/* the spi->mode bits understood by this driver: */
-#define MODEBITS (SPI_CPOL | SPI_CPHA | SPI_CS_HIGH)
-
static int atmel_spi_setup(struct spi_device *spi)
{
struct atmel_spi *as;
@@ -555,8 +552,6 @@ static int atmel_spi_setup(struct spi_device *spi)
return -EINVAL;
}
- if (bits == 0)
- bits = 8;
if (bits < 8 || bits > 16) {
dev_dbg(&spi->dev,
"setup: invalid bits_per_word %u (8 to 16)\n",
@@ -564,12 +559,6 @@ static int atmel_spi_setup(struct spi_device *spi)
return -EINVAL;
}
- if (spi->mode & ~MODEBITS) {
- dev_dbg(&spi->dev, "setup: unsupported mode bits %x\n",
- spi->mode & ~MODEBITS);
- return -EINVAL;
- }
-
/* see notes above re chipselect */
if (!atmel_spi_is_v2()
&& spi->chip_select == 0
@@ -775,6 +764,9 @@ static int __init atmel_spi_probe(struct platform_device *pdev)
if (!master)
goto out_free;
+ /* the spi->mode bits understood by this driver: */
+ master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
+
master->bus_num = pdev->id;
master->num_chipselect = 4;
master->setup = atmel_spi_setup;
diff --git a/drivers/spi/au1550_spi.c b/drivers/spi/au1550_spi.c
index b02f25c702f..76cbc1a6659 100644
--- a/drivers/spi/au1550_spi.c
+++ b/drivers/spi/au1550_spi.c
@@ -284,27 +284,16 @@ static int au1550_spi_setupxfer(struct spi_device *spi, struct spi_transfer *t)
return 0;
}
-/* the spi->mode bits understood by this driver: */
-#define MODEBITS (SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_LSB_FIRST)
-
static int au1550_spi_setup(struct spi_device *spi)
{
struct au1550_spi *hw = spi_master_get_devdata(spi->master);
- if (spi->bits_per_word == 0)
- spi->bits_per_word = 8;
if (spi->bits_per_word < 4 || spi->bits_per_word > 24) {
dev_err(&spi->dev, "setup: invalid bits_per_word=%d\n",
spi->bits_per_word);
return -EINVAL;
}
- if (spi->mode & ~MODEBITS) {
- dev_dbg(&spi->dev, "setup: unsupported mode bits %x\n",
- spi->mode & ~MODEBITS);
- return -EINVAL;
- }
-
if (spi->max_speed_hz == 0)
spi->max_speed_hz = hw->freq_max;
if (spi->max_speed_hz > hw->freq_max
@@ -781,6 +770,9 @@ static int __init au1550_spi_probe(struct platform_device *pdev)
goto err_nomem;
}
+ /* the spi->mode bits understood by this driver: */
+ master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_LSB_FIRST;
+
hw = spi_master_get_devdata(master);
hw->master = spi_master_get(master);
diff --git a/drivers/spi/mpc52xx_psc_spi.c b/drivers/spi/mpc52xx_psc_spi.c
index 68c77a91159..1b74d5ca03f 100644
--- a/drivers/spi/mpc52xx_psc_spi.c
+++ b/drivers/spi/mpc52xx_psc_spi.c
@@ -13,6 +13,7 @@
#include <linux/module.h>
#include <linux/init.h>
+#include <linux/types.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/of_platform.h>
@@ -30,8 +31,7 @@
struct mpc52xx_psc_spi {
/* fsl_spi_platform data */
- void (*activate_cs)(u8, u8);
- void (*deactivate_cs)(u8, u8);
+ void (*cs_control)(struct spi_device *spi, bool on);
u32 sysclk;
/* driver internal data */
@@ -111,18 +111,16 @@ static void mpc52xx_psc_spi_activate_cs(struct spi_device *spi)
out_be16((u16 __iomem *)&psc->ccr, ccr);
mps->bits_per_word = cs->bits_per_word;
- if (mps->activate_cs)
- mps->activate_cs(spi->chip_select,
- (spi->mode & SPI_CS_HIGH) ? 1 : 0);
+ if (mps->cs_control)
+ mps->cs_control(spi, (spi->mode & SPI_CS_HIGH) ? 1 : 0);
}
static void mpc52xx_psc_spi_deactivate_cs(struct spi_device *spi)
{
struct mpc52xx_psc_spi *mps = spi_master_get_devdata(spi->master);
- if (mps->deactivate_cs)
- mps->deactivate_cs(spi->chip_select,
- (spi->mode & SPI_CS_HIGH) ? 1 : 0);
+ if (mps->cs_control)
+ mps->cs_control(spi, (spi->mode & SPI_CS_HIGH) ? 0 : 1);
}
#define MPC52xx_PSC_BUFSIZE (MPC52xx_PSC_RFNUM_MASK + 1)
@@ -261,9 +259,6 @@ static void mpc52xx_psc_spi_work(struct work_struct *work)
spin_unlock_irq(&mps->lock);
}
-/* the spi->mode bits understood by this driver: */
-#define MODEBITS (SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_LSB_FIRST)
-
static int mpc52xx_psc_spi_setup(struct spi_device *spi)
{
struct mpc52xx_psc_spi *mps = spi_master_get_devdata(spi->master);
@@ -273,12 +268,6 @@ static int mpc52xx_psc_spi_setup(struct spi_device *spi)
if (spi->bits_per_word%8)
return -EINVAL;
- if (spi->mode & ~MODEBITS) {
- dev_dbg(&spi->dev, "setup: unsupported mode bits %x\n",
- spi->mode & ~MODEBITS);
- return -EINVAL;
- }
-
if (!cs) {
cs = kzalloc(sizeof *cs, GFP_KERNEL);
if (!cs)
@@ -385,18 +374,19 @@ static int __init mpc52xx_psc_spi_do_probe(struct device *dev, u32 regaddr,
dev_set_drvdata(dev, master);
mps = spi_master_get_devdata(master);
+ /* the spi->mode bits understood by this driver: */
+ master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_LSB_FIRST;
+
mps->irq = irq;
if (pdata == NULL) {
dev_warn(dev, "probe called without platform data, no "
- "(de)activate_cs function will be called\n");
- mps->activate_cs = NULL;
- mps->deactivate_cs = NULL;
+ "cs_control function will be called\n");
+ mps->cs_control = NULL;
mps->sysclk = 0;
master->bus_num = bus_num;
master->num_chipselect = 255;
} else {
- mps->activate_cs = pdata->activate_cs;
- mps->deactivate_cs = pdata->deactivate_cs;
+ mps->cs_control = pdata->cs_control;
mps->sysclk = pdata->sysclk;
master->bus_num = pdata->bus_num;
master->num_chipselect = pdata->max_chipselect;
diff --git a/drivers/spi/omap2_mcspi.c b/drivers/spi/omap2_mcspi.c
index d6d0c5d241c..eee4b6e0af2 100644
--- a/drivers/spi/omap2_mcspi.c
+++ b/drivers/spi/omap2_mcspi.c
@@ -603,9 +603,6 @@ static int omap2_mcspi_request_dma(struct spi_device *spi)
return 0;
}
-/* the spi->mode bits understood by this driver: */
-#define MODEBITS (SPI_CPOL | SPI_CPHA | SPI_CS_HIGH)
-
static int omap2_mcspi_setup(struct spi_device *spi)
{
int ret;
@@ -613,15 +610,7 @@ static int omap2_mcspi_setup(struct spi_device *spi)
struct omap2_mcspi_dma *mcspi_dma;
struct omap2_mcspi_cs *cs = spi->controller_state;
- if (spi->mode & ~MODEBITS) {
- dev_dbg(&spi->dev, "setup: unsupported mode bits %x\n",
- spi->mode & ~MODEBITS);
- return -EINVAL;
- }
-
- if (spi->bits_per_word == 0)
- spi->bits_per_word = 8;
- else if (spi->bits_per_word < 4 || spi->bits_per_word > 32) {
+ if (spi->bits_per_word < 4 || spi->bits_per_word > 32) {
dev_dbg(&spi->dev, "setup: unsupported %d bit words\n",
spi->bits_per_word);
return -EINVAL;
@@ -984,6 +973,9 @@ static int __init omap2_mcspi_probe(struct platform_device *pdev)
return -ENOMEM;
}
+ /* the spi->mode bits understood by this driver: */
+ master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
+
if (pdev->id != -1)
master->bus_num = pdev->id;
diff --git a/drivers/spi/omap_uwire.c b/drivers/spi/omap_uwire.c
index fe8b9ac0cce..aa90ddb3706 100644
--- a/drivers/spi/omap_uwire.c
+++ b/drivers/spi/omap_uwire.c
@@ -339,8 +339,6 @@ static int uwire_setup_transfer(struct spi_device *spi, struct spi_transfer *t)
bits = spi->bits_per_word;
if (t != NULL && t->bits_per_word)
bits = t->bits_per_word;
- if (!bits)
- bits = 8;
if (bits > 16) {
pr_debug("%s: wordsize %d?\n", dev_name(&spi->dev), bits);
@@ -449,19 +447,10 @@ done:
return status;
}
-/* the spi->mode bits understood by this driver: */
-#define MODEBITS (SPI_CPOL | SPI_CPHA | SPI_CS_HIGH)
-
static int uwire_setup(struct spi_device *spi)
{
struct uwire_state *ust = spi->controller_state;
- if (spi->mode & ~MODEBITS) {
- dev_dbg(&spi->dev, "setup: unsupported mode bits %x\n",
- spi->mode & ~MODEBITS);
- return -EINVAL;
- }
-
if (ust == NULL) {
ust = kzalloc(sizeof(*ust), GFP_KERNEL);
if (ust == NULL)
@@ -522,6 +511,9 @@ static int __init uwire_probe(struct platform_device *pdev)
uwire_write_reg(UWIRE_SR3, 1);
+ /* the spi->mode bits understood by this driver: */
+ master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
+
master->bus_num = 2; /* "official" */
master->num_chipselect = 4;
master->setup = uwire_setup;
diff --git a/drivers/spi/orion_spi.c b/drivers/spi/orion_spi.c
index c8b0babdc2a..3aea50da7b2 100644
--- a/drivers/spi/orion_spi.c
+++ b/drivers/spi/orion_spi.c
@@ -358,20 +358,11 @@ static int orion_spi_setup(struct spi_device *spi)
orion_spi = spi_master_get_devdata(spi->master);
- if (spi->mode) {
- dev_err(&spi->dev, "setup: unsupported mode bits %x\n",
- spi->mode);
- return -EINVAL;
- }
-
/* Fix ac timing if required. */
if (orion_spi->spi_info->enable_clock_fix)
orion_spi_setbits(orion_spi, ORION_SPI_IF_CONFIG_REG,
(1 << 14));
- if (spi->bits_per_word == 0)
- spi->bits_per_word = 8;
-
if ((spi->max_speed_hz == 0)
|| (spi->max_speed_hz > orion_spi->max_speed))
spi->max_speed_hz = orion_spi->max_speed;
@@ -476,6 +467,9 @@ static int __init orion_spi_probe(struct platform_device *pdev)
if (pdev->id != -1)
master->bus_num = pdev->id;
+ /* we support only mode 0, and no options */
+ master->mode_bits = 0;
+
master->setup = orion_spi_setup;
master->transfer = orion_spi_transfer;
master->num_chipselect = ORION_NUM_CHIPSELECTS;
diff --git a/drivers/spi/pxa2xx_spi.c b/drivers/spi/pxa2xx_spi.c
index 3f3c08c6ba4..d949dbf1141 100644
--- a/drivers/spi/pxa2xx_spi.c
+++ b/drivers/spi/pxa2xx_spi.c
@@ -1185,9 +1185,6 @@ static int transfer(struct spi_device *spi, struct spi_message *msg)
return 0;
}
-/* the spi->mode bits understood by this driver: */
-#define MODEBITS (SPI_CPOL | SPI_CPHA)
-
static int setup_cs(struct spi_device *spi, struct chip_data *chip,
struct pxa2xx_spi_chip *chip_info)
{
@@ -1236,9 +1233,6 @@ static int setup(struct spi_device *spi)
uint tx_thres = TX_THRESH_DFLT;
uint rx_thres = RX_THRESH_DFLT;
- if (!spi->bits_per_word)
- spi->bits_per_word = 8;
-
if (drv_data->ssp_type != PXA25x_SSP
&& (spi->bits_per_word < 4 || spi->bits_per_word > 32)) {
dev_err(&spi->dev, "failed setup: ssp_type=%d, bits/wrd=%d "
@@ -1255,12 +1249,6 @@ static int setup(struct spi_device *spi)
return -EINVAL;
}
- if (spi->mode & ~MODEBITS) {
- dev_dbg(&spi->dev, "setup: unsupported mode bits %x\n",
- spi->mode & ~MODEBITS);
- return -EINVAL;
- }
-
/* Only alloc on first setup */
chip = spi_get_ctldata(spi);
if (!chip) {
@@ -1328,18 +1316,14 @@ static int setup(struct spi_device *spi)
/* NOTE: PXA25x_SSP _could_ use external clocking ... */
if (drv_data->ssp_type != PXA25x_SSP)
- dev_dbg(&spi->dev, "%d bits/word, %ld Hz, mode %d, %s\n",
- spi->bits_per_word,
+ dev_dbg(&spi->dev, "%ld Hz actual, %s\n",
clk_get_rate(ssp->clk)
/ (1 + ((chip->cr0 & SSCR0_SCR) >> 8)),
- spi->mode & 0x3,
chip->enable_dma ? "DMA" : "PIO");
else
- dev_dbg(&spi->dev, "%d bits/word, %ld Hz, mode %d, %s\n",
- spi->bits_per_word,
+ dev_dbg(&spi->dev, "%ld Hz actual, %s\n",
clk_get_rate(ssp->clk) / 2
/ (1 + ((chip->cr0 & SSCR0_SCR) >> 8)),
- spi->mode & 0x3,
chip->enable_dma ? "DMA" : "PIO");
if (spi->bits_per_word <= 8) {
@@ -1500,6 +1484,9 @@ static int __init pxa2xx_spi_probe(struct platform_device *pdev)
drv_data->pdev = pdev;
drv_data->ssp = ssp;
+ /* the spi->mode bits understood by this driver: */
+ master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
+
master->bus_num = pdev->id;
master->num_chipselect = platform_info->num_chipselect;
master->dma_alignment = DMA_ALIGNMENT;
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 8eba98c8ed1..70845ccd85c 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -265,7 +265,7 @@ int spi_add_device(struct spi_device *spi)
* normally rely on the device being setup. Devices
* using SPI_CS_HIGH can't coexist well otherwise...
*/
- status = spi->master->setup(spi);
+ status = spi_setup(spi);
if (status < 0) {
dev_err(dev, "can't %s %s, status %d\n",
"setup", dev_name(&spi->dev), status);
@@ -583,6 +583,70 @@ EXPORT_SYMBOL_GPL(spi_busnum_to_master);
/*-------------------------------------------------------------------------*/
+/* Core methods for SPI master protocol drivers. Some of the
+ * other core methods are currently defined as inline functions.
+ */
+
+/**
+ * spi_setup - setup SPI mode and clock rate
+ * @spi: the device whose settings are being modified
+ * Context: can sleep, and no requests are queued to the device
+ *
+ * SPI protocol drivers may need to update the transfer mode if the
+ * device doesn't work with its default. They may likewise need
+ * to update clock rates or word sizes from initial values. This function
+ * changes those settings, and must be called from a context that can sleep.
+ * Except for SPI_CS_HIGH, which takes effect immediately, the changes take
+ * effect the next time the device is selected and data is transferred to
+ * or from it. When this function returns, the spi device is deselected.
+ *
+ * Note that this call will fail if the protocol driver specifies an option
+ * that the underlying controller or its driver does not support. For
+ * example, not all hardware supports wire transfers using nine bit words,
+ * LSB-first wire encoding, or active-high chipselects.
+ */
+int spi_setup(struct spi_device *spi)
+{
+ unsigned bad_bits;
+ int status;
+
+ /* help drivers fail *cleanly* when they need options
+ * that aren't supported with their current master
+ */
+ bad_bits = spi->mode & ~spi->master->mode_bits;
+ if (bad_bits) {
+ dev_dbg(&spi->dev, "setup: unsupported mode bits %x\n",
+ bad_bits);
+ return -EINVAL;
+ }
+
+ if (!spi->bits_per_word)
+ spi->bits_per_word = 8;
+
+ status = spi->master->setup(spi);
+
+ dev_dbg(&spi->dev, "setup mode %d, %s%s%s%s"
+ "%u bits/w, %u Hz max --> %d\n",
+ (int) (spi->mode & (SPI_CPOL | SPI_CPHA)),
+ (spi->mode & SPI_CS_HIGH) ? "cs_high, " : "",
+ (spi->mode & SPI_LSB_FIRST) ? "lsb, " : "",
+ (spi->mode & SPI_3WIRE) ? "3wire, " : "",
+ (spi->mode & SPI_LOOP) ? "loopback, " : "",
+ spi->bits_per_word, spi->max_speed_hz,
+ status);
+
+ return status;
+}
+EXPORT_SYMBOL_GPL(spi_setup);
+
+
+/*-------------------------------------------------------------------------*/
+
+/* Utility methods for SPI master protocol drivers, layered on
+ * top of the core. Some other utility methods are defined as
+ * inline functions.
+ */
+
static void spi_complete(void *arg)
{
complete(arg);
@@ -636,8 +700,8 @@ static u8 *buf;
* @spi: device with which data will be exchanged
* @txbuf: data to be written (need not be dma-safe)
* @n_tx: size of txbuf, in bytes
- * @rxbuf: buffer into which data will be read
- * @n_rx: size of rxbuf, in bytes (need not be dma-safe)
+ * @rxbuf: buffer into which data will be read (need not be dma-safe)
+ * @n_rx: size of rxbuf, in bytes
* Context: can sleep
*
* This performs a half duplex MicroWire style transaction with the
diff --git a/drivers/spi/spi_bfin5xx.c b/drivers/spi/spi_bfin5xx.c
index 011c5bddba6..73e24ef5a2f 100644
--- a/drivers/spi/spi_bfin5xx.c
+++ b/drivers/spi/spi_bfin5xx.c
@@ -169,7 +169,7 @@ static int bfin_spi_flush(struct driver_data *drv_data)
unsigned long limit = loops_per_jiffy << 1;
/* wait for stop and clear stat */
- while (!(read_STAT(drv_data) & BIT_STAT_SPIF) && limit--)
+ while (!(read_STAT(drv_data) & BIT_STAT_SPIF) && --limit)
cpu_relax();
write_STAT(drv_data, BIT_STAT_CLR);
@@ -1010,16 +1010,6 @@ static int bfin_spi_setup(struct spi_device *spi)
struct driver_data *drv_data = spi_master_get_devdata(spi->master);
int ret;
- /* Abort device setup if requested features are not supported */
- if (spi->mode & ~(SPI_CPOL | SPI_CPHA | SPI_LSB_FIRST)) {
- dev_err(&spi->dev, "requested mode not fully supported\n");
- return -EINVAL;
- }
-
- /* Zero (the default) here means 8 bits */
- if (!spi->bits_per_word)
- spi->bits_per_word = 8;
-
if (spi->bits_per_word != 8 && spi->bits_per_word != 16)
return -EINVAL;
@@ -1287,6 +1277,9 @@ static int __init bfin_spi_probe(struct platform_device *pdev)
drv_data->pdev = pdev;
drv_data->pin_req = platform_info->pin_req;
+ /* the spi->mode bits supported by this driver: */
+ master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_LSB_FIRST;
+
master->bus_num = pdev->id;
master->num_chipselect = platform_info->num_chipselect;
master->cleanup = bfin_spi_cleanup;
diff --git a/drivers/spi/spi_bitbang.c b/drivers/spi/spi_bitbang.c
index 85e61f45121..2a5abc08e85 100644
--- a/drivers/spi/spi_bitbang.c
+++ b/drivers/spi/spi_bitbang.c
@@ -188,12 +188,6 @@ int spi_bitbang_setup(struct spi_device *spi)
bitbang = spi_master_get_devdata(spi->master);
- /* Bitbangers can support SPI_CS_HIGH, SPI_3WIRE, and so on;
- * add those to master->flags, and provide the other support.
- */
- if ((spi->mode & ~(SPI_CPOL|SPI_CPHA|bitbang->flags)) != 0)
- return -EINVAL;
-
if (!cs) {
cs = kzalloc(sizeof *cs, GFP_KERNEL);
if (!cs)
@@ -201,9 +195,6 @@ int spi_bitbang_setup(struct spi_device *spi)
spi->controller_state = cs;
}
- if (!spi->bits_per_word)
- spi->bits_per_word = 8;
-
/* per-word shift register access, in hardware or bitbanging */
cs->txrx_word = bitbang->txrx_word[spi->mode & (SPI_CPOL|SPI_CPHA)];
if (!cs->txrx_word)
@@ -213,9 +204,7 @@ int spi_bitbang_setup(struct spi_device *spi)
if (retval < 0)
return retval;
- dev_dbg(&spi->dev, "%s, mode %d, %u bits/w, %u nsec/bit\n",
- __func__, spi->mode & (SPI_CPOL | SPI_CPHA),
- spi->bits_per_word, 2 * cs->nsecs);
+ dev_dbg(&spi->dev, "%s, %u nsec/bit\n", __func__, 2 * cs->nsecs);
/* NOTE we _need_ to call chipselect() early, ideally with adapter
* setup, unless the hardware defaults cooperate to avoid confusion
@@ -457,6 +446,9 @@ int spi_bitbang_start(struct spi_bitbang *bitbang)
spin_lock_init(&bitbang->lock);
INIT_LIST_HEAD(&bitbang->queue);
+ if (!bitbang->master->mode_bits)
+ bitbang->master->mode_bits = SPI_CPOL | SPI_CPHA | bitbang->flags;
+
if (!bitbang->master->transfer)
bitbang->master->transfer = spi_bitbang_transfer;
if (!bitbang->txrx_bufs) {
diff --git a/drivers/spi/spi_imx.c b/drivers/spi/spi_imx.c
index 0671aeef579..c195e45f7f3 100644
--- a/drivers/spi/spi_imx.c
+++ b/drivers/spi/spi_imx.c
@@ -1171,9 +1171,6 @@ msg_rejected:
return -EINVAL;
}
-/* the spi->mode bits understood by this driver: */
-#define MODEBITS (SPI_CPOL | SPI_CPHA | SPI_CS_HIGH)
-
/* On first setup bad values must free chip_data memory since will cause
spi_new_device to fail. Bad value setup from protocol driver are simply not
applied and notified to the calling driver. */
@@ -1186,12 +1183,6 @@ static int setup(struct spi_device *spi)
u32 tmp;
int status = 0;
- if (spi->mode & ~MODEBITS) {
- dev_dbg(&spi->dev, "setup: unsupported mode bits %x\n",
- spi->mode & ~MODEBITS);
- return -EINVAL;
- }
-
/* Get controller data */
chip_info = spi->controller_data;
@@ -1286,10 +1277,7 @@ static int setup(struct spi_device *spi)
/* SPI word width */
tmp = spi->bits_per_word;
- if (tmp == 0) {
- tmp = 8;
- spi->bits_per_word = 8;
- } else if (tmp > 16) {
+ if (tmp > 16) {
status = -EINVAL;
dev_err(&spi->dev,
"setup - "
@@ -1481,6 +1469,9 @@ static int __init spi_imx_probe(struct platform_device *pdev)
drv_data->master_info = platform_info;
drv_data->pdev = pdev;
+ /* the spi->mode bits understood by this driver: */
+ master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
+
master->bus_num = pdev->id;
master->num_chipselect = platform_info->num_chipselect;
master->dma_alignment = DMA_ALIGNMENT;
diff --git a/drivers/spi/spi_mpc83xx.c b/drivers/spi/spi_mpc83xx.c
index a32ccb44065..ce61be98e06 100644
--- a/drivers/spi/spi_mpc83xx.c
+++ b/drivers/spi/spi_mpc83xx.c
@@ -419,10 +419,6 @@ static void mpc83xx_spi_work(struct work_struct *work)
spin_unlock_irq(&mpc83xx_spi->lock);
}
-/* the spi->mode bits understood by this driver: */
-#define MODEBITS (SPI_CPOL | SPI_CPHA | SPI_CS_HIGH \
- | SPI_LSB_FIRST | SPI_LOOP)
-
static int mpc83xx_spi_setup(struct spi_device *spi)
{
struct mpc83xx_spi *mpc83xx_spi;
@@ -430,12 +426,6 @@ static int mpc83xx_spi_setup(struct spi_device *spi)
u32 hw_mode;
struct spi_mpc83xx_cs *cs = spi->controller_state;
- if (spi->mode & ~MODEBITS) {
- dev_dbg(&spi->dev, "setup: unsupported mode bits %x\n",
- spi->mode & ~MODEBITS);
- return -EINVAL;
- }
-
if (!spi->max_speed_hz)
return -EINVAL;
@@ -447,9 +437,6 @@ static int mpc83xx_spi_setup(struct spi_device *spi)
}
mpc83xx_spi = spi_master_get_devdata(spi->master);
- if (!spi->bits_per_word)
- spi->bits_per_word = 8;
-
hw_mode = cs->hw_mode; /* Save orginal settings */
cs->hw_mode = mpc83xx_spi_read_reg(&mpc83xx_spi->base->mode);
/* mask out bits we are going to set */
@@ -471,9 +458,6 @@ static int mpc83xx_spi_setup(struct spi_device *spi)
return retval;
}
- dev_dbg(&spi->dev, "%s, mode %d, %u bits/w, %u Hz\n",
- __func__, spi->mode & (SPI_CPOL | SPI_CPHA),
- spi->bits_per_word, spi->max_speed_hz);
#if 0 /* Don't think this is needed */
/* NOTE we _need_ to call chipselect() early, ideally with adapter
* setup, unless the hardware defaults cooperate to avoid confusion
@@ -568,6 +552,10 @@ mpc83xx_spi_probe(struct device *dev, struct resource *mem, unsigned int irq)
dev_set_drvdata(dev, master);
+ /* the spi->mode bits understood by this driver: */
+ master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH
+ | SPI_LSB_FIRST | SPI_LOOP;
+
master->setup = mpc83xx_spi_setup;
master->transfer = mpc83xx_spi_transfer;
master->cleanup = mpc83xx_spi_cleanup;
diff --git a/drivers/spi/spi_s3c24xx.c b/drivers/spi/spi_s3c24xx.c
index b3ebc1d0f85..e0d44af4745 100644
--- a/drivers/spi/spi_s3c24xx.c
+++ b/drivers/spi/spi_s3c24xx.c
@@ -146,32 +146,16 @@ static int s3c24xx_spi_setupxfer(struct spi_device *spi,
return 0;
}
-/* the spi->mode bits understood by this driver: */
-#define MODEBITS (SPI_CPOL | SPI_CPHA | SPI_CS_HIGH)
-
static int s3c24xx_spi_setup(struct spi_device *spi)
{
int ret;
- if (!spi->bits_per_word)
- spi->bits_per_word = 8;
-
- if (spi->mode & ~MODEBITS) {
- dev_dbg(&spi->dev, "setup: unsupported mode bits %x\n",
- spi->mode & ~MODEBITS);
- return -EINVAL;
- }
-
ret = s3c24xx_spi_setupxfer(spi, NULL);
if (ret < 0) {
dev_err(&spi->dev, "setupxfer returned %d\n", ret);
return ret;
}
- dev_dbg(&spi->dev, "%s: mode %d, %u bpw, %d hz\n",
- __func__, spi->mode, spi->bits_per_word,
- spi->max_speed_hz);
-
return 0;
}
@@ -290,6 +274,9 @@ static int __init s3c24xx_spi_probe(struct platform_device *pdev)
/* setup the master state. */
+ /* the spi->mode bits understood by this driver: */
+ master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
+
master->num_chipselect = hw->pdata->num_cs;
master->bus_num = pdata->bus_num;
diff --git a/drivers/spi/spi_txx9.c b/drivers/spi/spi_txx9.c
index 29cbb065618..96057de133a 100644
--- a/drivers/spi/spi_txx9.c
+++ b/drivers/spi/spi_txx9.c
@@ -110,23 +110,17 @@ static void txx9spi_cs_func(struct spi_device *spi, struct txx9spi *c,
ndelay(cs_delay); /* CS Setup Time / CS Recovery Time */
}
-/* the spi->mode bits understood by this driver: */
-#define MODEBITS (SPI_CS_HIGH|SPI_CPOL|SPI_CPHA)
-
static int txx9spi_setup(struct spi_device *spi)
{
struct txx9spi *c = spi_master_get_devdata(spi->master);
u8 bits_per_word;
- if (spi->mode & ~MODEBITS)
- return -EINVAL;
-
if (!spi->max_speed_hz
|| spi->max_speed_hz > c->max_speed_hz
|| spi->max_speed_hz < c->min_speed_hz)
return -EINVAL;
- bits_per_word = spi->bits_per_word ? : 8;
+ bits_per_word = spi->bits_per_word;
if (bits_per_word != 8 && bits_per_word != 16)
return -EINVAL;
@@ -414,6 +408,9 @@ static int __init txx9spi_probe(struct platform_device *dev)
(unsigned long long)res->start, irq,
(c->baseclk + 500000) / 1000000);
+ /* the spi->mode bits understood by this driver: */
+ master->mode_bits = SPI_CS_HIGH | SPI_CPOL | SPI_CPHA;
+
master->bus_num = dev->id;
master->setup = txx9spi_setup;
master->transfer = txx9spi_transfer;
diff --git a/drivers/spi/xilinx_spi.c b/drivers/spi/xilinx_spi.c
index 494d3f756e2..46b8c5c2f45 100644
--- a/drivers/spi/xilinx_spi.c
+++ b/drivers/spi/xilinx_spi.c
@@ -158,9 +158,6 @@ static int xilinx_spi_setup_transfer(struct spi_device *spi,
return 0;
}
-/* the spi->mode bits understood by this driver: */
-#define MODEBITS (SPI_CPOL | SPI_CPHA)
-
static int xilinx_spi_setup(struct spi_device *spi)
{
struct spi_bitbang *bitbang;
@@ -170,22 +167,10 @@ static int xilinx_spi_setup(struct spi_device *spi)
xspi = spi_master_get_devdata(spi->master);
bitbang = &xspi->bitbang;
- if (!spi->bits_per_word)
- spi->bits_per_word = 8;
-
- if (spi->mode & ~MODEBITS) {
- dev_err(&spi->dev, "%s, unsupported mode bits %x\n",
- __func__, spi->mode & ~MODEBITS);
- return -EINVAL;
- }
-
retval = xilinx_spi_setup_transfer(spi, NULL);
if (retval < 0)
return retval;
- dev_dbg(&spi->dev, "%s, mode %d, %u bits/w, %u nsec/bit\n",
- __func__, spi->mode & MODEBITS, spi->bits_per_word, 0);
-
return 0;
}
@@ -333,6 +318,9 @@ static int __init xilinx_spi_of_probe(struct of_device *ofdev,
goto put_master;
}
+ /* the spi->mode bits understood by this driver: */
+ master->mode_bits = SPI_CPOL | SPI_CPHA;
+
xspi = spi_master_get_devdata(master);
xspi->bitbang.master = spi_master_get(master);
xspi->bitbang.chipselect = xilinx_spi_chipselect;
diff --git a/drivers/w1/masters/w1-gpio.c b/drivers/w1/masters/w1-gpio.c
index a411702413d..6f8866d6a90 100644
--- a/drivers/w1/masters/w1-gpio.c
+++ b/drivers/w1/masters/w1-gpio.c
@@ -74,6 +74,9 @@ static int __init w1_gpio_probe(struct platform_device *pdev)
if (err)
goto free_gpio;
+ if (pdata->enable_external_pullup)
+ pdata->enable_external_pullup(1);
+
platform_set_drvdata(pdev, master);
return 0;
@@ -91,6 +94,9 @@ static int __exit w1_gpio_remove(struct platform_device *pdev)
struct w1_bus_master *master = platform_get_drvdata(pdev);
struct w1_gpio_platform_data *pdata = pdev->dev.platform_data;
+ if (pdata->enable_external_pullup)
+ pdata->enable_external_pullup(0);
+
w1_remove_master_device(master);
gpio_free(pdata->pin);
kfree(master);
@@ -98,12 +104,41 @@ static int __exit w1_gpio_remove(struct platform_device *pdev)
return 0;
}
+#ifdef CONFIG_PM
+
+static int w1_gpio_suspend(struct platform_device *pdev, pm_message_t state)
+{
+ struct w1_gpio_platform_data *pdata = pdev->dev.platform_data;
+
+ if (pdata->enable_external_pullup)
+ pdata->enable_external_pullup(0);
+
+ return 0;
+}
+
+static int w1_gpio_resume(struct platform_device *pdev)
+{
+ struct w1_gpio_platform_data *pdata = pdev->dev.platform_data;
+
+ if (pdata->enable_external_pullup)
+ pdata->enable_external_pullup(1);
+
+ return 0;
+}
+
+#else
+#define w1_gpio_suspend NULL
+#define w1_gpio_resume NULL
+#endif
+
static struct platform_driver w1_gpio_driver = {
.driver = {
.name = "w1-gpio",
.owner = THIS_MODULE,
},
.remove = __exit_p(w1_gpio_remove),
+ .suspend = w1_gpio_suspend,
+ .resume = w1_gpio_resume,
};
static int __init w1_gpio_init(void)