summaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'drivers')
-rw-r--r--drivers/Makefile1
-rw-r--r--drivers/acpi/processor_perflib.c21
-rw-r--r--drivers/cdrom/gdrom.c4
-rw-r--r--drivers/char/ipmi/ipmi_si_intf.c4
-rw-r--r--drivers/char/mxser.c6
-rw-r--r--drivers/cpufreq/cpufreq.c3
-rw-r--r--drivers/firmware/iscsi_ibft_find.c1
-rw-r--r--drivers/ide/ppc/pmac.c13
-rw-r--r--drivers/input/keyboard/maple_keyb.c1
-rw-r--r--drivers/md/dm-table.c29
-rw-r--r--drivers/misc/Kconfig25
-rw-r--r--drivers/misc/Makefile1
-rw-r--r--drivers/misc/sgi-gru/Makefile3
-rw-r--r--drivers/misc/sgi-gru/gru.h67
-rw-r--r--drivers/misc/sgi-gru/gru_instructions.h669
-rw-r--r--drivers/misc/sgi-gru/grufault.c633
-rw-r--r--drivers/misc/sgi-gru/grufile.c485
-rw-r--r--drivers/misc/sgi-gru/gruhandles.h663
-rw-r--r--drivers/misc/sgi-gru/grukservices.c679
-rw-r--r--drivers/misc/sgi-gru/grukservices.h134
-rw-r--r--drivers/misc/sgi-gru/grulib.h97
-rw-r--r--drivers/misc/sgi-gru/grumain.c802
-rw-r--r--drivers/misc/sgi-gru/gruprocfs.c336
-rw-r--r--drivers/misc/sgi-gru/grutables.h609
-rw-r--r--drivers/misc/sgi-gru/grutlbpurge.c372
-rw-r--r--drivers/misc/sgi-xp/Makefile10
-rw-r--r--drivers/misc/sgi-xp/xp.h225
-rw-r--r--drivers/misc/sgi-xp/xp_main.c131
-rw-r--r--drivers/misc/sgi-xp/xp_sn2.c146
-rw-r--r--drivers/misc/sgi-xp/xp_uv.c72
-rw-r--r--drivers/misc/sgi-xp/xpc.h1200
-rw-r--r--drivers/misc/sgi-xp/xpc_channel.c1585
-rw-r--r--drivers/misc/sgi-xp/xpc_main.c974
-rw-r--r--drivers/misc/sgi-xp/xpc_partition.c928
-rw-r--r--drivers/misc/sgi-xp/xpc_sn2.c2404
-rw-r--r--drivers/misc/sgi-xp/xpc_uv.c1443
-rw-r--r--drivers/misc/sgi-xp/xpnet.c277
-rw-r--r--drivers/mmc/card/block.c17
-rw-r--r--drivers/mmc/card/mmc_test.c85
-rw-r--r--drivers/mmc/core/core.c5
-rw-r--r--drivers/mmc/host/au1xmmc.c8
-rw-r--r--drivers/mmc/host/sdhci-pci.c3
-rw-r--r--drivers/mmc/host/sdhci.c15
-rw-r--r--drivers/mmc/host/sdhci.h2
-rw-r--r--drivers/mtd/mtdsuper.c33
-rw-r--r--drivers/net/niu.c2
-rw-r--r--drivers/net/ps3_gelic_wireless.c12
-rw-r--r--drivers/net/wireless/ath5k/base.c99
-rw-r--r--drivers/net/wireless/ath5k/base.h2
-rw-r--r--drivers/net/wireless/ath5k/hw.c4
-rw-r--r--drivers/net/wireless/b43/main.c3
-rw-r--r--drivers/net/wireless/b43/xmit.c2
-rw-r--r--drivers/net/wireless/b43legacy/main.c5
-rw-r--r--drivers/net/wireless/b43legacy/xmit.c2
-rw-r--r--drivers/net/wireless/ipw2100.c3
-rw-r--r--drivers/net/wireless/iwlwifi/iwl-3945.c2
-rw-r--r--drivers/net/wireless/iwlwifi/iwl-core.c3
-rw-r--r--drivers/net/wireless/iwlwifi/iwl-debug.h8
-rw-r--r--drivers/net/wireless/iwlwifi/iwl-led.c4
-rw-r--r--drivers/net/wireless/iwlwifi/iwl-scan.c2
-rw-r--r--drivers/net/wireless/iwlwifi/iwl-tx.c2
-rw-r--r--drivers/net/wireless/iwlwifi/iwl3945-base.c5
-rw-r--r--drivers/net/wireless/libertas/persistcfg.c30
-rw-r--r--drivers/net/wireless/mac80211_hwsim.c2
-rw-r--r--drivers/net/wireless/rt2x00/rt2500usb.c13
-rw-r--r--drivers/net/wireless/rt2x00/rt2x00.h6
-rw-r--r--drivers/net/wireless/rt2x00/rt2x00config.c2
-rw-r--r--drivers/net/wireless/rt2x00/rt2x00dev.c7
-rw-r--r--drivers/net/wireless/rt2x00/rt2x00lib.h7
-rw-r--r--drivers/net/wireless/rt2x00/rt2x00mac.c29
-rw-r--r--drivers/net/wireless/rt2x00/rt2x00queue.c36
-rw-r--r--drivers/net/wireless/rt2x00/rt2x00queue.h2
-rw-r--r--drivers/net/wireless/rt2x00/rt2x00usb.c32
-rw-r--r--drivers/net/wireless/rt2x00/rt2x00usb.h22
-rw-r--r--drivers/net/wireless/rt2x00/rt61pci.c4
-rw-r--r--drivers/net/wireless/rt2x00/rt73usb.c56
-rw-r--r--drivers/net/wireless/rtl8187.h11
-rw-r--r--drivers/net/wireless/rtl8187_dev.c93
-rw-r--r--drivers/net/wireless/zd1211rw/zd_mac.c1
-rw-r--r--drivers/power/Kconfig7
-rw-r--r--drivers/power/Makefile1
-rw-r--r--drivers/power/olpc_battery.c273
-rw-r--r--drivers/power/power_supply_sysfs.c1
-rw-r--r--drivers/power/tosa_battery.c486
-rw-r--r--drivers/regulator/Kconfig59
-rw-r--r--drivers/regulator/Makefile12
-rw-r--r--drivers/regulator/bq24022.c167
-rw-r--r--drivers/regulator/core.c1903
-rw-r--r--drivers/regulator/fixed.c129
-rw-r--r--drivers/regulator/virtual.c345
-rw-r--r--drivers/rtc/interface.c10
-rw-r--r--drivers/rtc/rtc-dev.c4
-rw-r--r--drivers/s390/block/dasd_alias.c4
-rw-r--r--drivers/s390/block/dasd_devmap.c16
-rw-r--r--drivers/s390/block/dasd_eckd.c147
-rw-r--r--drivers/s390/block/dasd_eckd.h184
-rw-r--r--drivers/s390/block/dasd_int.h1
-rw-r--r--drivers/s390/char/sclp.c6
-rw-r--r--drivers/s390/char/sclp_cmd.c5
-rw-r--r--drivers/s390/char/sclp_config.c13
-rw-r--r--drivers/s390/cio/idset.c8
-rw-r--r--drivers/s390/cio/qdio_main.c2
-rw-r--r--drivers/s390/cio/qdio_perf.c2
-rw-r--r--drivers/s390/cio/qdio_setup.c4
-rw-r--r--drivers/s390/kvm/kvm_virtio.c2
-rw-r--r--drivers/s390/net/qeth_core.h5
-rw-r--r--drivers/s390/net/qeth_core_main.c63
-rw-r--r--drivers/s390/net/qeth_l2_main.c50
-rw-r--r--drivers/s390/net/qeth_l3_main.c51
-rw-r--r--drivers/serial/8250.c4
-rw-r--r--drivers/serial/Makefile1
-rw-r--r--drivers/serial/cpm_uart/cpm_uart.h11
-rw-r--r--drivers/serial/cpm_uart/cpm_uart_core.c66
-rw-r--r--drivers/serial/sh-sci.h12
-rw-r--r--drivers/serial/v850e_uart.c548
-rw-r--r--drivers/sh/maple/maple.c265
-rw-r--r--drivers/usb/gadget/m66592-udc.c2
-rw-r--r--drivers/video/Makefile1
-rw-r--r--drivers/video/arkfb.c9
-rw-r--r--drivers/video/backlight/hp680_bl.c2
-rw-r--r--drivers/video/backlight/platform_lcd.c4
-rw-r--r--drivers/video/console/sticore.c30
-rw-r--r--drivers/video/gbefb.c50
-rw-r--r--drivers/video/hitfb.c2
-rw-r--r--drivers/video/pvr2fb.c6
-rw-r--r--drivers/video/vt8623fb.c9
-rw-r--r--drivers/watchdog/hpwdt.c2
127 files changed, 15691 insertions, 5027 deletions
diff --git a/drivers/Makefile b/drivers/Makefile
index 54ec5e718c0..a280ab3d083 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -97,3 +97,4 @@ obj-$(CONFIG_PPC_PS3) += ps3/
obj-$(CONFIG_OF) += of/
obj-$(CONFIG_SSB) += ssb/
obj-$(CONFIG_VIRTIO) += virtio/
+obj-$(CONFIG_REGULATOR) += regulator/
diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
index b4749969c6b..0133af49cf0 100644
--- a/drivers/acpi/processor_perflib.c
+++ b/drivers/acpi/processor_perflib.c
@@ -64,7 +64,13 @@ static DEFINE_MUTEX(performance_mutex);
* policy is adjusted accordingly.
*/
-static unsigned int ignore_ppc = 0;
+/* ignore_ppc:
+ * -1 -> cpufreq low level drivers not initialized -> _PSS, etc. not called yet
+ * ignore _PPC
+ * 0 -> cpufreq low level drivers initialized -> consider _PPC values
+ * 1 -> ignore _PPC totally -> forced by user through boot param
+ */
+static unsigned int ignore_ppc = -1;
module_param(ignore_ppc, uint, 0644);
MODULE_PARM_DESC(ignore_ppc, "If the frequency of your machine gets wrongly" \
"limited by BIOS, this should help");
@@ -72,7 +78,7 @@ MODULE_PARM_DESC(ignore_ppc, "If the frequency of your machine gets wrongly" \
#define PPC_REGISTERED 1
#define PPC_IN_USE 2
-static int acpi_processor_ppc_status = 0;
+static int acpi_processor_ppc_status;
static int acpi_processor_ppc_notifier(struct notifier_block *nb,
unsigned long event, void *data)
@@ -81,13 +87,18 @@ static int acpi_processor_ppc_notifier(struct notifier_block *nb,
struct acpi_processor *pr;
unsigned int ppc = 0;
- if (ignore_ppc)
+ if (event == CPUFREQ_START && ignore_ppc <= 0) {
+ ignore_ppc = 0;
return 0;
+ }
- mutex_lock(&performance_mutex);
+ if (ignore_ppc)
+ return 0;
if (event != CPUFREQ_INCOMPATIBLE)
- goto out;
+ return 0;
+
+ mutex_lock(&performance_mutex);
pr = per_cpu(processors, policy->cpu);
if (!pr || !pr->performance)
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index 71ec426ecff..1e0455bd6df 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -39,8 +39,8 @@
#include <asm/io.h>
#include <asm/dma.h>
#include <asm/delay.h>
-#include <asm/mach/dma.h>
-#include <asm/mach/sysasic.h>
+#include <mach/dma.h>
+#include <mach/sysasic.h>
#define GDROM_DEV_NAME "gdrom"
#define GD_SESSION_OFFSET 150
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index 192688344ed..f52931e1c16 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -66,8 +66,8 @@
#include <linux/ctype.h>
#ifdef CONFIG_PPC_OF
-#include <asm/of_device.h>
-#include <asm/of_platform.h>
+#include <linux/of_device.h>
+#include <linux/of_platform.h>
#endif
#define PFX "ipmi_si: "
diff --git a/drivers/char/mxser.c b/drivers/char/mxser.c
index e30575e8764..b638403e8e9 100644
--- a/drivers/char/mxser.c
+++ b/drivers/char/mxser.c
@@ -1612,8 +1612,10 @@ static int mxser_ioctl_special(unsigned int cmd, void __user *argp)
switch (cmd) {
case MOXA_GET_MAJOR:
- printk(KERN_WARNING "mxser: '%s' uses deprecated ioctl %x, fix "
- "your userspace\n", current->comm, cmd);
+ if (printk_ratelimit())
+ printk(KERN_WARNING "mxser: '%s' uses deprecated ioctl "
+ "%x (GET_MAJOR), fix your userspace\n",
+ current->comm, cmd);
return put_user(ttymajor, (int __user *)argp);
case MOXA_CHKPORTENABLE:
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 8d6a3ff0267..8a67f16987d 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -825,6 +825,9 @@ static int cpufreq_add_dev(struct sys_device *sys_dev)
policy->user_policy.min = policy->cpuinfo.min_freq;
policy->user_policy.max = policy->cpuinfo.max_freq;
+ blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
+ CPUFREQ_START, policy);
+
#ifdef CONFIG_SMP
#ifdef CONFIG_HOTPLUG_CPU
diff --git a/drivers/firmware/iscsi_ibft_find.c b/drivers/firmware/iscsi_ibft_find.c
index 11f17440fea..d53fbbfefa3 100644
--- a/drivers/firmware/iscsi_ibft_find.c
+++ b/drivers/firmware/iscsi_ibft_find.c
@@ -81,4 +81,3 @@ void __init reserve_ibft_region(void)
if (ibft_addr)
reserve_bootmem(pos, PAGE_ALIGN(len), BOOTMEM_DEFAULT);
}
-EXPORT_SYMBOL_GPL(reserve_ibft_region);
diff --git a/drivers/ide/ppc/pmac.c b/drivers/ide/ppc/pmac.c
index c521bf6e1bf..fa2be26272d 100644
--- a/drivers/ide/ppc/pmac.c
+++ b/drivers/ide/ppc/pmac.c
@@ -1086,6 +1086,11 @@ static int __devinit pmac_ide_setup_device(pmac_ide_hwif_t *pmif, hw_regs_t *hw)
/* Make sure we have sane timings */
sanitize_timings(pmif);
+ host = ide_host_alloc(&d, hws);
+ if (host == NULL)
+ return -ENOMEM;
+ hwif = host->ports[0];
+
#ifndef CONFIG_PPC64
/* XXX FIXME: Media bay stuff need re-organizing */
if (np->parent && np->parent->name
@@ -1119,11 +1124,11 @@ static int __devinit pmac_ide_setup_device(pmac_ide_hwif_t *pmif, hw_regs_t *hw)
pmif->mdev ? "macio" : "PCI", pmif->aapl_bus_id,
pmif->mediabay ? " (mediabay)" : "", hw->irq);
- rc = ide_host_add(&d, hws, &host);
- if (rc)
+ rc = ide_host_register(host, &d, hws);
+ if (rc) {
+ ide_host_free(host);
return rc;
-
- hwif = host->ports[0];
+ }
return 0;
}
diff --git a/drivers/input/keyboard/maple_keyb.c b/drivers/input/keyboard/maple_keyb.c
index 2b404284c28..7797ef6e5e6 100644
--- a/drivers/input/keyboard/maple_keyb.c
+++ b/drivers/input/keyboard/maple_keyb.c
@@ -27,7 +27,6 @@
#include <linux/init.h>
#include <linux/timer.h>
#include <linux/maple.h>
-#include <asm/mach/maple.h>
/* Very simple mutex to ensure proper cleanup */
static DEFINE_MUTEX(maple_keyb_mutex);
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 798e468103b..61f44140923 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -316,29 +316,12 @@ static inline int check_space(struct dm_table *t)
*/
static int lookup_device(const char *path, dev_t *dev)
{
- int r;
- struct nameidata nd;
- struct inode *inode;
-
- if ((r = path_lookup(path, LOOKUP_FOLLOW, &nd)))
- return r;
-
- inode = nd.path.dentry->d_inode;
- if (!inode) {
- r = -ENOENT;
- goto out;
- }
-
- if (!S_ISBLK(inode->i_mode)) {
- r = -ENOTBLK;
- goto out;
- }
-
- *dev = inode->i_rdev;
-
- out:
- path_put(&nd.path);
- return r;
+ struct block_device *bdev = lookup_bdev(path);
+ if (IS_ERR(bdev))
+ return PTR_ERR(bdev);
+ *dev = bdev->bd_dev;
+ bdput(bdev);
+ return 0;
}
/*
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index f5ade1904aa..fa50e9ede0e 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -426,7 +426,7 @@ config ENCLOSURE_SERVICES
config SGI_XP
tristate "Support communication between SGI SSIs"
- depends on IA64_GENERIC || IA64_SGI_SN2
+ depends on IA64_GENERIC || IA64_SGI_SN2 || IA64_SGI_UV || (X86_64 && SMP)
select IA64_UNCACHED_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2
select GENERIC_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2
---help---
@@ -450,4 +450,27 @@ config HP_ILO
To compile this driver as a module, choose M here: the
module will be called hpilo.
+config SGI_GRU
+ tristate "SGI GRU driver"
+ depends on (X86_64 || IA64_SGI_UV || IA64_GENERIC) && SMP
+ default n
+ select MMU_NOTIFIER
+ ---help---
+ The GRU is a hardware resource located in the system chipset. The GRU
+ contains memory that can be mmapped into the user address space. This memory is
+ used to communicate with the GRU to perform functions such as load/store,
+ scatter/gather, bcopy, AMOs, etc. The GRU is directly accessed by user
+ instructions using user virtual addresses. GRU instructions (ex., bcopy) use
+ user virtual addresses for operands.
+
+ If you are not running on a SGI UV system, say N.
+
+config SGI_GRU_DEBUG
+ bool "SGI GRU driver debug"
+ depends on SGI_GRU
+ default n
+ ---help---
+ This option enables addition debugging code for the SGI GRU driver. If
+ you are unsure, say N.
+
endif # MISC_DEVICES
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index f5e273420c0..c6c13f60b45 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -28,4 +28,5 @@ obj-$(CONFIG_INTEL_MENLOW) += intel_menlow.o
obj-$(CONFIG_ENCLOSURE_SERVICES) += enclosure.o
obj-$(CONFIG_KGDB_TESTS) += kgdbts.o
obj-$(CONFIG_SGI_XP) += sgi-xp/
+obj-$(CONFIG_SGI_GRU) += sgi-gru/
obj-$(CONFIG_HP_ILO) += hpilo.o
diff --git a/drivers/misc/sgi-gru/Makefile b/drivers/misc/sgi-gru/Makefile
new file mode 100644
index 00000000000..d03597a521b
--- /dev/null
+++ b/drivers/misc/sgi-gru/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_SGI_GRU) := gru.o
+gru-y := grufile.o grumain.o grufault.o grutlbpurge.o gruprocfs.o grukservices.o
+
diff --git a/drivers/misc/sgi-gru/gru.h b/drivers/misc/sgi-gru/gru.h
new file mode 100644
index 00000000000..40df7cb3f0a
--- /dev/null
+++ b/drivers/misc/sgi-gru/gru.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef __GRU_H__
+#define __GRU_H__
+
+/*
+ * GRU architectural definitions
+ */
+#define GRU_CACHE_LINE_BYTES 64
+#define GRU_HANDLE_STRIDE 256
+#define GRU_CB_BASE 0
+#define GRU_DS_BASE 0x20000
+
+/*
+ * Size used to map GRU GSeg
+ */
+#if defined CONFIG_IA64
+#define GRU_GSEG_PAGESIZE (256 * 1024UL)
+#elif defined CONFIG_X86_64
+#define GRU_GSEG_PAGESIZE (256 * 1024UL) /* ZZZ 2MB ??? */
+#else
+#error "Unsupported architecture"
+#endif
+
+/*
+ * Structure for obtaining GRU resource information
+ */
+struct gru_chiplet_info {
+ int node;
+ int chiplet;
+ int blade;
+ int total_dsr_bytes;
+ int total_cbr;
+ int total_user_dsr_bytes;
+ int total_user_cbr;
+ int free_user_dsr_bytes;
+ int free_user_cbr;
+};
+
+/* Flags for GRU options on the gru_create_context() call */
+/* Select one of the follow 4 options to specify how TLB misses are handled */
+#define GRU_OPT_MISS_DEFAULT 0x0000 /* Use default mode */
+#define GRU_OPT_MISS_USER_POLL 0x0001 /* User will poll CB for faults */
+#define GRU_OPT_MISS_FMM_INTR 0x0002 /* Send interrupt to cpu to
+ handle fault */
+#define GRU_OPT_MISS_FMM_POLL 0x0003 /* Use system polling thread */
+#define GRU_OPT_MISS_MASK 0x0003 /* Mask for TLB MISS option */
+
+
+
+#endif /* __GRU_H__ */
diff --git a/drivers/misc/sgi-gru/gru_instructions.h b/drivers/misc/sgi-gru/gru_instructions.h
new file mode 100644
index 00000000000..0dc36225c7c
--- /dev/null
+++ b/drivers/misc/sgi-gru/gru_instructions.h
@@ -0,0 +1,669 @@
+/*
+ * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef __GRU_INSTRUCTIONS_H__
+#define __GRU_INSTRUCTIONS_H__
+
+#define gru_flush_cache_hook(p)
+#define gru_emulator_wait_hook(p, w)
+
+/*
+ * Architecture dependent functions
+ */
+
+#if defined CONFIG_IA64
+#include <linux/compiler.h>
+#include <asm/intrinsics.h>
+#define __flush_cache(p) ia64_fc(p)
+/* Use volatile on IA64 to ensure ordering via st4.rel */
+#define gru_ordered_store_int(p,v) \
+ do { \
+ barrier(); \
+ *((volatile int *)(p)) = v; /* force st.rel */ \
+ } while (0)
+#elif defined CONFIG_X86_64
+#define __flush_cache(p) clflush(p)
+#define gru_ordered_store_int(p,v) \
+ do { \
+ barrier(); \
+ *(int *)p = v; \
+ } while (0)
+#else
+#error "Unsupported architecture"
+#endif
+
+/*
+ * Control block status and exception codes
+ */
+#define CBS_IDLE 0
+#define CBS_EXCEPTION 1
+#define CBS_ACTIVE 2
+#define CBS_CALL_OS 3
+
+/* CB substatus bitmasks */
+#define CBSS_MSG_QUEUE_MASK 7
+#define CBSS_IMPLICIT_ABORT_ACTIVE_MASK 8
+
+/* CB substatus message queue values (low 3 bits of substatus) */
+#define CBSS_NO_ERROR 0
+#define CBSS_LB_OVERFLOWED 1
+#define CBSS_QLIMIT_REACHED 2
+#define CBSS_PAGE_OVERFLOW 3
+#define CBSS_AMO_NACKED 4
+#define CBSS_PUT_NACKED 5
+
+/*
+ * Structure used to fetch exception detail for CBs that terminate with
+ * CBS_EXCEPTION
+ */
+struct control_block_extended_exc_detail {
+ unsigned long cb;
+ int opc;
+ int ecause;
+ int exopc;
+ long exceptdet0;
+ int exceptdet1;
+};
+
+/*
+ * Instruction formats
+ */
+
+/*
+ * Generic instruction format.
+ * This definition has precise bit field definitions.
+ */
+struct gru_instruction_bits {
+ /* DW 0 - low */
+ unsigned int icmd: 1;
+ unsigned char ima: 3; /* CB_DelRep, unmapped mode */
+ unsigned char reserved0: 4;
+ unsigned int xtype: 3;
+ unsigned int iaa0: 2;
+ unsigned int iaa1: 2;
+ unsigned char reserved1: 1;
+ unsigned char opc: 8; /* opcode */
+ unsigned char exopc: 8; /* extended opcode */
+ /* DW 0 - high */
+ unsigned int idef2: 22; /* TRi0 */
+ unsigned char reserved2: 2;
+ unsigned char istatus: 2;
+ unsigned char isubstatus:4;
+ unsigned char reserved3: 2;
+ /* DW 1 */
+ unsigned long idef4; /* 42 bits: TRi1, BufSize */
+ /* DW 2-6 */
+ unsigned long idef1; /* BAddr0 */
+ unsigned long idef5; /* Nelem */
+ unsigned long idef6; /* Stride, Operand1 */
+ unsigned long idef3; /* BAddr1, Value, Operand2 */
+ unsigned long reserved4;
+ /* DW 7 */
+ unsigned long avalue; /* AValue */
+};
+
+/*
+ * Generic instruction with friendlier names. This format is used
+ * for inline instructions.
+ */
+struct gru_instruction {
+ /* DW 0 */
+ unsigned int op32; /* icmd,xtype,iaa0,ima,opc */
+ unsigned int tri0;
+ unsigned long tri1_bufsize; /* DW 1 */
+ unsigned long baddr0; /* DW 2 */
+ unsigned long nelem; /* DW 3 */
+ unsigned long op1_stride; /* DW 4 */
+ unsigned long op2_value_baddr1; /* DW 5 */
+ unsigned long reserved0; /* DW 6 */
+ unsigned long avalue; /* DW 7 */
+};
+
+/* Some shifts and masks for the low 32 bits of a GRU command */
+#define GRU_CB_ICMD_SHFT 0
+#define GRU_CB_ICMD_MASK 0x1
+#define GRU_CB_XTYPE_SHFT 8
+#define GRU_CB_XTYPE_MASK 0x7
+#define GRU_CB_IAA0_SHFT 11
+#define GRU_CB_IAA0_MASK 0x3
+#define GRU_CB_IAA1_SHFT 13
+#define GRU_CB_IAA1_MASK 0x3
+#define GRU_CB_IMA_SHFT 1
+#define GRU_CB_IMA_MASK 0x3
+#define GRU_CB_OPC_SHFT 16
+#define GRU_CB_OPC_MASK 0xff
+#define GRU_CB_EXOPC_SHFT 24
+#define GRU_CB_EXOPC_MASK 0xff
+
+/* GRU instruction opcodes (opc field) */
+#define OP_NOP 0x00
+#define OP_BCOPY 0x01
+#define OP_VLOAD 0x02
+#define OP_IVLOAD 0x03
+#define OP_VSTORE 0x04
+#define OP_IVSTORE 0x05
+#define OP_VSET 0x06
+#define OP_IVSET 0x07
+#define OP_MESQ 0x08
+#define OP_GAMXR 0x09
+#define OP_GAMIR 0x0a
+#define OP_GAMIRR 0x0b
+#define OP_GAMER 0x0c
+#define OP_GAMERR 0x0d
+#define OP_BSTORE 0x0e
+#define OP_VFLUSH 0x0f
+
+
+/* Extended opcodes values (exopc field) */
+
+/* GAMIR - AMOs with implicit operands */
+#define EOP_IR_FETCH 0x01 /* Plain fetch of memory */
+#define EOP_IR_CLR 0x02 /* Fetch and clear */
+#define EOP_IR_INC 0x05 /* Fetch and increment */
+#define EOP_IR_DEC 0x07 /* Fetch and decrement */
+#define EOP_IR_QCHK1 0x0d /* Queue check, 64 byte msg */
+#define EOP_IR_QCHK2 0x0e /* Queue check, 128 byte msg */
+
+/* GAMIRR - Registered AMOs with implicit operands */
+#define EOP_IRR_FETCH 0x01 /* Registered fetch of memory */
+#define EOP_IRR_CLR 0x02 /* Registered fetch and clear */
+#define EOP_IRR_INC 0x05 /* Registered fetch and increment */
+#define EOP_IRR_DEC 0x07 /* Registered fetch and decrement */
+#define EOP_IRR_DECZ 0x0f /* Registered fetch and decrement, update on zero*/
+
+/* GAMER - AMOs with explicit operands */
+#define EOP_ER_SWAP 0x00 /* Exchange argument and memory */
+#define EOP_ER_OR 0x01 /* Logical OR with memory */
+#define EOP_ER_AND 0x02 /* Logical AND with memory */
+#define EOP_ER_XOR 0x03 /* Logical XOR with memory */
+#define EOP_ER_ADD 0x04 /* Add value to memory */
+#define EOP_ER_CSWAP 0x08 /* Compare with operand2, write operand1 if match*/
+#define EOP_ER_CADD 0x0c /* Queue check, operand1*64 byte msg */
+
+/* GAMERR - Registered AMOs with explicit operands */
+#define EOP_ERR_SWAP 0x00 /* Exchange argument and memory */
+#define EOP_ERR_OR 0x01 /* Logical OR with memory */
+#define EOP_ERR_AND 0x02 /* Logical AND with memory */
+#define EOP_ERR_XOR 0x03 /* Logical XOR with memory */
+#define EOP_ERR_ADD 0x04 /* Add value to memory */
+#define EOP_ERR_CSWAP 0x08 /* Compare with operand2, write operand1 if match*/
+#define EOP_ERR_EPOLL 0x09 /* Poll for equality */
+#define EOP_ERR_NPOLL 0x0a /* Poll for inequality */
+
+/* GAMXR - SGI Arithmetic unit */
+#define EOP_XR_CSWAP 0x0b /* Masked compare exchange */
+
+
+/* Transfer types (xtype field) */
+#define XTYPE_B 0x0 /* byte */
+#define XTYPE_S 0x1 /* short (2-byte) */
+#define XTYPE_W 0x2 /* word (4-byte) */
+#define XTYPE_DW 0x3 /* doubleword (8-byte) */
+#define XTYPE_CL 0x6 /* cacheline (64-byte) */
+
+
+/* Instruction access attributes (iaa0, iaa1 fields) */
+#define IAA_RAM 0x0 /* normal cached RAM access */
+#define IAA_NCRAM 0x2 /* noncoherent RAM access */
+#define IAA_MMIO 0x1 /* noncoherent memory-mapped I/O space */
+#define IAA_REGISTER 0x3 /* memory-mapped registers, etc. */
+
+
+/* Instruction mode attributes (ima field) */
+#define IMA_MAPPED 0x0 /* Virtual mode */
+#define IMA_CB_DELAY 0x1 /* hold read responses until status changes */
+#define IMA_UNMAPPED 0x2 /* bypass the TLBs (OS only) */
+#define IMA_INTERRUPT 0x4 /* Interrupt when instruction completes */
+
+/* CBE ecause bits */
+#define CBE_CAUSE_RI (1 << 0)
+#define CBE_CAUSE_INVALID_INSTRUCTION (1 << 1)
+#define CBE_CAUSE_UNMAPPED_MODE_FORBIDDEN (1 << 2)
+#define CBE_CAUSE_PE_CHECK_DATA_ERROR (1 << 3)
+#define CBE_CAUSE_IAA_GAA_MISMATCH (1 << 4)
+#define CBE_CAUSE_DATA_SEGMENT_LIMIT_EXCEPTION (1 << 5)
+#define CBE_CAUSE_OS_FATAL_TLB_FAULT (1 << 6)
+#define CBE_CAUSE_EXECUTION_HW_ERROR (1 << 7)
+#define CBE_CAUSE_TLBHW_ERROR (1 << 8)
+#define CBE_CAUSE_RA_REQUEST_TIMEOUT (1 << 9)
+#define CBE_CAUSE_HA_REQUEST_TIMEOUT (1 << 10)
+#define CBE_CAUSE_RA_RESPONSE_FATAL (1 << 11)
+#define CBE_CAUSE_RA_RESPONSE_NON_FATAL (1 << 12)
+#define CBE_CAUSE_HA_RESPONSE_FATAL (1 << 13)
+#define CBE_CAUSE_HA_RESPONSE_NON_FATAL (1 << 14)
+#define CBE_CAUSE_ADDRESS_SPACE_DECODE_ERROR (1 << 15)
+#define CBE_CAUSE_RESPONSE_DATA_ERROR (1 << 16)
+#define CBE_CAUSE_PROTOCOL_STATE_DATA_ERROR (1 << 17)
+
+/*
+ * Exceptions are retried for the following cases. If any OTHER bits are set
+ * in ecause, the exception is not retryable.
+ */
+#define EXCEPTION_RETRY_BITS (CBE_CAUSE_RESPONSE_DATA_ERROR | \
+ CBE_CAUSE_RA_REQUEST_TIMEOUT | \
+ CBE_CAUSE_TLBHW_ERROR | \
+ CBE_CAUSE_HA_REQUEST_TIMEOUT)
+
+/* Message queue head structure */
+union gru_mesqhead {
+ unsigned long val;
+ struct {
+ unsigned int head;
+ unsigned int limit;
+ };
+};
+
+
+/* Generate the low word of a GRU instruction */
+static inline unsigned int
+__opword(unsigned char opcode, unsigned char exopc, unsigned char xtype,
+ unsigned char iaa0, unsigned char iaa1,
+ unsigned char ima)
+{
+ return (1 << GRU_CB_ICMD_SHFT) |
+ (iaa0 << GRU_CB_IAA0_SHFT) |
+ (iaa1 << GRU_CB_IAA1_SHFT) |
+ (ima << GRU_CB_IMA_SHFT) |
+ (xtype << GRU_CB_XTYPE_SHFT) |
+ (opcode << GRU_CB_OPC_SHFT) |
+ (exopc << GRU_CB_EXOPC_SHFT);
+}
+
+/*
+ * Architecture specific intrinsics
+ */
+static inline void gru_flush_cache(void *p)
+{
+ __flush_cache(p);
+}
+
+/*
+ * Store the lower 32 bits of the command including the "start" bit. Then
+ * start the instruction executing.
+ */
+static inline void gru_start_instruction(struct gru_instruction *ins, int op32)
+{
+ gru_ordered_store_int(ins, op32);
+}
+
+
+/* Convert "hints" to IMA */
+#define CB_IMA(h) ((h) | IMA_UNMAPPED)
+
+/* Convert data segment cache line index into TRI0 / TRI1 value */
+#define GRU_DINDEX(i) ((i) * GRU_CACHE_LINE_BYTES)
+
+/* Inline functions for GRU instructions.
+ * Note:
+ * - nelem and stride are in elements
+ * - tri0/tri1 is in bytes for the beginning of the data segment.
+ */
+static inline void gru_vload(void *cb, unsigned long mem_addr,
+ unsigned int tri0, unsigned char xtype, unsigned long nelem,
+ unsigned long stride, unsigned long hints)
+{
+ struct gru_instruction *ins = (struct gru_instruction *)cb;
+
+ ins->baddr0 = (long)mem_addr;
+ ins->nelem = nelem;
+ ins->tri0 = tri0;
+ ins->op1_stride = stride;
+ gru_start_instruction(ins, __opword(OP_VLOAD, 0, xtype, IAA_RAM, 0,
+ CB_IMA(hints)));
+}
+
+static inline void gru_vstore(void *cb, unsigned long mem_addr,
+ unsigned int tri0, unsigned char xtype, unsigned long nelem,
+ unsigned long stride, unsigned long hints)
+{
+ struct gru_instruction *ins = (void *)cb;
+
+ ins->baddr0 = (long)mem_addr;
+ ins->nelem = nelem;
+ ins->tri0 = tri0;
+ ins->op1_stride = stride;
+ gru_start_instruction(ins, __opword(OP_VSTORE, 0, xtype, IAA_RAM, 0,
+ CB_IMA(hints)));
+}
+
+static inline void gru_ivload(void *cb, unsigned long mem_addr,
+ unsigned int tri0, unsigned int tri1, unsigned char xtype,
+ unsigned long nelem, unsigned long hints)
+{
+ struct gru_instruction *ins = (void *)cb;
+
+ ins->baddr0 = (long)mem_addr;
+ ins->nelem = nelem;
+ ins->tri0 = tri0;
+ ins->tri1_bufsize = tri1;
+ gru_start_instruction(ins, __opword(OP_IVLOAD, 0, xtype, IAA_RAM, 0,
+ CB_IMA(hints)));
+}
+
+static inline void gru_ivstore(void *cb, unsigned long mem_addr,
+ unsigned int tri0, unsigned int tri1,
+ unsigned char xtype, unsigned long nelem, unsigned long hints)
+{
+ struct gru_instruction *ins = (void *)cb;
+
+ ins->baddr0 = (long)mem_addr;
+ ins->nelem = nelem;
+ ins->tri0 = tri0;
+ ins->tri1_bufsize = tri1;
+ gru_start_instruction(ins, __opword(OP_IVSTORE, 0, xtype, IAA_RAM, 0,
+ CB_IMA(hints)));
+}
+
+static inline void gru_vset(void *cb, unsigned long mem_addr,
+ unsigned long value, unsigned char xtype, unsigned long nelem,
+ unsigned long stride, unsigned long hints)
+{
+ struct gru_instruction *ins = (void *)cb;
+
+ ins->baddr0 = (long)mem_addr;
+ ins->op2_value_baddr1 = value;
+ ins->nelem = nelem;
+ ins->op1_stride = stride;
+ gru_start_instruction(ins, __opword(OP_VSET, 0, xtype, IAA_RAM, 0,
+ CB_IMA(hints)));
+}
+
+static inline void gru_ivset(void *cb, unsigned long mem_addr,
+ unsigned int tri1, unsigned long value, unsigned char xtype,
+ unsigned long nelem, unsigned long hints)
+{
+ struct gru_instruction *ins = (void *)cb;
+
+ ins->baddr0 = (long)mem_addr;
+ ins->op2_value_baddr1 = value;
+ ins->nelem = nelem;
+ ins->tri1_bufsize = tri1;
+ gru_start_instruction(ins, __opword(OP_IVSET, 0, xtype, IAA_RAM, 0,
+ CB_IMA(hints)));
+}
+
+static inline void gru_vflush(void *cb, unsigned long mem_addr,
+ unsigned long nelem, unsigned char xtype, unsigned long stride,
+ unsigned long hints)
+{
+ struct gru_instruction *ins = (void *)cb;
+
+ ins->baddr0 = (long)mem_addr;
+ ins->op1_stride = stride;
+ ins->nelem = nelem;
+ gru_start_instruction(ins, __opword(OP_VFLUSH, 0, xtype, IAA_RAM, 0,
+ CB_IMA(hints)));
+}
+
+static inline void gru_nop(void *cb, int hints)
+{
+ struct gru_instruction *ins = (void *)cb;
+
+ gru_start_instruction(ins, __opword(OP_NOP, 0, 0, 0, 0, CB_IMA(hints)));
+}
+
+
+static inline void gru_bcopy(void *cb, const unsigned long src,
+ unsigned long dest,
+ unsigned int tri0, unsigned int xtype, unsigned long nelem,
+ unsigned int bufsize, unsigned long hints)
+{
+ struct gru_instruction *ins = (void *)cb;
+
+ ins->baddr0 = (long)src;
+ ins->op2_value_baddr1 = (long)dest;
+ ins->nelem = nelem;
+ ins->tri0 = tri0;
+ ins->tri1_bufsize = bufsize;
+ gru_start_instruction(ins, __opword(OP_BCOPY, 0, xtype, IAA_RAM,
+ IAA_RAM, CB_IMA(hints)));
+}
+
+static inline void gru_bstore(void *cb, const unsigned long src,
+ unsigned long dest, unsigned int tri0, unsigned int xtype,
+ unsigned long nelem, unsigned long hints)
+{
+ struct gru_instruction *ins = (void *)cb;
+
+ ins->baddr0 = (long)src;
+ ins->op2_value_baddr1 = (long)dest;
+ ins->nelem = nelem;
+ ins->tri0 = tri0;
+ gru_start_instruction(ins, __opword(OP_BSTORE, 0, xtype, 0, IAA_RAM,
+ CB_IMA(hints)));
+}
+
+static inline void gru_gamir(void *cb, int exopc, unsigned long src,
+ unsigned int xtype, unsigned long hints)
+{
+ struct gru_instruction *ins = (void *)cb;
+
+ ins->baddr0 = (long)src;
+ gru_start_instruction(ins, __opword(OP_GAMIR, exopc, xtype, IAA_RAM, 0,
+ CB_IMA(hints)));
+}
+
+static inline void gru_gamirr(void *cb, int exopc, unsigned long src,
+ unsigned int xtype, unsigned long hints)
+{
+ struct gru_instruction *ins = (void *)cb;
+
+ ins->baddr0 = (long)src;
+ gru_start_instruction(ins, __opword(OP_GAMIRR, exopc, xtype, IAA_RAM, 0,
+ CB_IMA(hints)));
+}
+
+static inline void gru_gamer(void *cb, int exopc, unsigned long src,
+ unsigned int xtype,
+ unsigned long operand1, unsigned long operand2,
+ unsigned long hints)
+{
+ struct gru_instruction *ins = (void *)cb;
+
+ ins->baddr0 = (long)src;
+ ins->op1_stride = operand1;
+ ins->op2_value_baddr1 = operand2;
+ gru_start_instruction(ins, __opword(OP_GAMER, exopc, xtype, IAA_RAM, 0,
+ CB_IMA(hints)));
+}
+
+static inline void gru_gamerr(void *cb, int exopc, unsigned long src,
+ unsigned int xtype, unsigned long operand1,
+ unsigned long operand2, unsigned long hints)
+{
+ struct gru_instruction *ins = (void *)cb;
+
+ ins->baddr0 = (long)src;
+ ins->op1_stride = operand1;
+ ins->op2_value_baddr1 = operand2;
+ gru_start_instruction(ins, __opword(OP_GAMERR, exopc, xtype, IAA_RAM, 0,
+ CB_IMA(hints)));
+}
+
+static inline void gru_gamxr(void *cb, unsigned long src,
+ unsigned int tri0, unsigned long hints)
+{
+ struct gru_instruction *ins = (void *)cb;
+
+ ins->baddr0 = (long)src;
+ ins->nelem = 4;
+ gru_start_instruction(ins, __opword(OP_GAMXR, EOP_XR_CSWAP, XTYPE_DW,
+ IAA_RAM, 0, CB_IMA(hints)));
+}
+
+static inline void gru_mesq(void *cb, unsigned long queue,
+ unsigned long tri0, unsigned long nelem,
+ unsigned long hints)
+{
+ struct gru_instruction *ins = (void *)cb;
+
+ ins->baddr0 = (long)queue;
+ ins->nelem = nelem;
+ ins->tri0 = tri0;
+ gru_start_instruction(ins, __opword(OP_MESQ, 0, XTYPE_CL, IAA_RAM, 0,
+ CB_IMA(hints)));
+}
+
+static inline unsigned long gru_get_amo_value(void *cb)
+{
+ struct gru_instruction *ins = (void *)cb;
+
+ return ins->avalue;
+}
+
+static inline int gru_get_amo_value_head(void *cb)
+{
+ struct gru_instruction *ins = (void *)cb;
+
+ return ins->avalue & 0xffffffff;
+}
+
+static inline int gru_get_amo_value_limit(void *cb)
+{
+ struct gru_instruction *ins = (void *)cb;
+
+ return ins->avalue >> 32;
+}
+
+static inline union gru_mesqhead gru_mesq_head(int head, int limit)
+{
+ union gru_mesqhead mqh;
+
+ mqh.head = head;
+ mqh.limit = limit;
+ return mqh;
+}
+
+/*
+ * Get struct control_block_extended_exc_detail for CB.
+ */
+extern int gru_get_cb_exception_detail(void *cb,
+ struct control_block_extended_exc_detail *excdet);
+
+#define GRU_EXC_STR_SIZE 256
+
+extern int gru_check_status_proc(void *cb);
+extern int gru_wait_proc(void *cb);
+extern void gru_wait_abort_proc(void *cb);
+
+/*
+ * Control block definition for checking status
+ */
+struct gru_control_block_status {
+ unsigned int icmd :1;
+ unsigned int unused1 :31;
+ unsigned int unused2 :24;
+ unsigned int istatus :2;
+ unsigned int isubstatus :4;
+ unsigned int inused3 :2;
+};
+
+/* Get CB status */
+static inline int gru_get_cb_status(void *cb)
+{
+ struct gru_control_block_status *cbs = (void *)cb;
+
+ return cbs->istatus;
+}
+
+/* Get CB message queue substatus */
+static inline int gru_get_cb_message_queue_substatus(void *cb)
+{
+ struct gru_control_block_status *cbs = (void *)cb;
+
+ return cbs->isubstatus & CBSS_MSG_QUEUE_MASK;
+}
+
+/* Get CB substatus */
+static inline int gru_get_cb_substatus(void *cb)
+{
+ struct gru_control_block_status *cbs = (void *)cb;
+
+ return cbs->isubstatus;
+}
+
+/* Check the status of a CB. If the CB is in UPM mode, call the
+ * OS to handle the UPM status.
+ * Returns the CB status field value (0 for normal completion)
+ */
+static inline int gru_check_status(void *cb)
+{
+ struct gru_control_block_status *cbs = (void *)cb;
+ int ret = cbs->istatus;
+
+ if (ret == CBS_CALL_OS)
+ ret = gru_check_status_proc(cb);
+ return ret;
+}
+
+/* Wait for CB to complete.
+ * Returns the CB status field value (0 for normal completion)
+ */
+static inline int gru_wait(void *cb)
+{
+ struct gru_control_block_status *cbs = (void *)cb;
+ int ret = cbs->istatus;;
+
+ if (ret != CBS_IDLE)
+ ret = gru_wait_proc(cb);
+ return ret;
+}
+
+/* Wait for CB to complete. Aborts program if error. (Note: error does NOT
+ * mean TLB mis - only fatal errors such as memory parity error or user
+ * bugs will cause termination.
+ */
+static inline void gru_wait_abort(void *cb)
+{
+ struct gru_control_block_status *cbs = (void *)cb;
+
+ if (cbs->istatus != CBS_IDLE)
+ gru_wait_abort_proc(cb);
+}
+
+
+/*
+ * Get a pointer to a control block
+ * gseg - GSeg address returned from gru_get_thread_gru_segment()
+ * index - index of desired CB
+ */
+static inline void *gru_get_cb_pointer(void *gseg,
+ int index)
+{
+ return gseg + GRU_CB_BASE + index * GRU_HANDLE_STRIDE;
+}
+
+/*
+ * Get a pointer to a cacheline in the data segment portion of a GSeg
+ * gseg - GSeg address returned from gru_get_thread_gru_segment()
+ * index - index of desired cache line
+ */
+static inline void *gru_get_data_pointer(void *gseg, int index)
+{
+ return gseg + GRU_DS_BASE + index * GRU_CACHE_LINE_BYTES;
+}
+
+/*
+ * Convert a vaddr into the tri index within the GSEG
+ * vaddr - virtual address of within gseg
+ */
+static inline int gru_get_tri(void *vaddr)
+{
+ return ((unsigned long)vaddr & (GRU_GSEG_PAGESIZE - 1)) - GRU_DS_BASE;
+}
+#endif /* __GRU_INSTRUCTIONS_H__ */
diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c
new file mode 100644
index 00000000000..3d33015bbf3
--- /dev/null
+++ b/drivers/misc/sgi-gru/grufault.c
@@ -0,0 +1,633 @@
+/*
+ * SN Platform GRU Driver
+ *
+ * FAULT HANDLER FOR GRU DETECTED TLB MISSES
+ *
+ * This file contains code that handles TLB misses within the GRU.
+ * These misses are reported either via interrupts or user polling of
+ * the user CB.
+ *
+ * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/uaccess.h>
+#include <asm/pgtable.h>
+#include "gru.h"
+#include "grutables.h"
+#include "grulib.h"
+#include "gru_instructions.h"
+#include <asm/uv/uv_hub.h>
+
+/*
+ * Test if a physical address is a valid GRU GSEG address
+ */
+static inline int is_gru_paddr(unsigned long paddr)
+{
+ return paddr >= gru_start_paddr && paddr < gru_end_paddr;
+}
+
+/*
+ * Find the vma of a GRU segment. Caller must hold mmap_sem.
+ */
+struct vm_area_struct *gru_find_vma(unsigned long vaddr)
+{
+ struct vm_area_struct *vma;
+
+ vma = find_vma(current->mm, vaddr);
+ if (vma && vma->vm_start <= vaddr && vma->vm_ops == &gru_vm_ops)
+ return vma;
+ return NULL;
+}
+
+/*
+ * Find and lock the gts that contains the specified user vaddr.
+ *
+ * Returns:
+ * - *gts with the mmap_sem locked for read and the GTS locked.
+ * - NULL if vaddr invalid OR is not a valid GSEG vaddr.
+ */
+
+static struct gru_thread_state *gru_find_lock_gts(unsigned long vaddr)
+{
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ struct gru_thread_state *gts = NULL;
+
+ down_read(&mm->mmap_sem);
+ vma = gru_find_vma(vaddr);
+ if (vma)
+ gts = gru_find_thread_state(vma, TSID(vaddr, vma));
+ if (gts)
+ mutex_lock(&gts->ts_ctxlock);
+ else
+ up_read(&mm->mmap_sem);
+ return gts;
+}
+
+static struct gru_thread_state *gru_alloc_locked_gts(unsigned long vaddr)
+{
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ struct gru_thread_state *gts = NULL;
+
+ down_write(&mm->mmap_sem);
+ vma = gru_find_vma(vaddr);
+ if (vma)
+ gts = gru_alloc_thread_state(vma, TSID(vaddr, vma));
+ if (gts) {
+ mutex_lock(&gts->ts_ctxlock);
+ downgrade_write(&mm->mmap_sem);
+ } else {
+ up_write(&mm->mmap_sem);
+ }
+
+ return gts;
+}
+
+/*
+ * Unlock a GTS that was previously locked with gru_find_lock_gts().
+ */
+static void gru_unlock_gts(struct gru_thread_state *gts)
+{
+ mutex_unlock(&gts->ts_ctxlock);
+ up_read(&current->mm->mmap_sem);
+}
+
+/*
+ * Set a CB.istatus to active using a user virtual address. This must be done
+ * just prior to a TFH RESTART. The new cb.istatus is an in-cache status ONLY.
+ * If the line is evicted, the status may be lost. The in-cache update
+ * is necessary to prevent the user from seeing a stale cb.istatus that will
+ * change as soon as the TFH restart is complete. Races may cause an
+ * occasional failure to clear the cb.istatus, but that is ok.
+ *
+ * If the cb address is not valid (should not happen, but...), nothing
+ * bad will happen.. The get_user()/put_user() will fail but there
+ * are no bad side-effects.
+ */
+static void gru_cb_set_istatus_active(unsigned long __user *cb)
+{
+ union {
+ struct gru_instruction_bits bits;
+ unsigned long dw;
+ } u;
+
+ if (cb) {
+ get_user(u.dw, cb);
+ u.bits.istatus = CBS_ACTIVE;
+ put_user(u.dw, cb);
+ }
+}
+
+/*
+ * Convert a interrupt IRQ to a pointer to the GRU GTS that caused the
+ * interrupt. Interrupts are always sent to a cpu on the blade that contains the
+ * GRU (except for headless blades which are not currently supported). A blade
+ * has N grus; a block of N consecutive IRQs is assigned to the GRUs. The IRQ
+ * number uniquely identifies the GRU chiplet on the local blade that caused the
+ * interrupt. Always called in interrupt context.
+ */
+static inline struct gru_state *irq_to_gru(int irq)
+{
+ return &gru_base[uv_numa_blade_id()]->bs_grus[irq - IRQ_GRU];
+}
+
+/*
+ * Read & clear a TFM
+ *
+ * The GRU has an array of fault maps. A map is private to a cpu
+ * Only one cpu will be accessing a cpu's fault map.
+ *
+ * This function scans the cpu-private fault map & clears all bits that
+ * are set. The function returns a bitmap that indicates the bits that
+ * were cleared. Note that sense the maps may be updated asynchronously by
+ * the GRU, atomic operations must be used to clear bits.
+ */
+static void get_clear_fault_map(struct gru_state *gru,
+ struct gru_tlb_fault_map *map)
+{
+ unsigned long i, k;
+ struct gru_tlb_fault_map *tfm;
+
+ tfm = get_tfm_for_cpu(gru, gru_cpu_fault_map_id());
+ prefetchw(tfm); /* Helps on hardware, required for emulator */
+ for (i = 0; i < BITS_TO_LONGS(GRU_NUM_CBE); i++) {
+ k = tfm->fault_bits[i];
+ if (k)
+ k = xchg(&tfm->fault_bits[i], 0UL);
+ map->fault_bits[i] = k;
+ }
+
+ /*
+ * Not functionally required but helps performance. (Required
+ * on emulator)
+ */
+ gru_flush_cache(tfm);
+}
+
+/*
+ * Atomic (interrupt context) & non-atomic (user context) functions to
+ * convert a vaddr into a physical address. The size of the page
+ * is returned in pageshift.
+ * returns:
+ * 0 - successful
+ * < 0 - error code
+ * 1 - (atomic only) try again in non-atomic context
+ */
+static int non_atomic_pte_lookup(struct vm_area_struct *vma,
+ unsigned long vaddr, int write,
+ unsigned long *paddr, int *pageshift)
+{
+ struct page *page;
+
+ /* ZZZ Need to handle HUGE pages */
+ if (is_vm_hugetlb_page(vma))
+ return -EFAULT;
+ *pageshift = PAGE_SHIFT;
+ if (get_user_pages
+ (current, current->mm, vaddr, 1, write, 0, &page, NULL) <= 0)
+ return -EFAULT;
+ *paddr = page_to_phys(page);
+ put_page(page);
+ return 0;
+}
+
+/*
+ *
+ * atomic_pte_lookup
+ *
+ * Convert a user virtual address to a physical address
+ * Only supports Intel large pages (2MB only) on x86_64.
+ * ZZZ - hugepage support is incomplete
+ */
+static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr,
+ int write, unsigned long *paddr, int *pageshift)
+{
+ pgd_t *pgdp;
+ pmd_t *pmdp;
+ pud_t *pudp;
+ pte_t pte;
+
+ WARN_ON(irqs_disabled()); /* ZZZ debug */
+
+ local_irq_disable();
+ pgdp = pgd_offset(vma->vm_mm, vaddr);
+ if (unlikely(pgd_none(*pgdp)))
+ goto err;
+
+ pudp = pud_offset(pgdp, vaddr);
+ if (unlikely(pud_none(*pudp)))
+ goto err;
+
+ pmdp = pmd_offset(pudp, vaddr);
+ if (unlikely(pmd_none(*pmdp)))
+ goto err;
+#ifdef CONFIG_X86_64
+ if (unlikely(pmd_large(*pmdp)))
+ pte = *(pte_t *) pmdp;
+ else
+#endif
+ pte = *pte_offset_kernel(pmdp, vaddr);
+
+ local_irq_enable();
+
+ if (unlikely(!pte_present(pte) ||
+ (write && (!pte_write(pte) || !pte_dirty(pte)))))
+ return 1;
+
+ *paddr = pte_pfn(pte) << PAGE_SHIFT;
+ *pageshift = is_vm_hugetlb_page(vma) ? HPAGE_SHIFT : PAGE_SHIFT;
+ return 0;
+
+err:
+ local_irq_enable();
+ return 1;
+}
+
+/*
+ * Drop a TLB entry into the GRU. The fault is described by info in an TFH.
+ * Input:
+ * cb Address of user CBR. Null if not running in user context
+ * Return:
+ * 0 = dropin, exception, or switch to UPM successful
+ * 1 = range invalidate active
+ * < 0 = error code
+ *
+ */
+static int gru_try_dropin(struct gru_thread_state *gts,
+ struct gru_tlb_fault_handle *tfh,
+ unsigned long __user *cb)
+{
+ struct mm_struct *mm = gts->ts_mm;
+ struct vm_area_struct *vma;
+ int pageshift, asid, write, ret;
+ unsigned long paddr, gpa, vaddr;
+
+ /*
+ * NOTE: The GRU contains magic hardware that eliminates races between
+ * TLB invalidates and TLB dropins. If an invalidate occurs
+ * in the window between reading the TFH and the subsequent TLB dropin,
+ * the dropin is ignored. This eliminates the need for additional locks.
+ */
+
+ /*
+ * Error if TFH state is IDLE or FMM mode & the user issuing a UPM call.
+ * Might be a hardware race OR a stupid user. Ignore FMM because FMM
+ * is a transient state.
+ */
+ if (tfh->state == TFHSTATE_IDLE)
+ goto failidle;
+ if (tfh->state == TFHSTATE_MISS_FMM && cb)
+ goto failfmm;
+
+ write = (tfh->cause & TFHCAUSE_TLB_MOD) != 0;
+ vaddr = tfh->missvaddr;
+ asid = tfh->missasid;
+ if (asid == 0)
+ goto failnoasid;
+
+ rmb(); /* TFH must be cache resident before reading ms_range_active */
+
+ /*
+ * TFH is cache resident - at least briefly. Fail the dropin
+ * if a range invalidate is active.
+ */
+ if (atomic_read(&gts->ts_gms->ms_range_active))
+ goto failactive;
+
+ vma = find_vma(mm, vaddr);
+ if (!vma)
+ goto failinval;
+
+ /*
+ * Atomic lookup is faster & usually works even if called in non-atomic
+ * context.
+ */
+ ret = atomic_pte_lookup(vma, vaddr, write, &paddr, &pageshift);
+ if (ret) {
+ if (!cb)
+ goto failupm;
+ if (non_atomic_pte_lookup(vma, vaddr, write, &paddr,
+ &pageshift))
+ goto failinval;
+ }
+ if (is_gru_paddr(paddr))
+ goto failinval;
+
+ paddr = paddr & ~((1UL << pageshift) - 1);
+ gpa = uv_soc_phys_ram_to_gpa(paddr);
+ gru_cb_set_istatus_active(cb);
+ tfh_write_restart(tfh, gpa, GAA_RAM, vaddr, asid, write,
+ GRU_PAGESIZE(pageshift));
+ STAT(tlb_dropin);
+ gru_dbg(grudev,
+ "%s: tfh 0x%p, vaddr 0x%lx, asid 0x%x, ps %d, gpa 0x%lx\n",
+ ret ? "non-atomic" : "atomic", tfh, vaddr, asid,
+ pageshift, gpa);
+ return 0;
+
+failnoasid:
+ /* No asid (delayed unload). */
+ STAT(tlb_dropin_fail_no_asid);
+ gru_dbg(grudev, "FAILED no_asid tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr);
+ if (!cb)
+ tfh_user_polling_mode(tfh);
+ else
+ gru_flush_cache(tfh);
+ return -EAGAIN;
+
+failupm:
+ /* Atomic failure switch CBR to UPM */
+ tfh_user_polling_mode(tfh);
+ STAT(tlb_dropin_fail_upm);
+ gru_dbg(grudev, "FAILED upm tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr);
+ return 1;
+
+failfmm:
+ /* FMM state on UPM call */
+ STAT(tlb_dropin_fail_fmm);
+ gru_dbg(grudev, "FAILED fmm tfh: 0x%p, state %d\n", tfh, tfh->state);
+ return 0;
+
+failidle:
+ /* TFH was idle - no miss pending */
+ gru_flush_cache(tfh);
+ if (cb)
+ gru_flush_cache(cb);
+ STAT(tlb_dropin_fail_idle);
+ gru_dbg(grudev, "FAILED idle tfh: 0x%p, state %d\n", tfh, tfh->state);
+ return 0;
+
+failinval:
+ /* All errors (atomic & non-atomic) switch CBR to EXCEPTION state */
+ tfh_exception(tfh);
+ STAT(tlb_dropin_fail_invalid);
+ gru_dbg(grudev, "FAILED inval tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr);
+ return -EFAULT;
+
+failactive:
+ /* Range invalidate active. Switch to UPM iff atomic */
+ if (!cb)
+ tfh_user_polling_mode(tfh);
+ else
+ gru_flush_cache(tfh);
+ STAT(tlb_dropin_fail_range_active);
+ gru_dbg(grudev, "FAILED range active: tfh 0x%p, vaddr 0x%lx\n",
+ tfh, vaddr);
+ return 1;
+}
+
+/*
+ * Process an external interrupt from the GRU. This interrupt is
+ * caused by a TLB miss.
+ * Note that this is the interrupt handler that is registered with linux
+ * interrupt handlers.
+ */
+irqreturn_t gru_intr(int irq, void *dev_id)
+{
+ struct gru_state *gru;
+ struct gru_tlb_fault_map map;
+ struct gru_thread_state *gts;
+ struct gru_tlb_fault_handle *tfh = NULL;
+ int cbrnum, ctxnum;
+
+ STAT(intr);
+
+ gru = irq_to_gru(irq);
+ if (!gru) {
+ dev_err(grudev, "GRU: invalid interrupt: cpu %d, irq %d\n",
+ raw_smp_processor_id(), irq);
+ return IRQ_NONE;
+ }
+ get_clear_fault_map(gru, &map);
+ gru_dbg(grudev, "irq %d, gru %x, map 0x%lx\n", irq, gru->gs_gid,
+ map.fault_bits[0]);
+
+ for_each_cbr_in_tfm(cbrnum, map.fault_bits) {
+ tfh = get_tfh_by_index(gru, cbrnum);
+ prefetchw(tfh); /* Helps on hdw, required for emulator */
+
+ /*
+ * When hardware sets a bit in the faultmap, it implicitly
+ * locks the GRU context so that it cannot be unloaded.
+ * The gts cannot change until a TFH start/writestart command
+ * is issued.
+ */
+ ctxnum = tfh->ctxnum;
+ gts = gru->gs_gts[ctxnum];
+
+ /*
+ * This is running in interrupt context. Trylock the mmap_sem.
+ * If it fails, retry the fault in user context.
+ */
+ if (down_read_trylock(&gts->ts_mm->mmap_sem)) {
+ gru_try_dropin(gts, tfh, NULL);
+ up_read(&gts->ts_mm->mmap_sem);
+ } else {
+ tfh_user_polling_mode(tfh);
+ }
+ }
+ return IRQ_HANDLED;
+}
+
+
+static int gru_user_dropin(struct gru_thread_state *gts,
+ struct gru_tlb_fault_handle *tfh,
+ unsigned long __user *cb)
+{
+ struct gru_mm_struct *gms = gts->ts_gms;
+ int ret;
+
+ while (1) {
+ wait_event(gms->ms_wait_queue,
+ atomic_read(&gms->ms_range_active) == 0);
+ prefetchw(tfh); /* Helps on hdw, required for emulator */
+ ret = gru_try_dropin(gts, tfh, cb);
+ if (ret <= 0)
+ return ret;
+ STAT(call_os_wait_queue);
+ }
+}
+
+/*
+ * This interface is called as a result of a user detecting a "call OS" bit
+ * in a user CB. Normally means that a TLB fault has occurred.
+ * cb - user virtual address of the CB
+ */
+int gru_handle_user_call_os(unsigned long cb)
+{
+ struct gru_tlb_fault_handle *tfh;
+ struct gru_thread_state *gts;
+ unsigned long __user *cbp;
+ int ucbnum, cbrnum, ret = -EINVAL;
+
+ STAT(call_os);
+ gru_dbg(grudev, "address 0x%lx\n", cb);
+
+ /* sanity check the cb pointer */
+ ucbnum = get_cb_number((void *)cb);
+ if ((cb & (GRU_HANDLE_STRIDE - 1)) || ucbnum >= GRU_NUM_CB)
+ return -EINVAL;
+ cbp = (unsigned long *)cb;
+
+ gts = gru_find_lock_gts(cb);
+ if (!gts)
+ return -EINVAL;
+
+ if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE) {
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ /*
+ * If force_unload is set, the UPM TLB fault is phony. The task
+ * has migrated to another node and the GSEG must be moved. Just
+ * unload the context. The task will page fault and assign a new
+ * context.
+ */
+ ret = -EAGAIN;
+ cbrnum = thread_cbr_number(gts, ucbnum);
+ if (gts->ts_force_unload) {
+ gru_unload_context(gts, 1);
+ } else if (gts->ts_gru) {
+ tfh = get_tfh_by_index(gts->ts_gru, cbrnum);
+ ret = gru_user_dropin(gts, tfh, cbp);
+ }
+exit:
+ gru_unlock_gts(gts);
+ return ret;
+}
+
+/*
+ * Fetch the exception detail information for a CB that terminated with
+ * an exception.
+ */
+int gru_get_exception_detail(unsigned long arg)
+{
+ struct control_block_extended_exc_detail excdet;
+ struct gru_control_block_extended *cbe;
+ struct gru_thread_state *gts;
+ int ucbnum, cbrnum, ret;
+
+ STAT(user_exception);
+ if (copy_from_user(&excdet, (void __user *)arg, sizeof(excdet)))
+ return -EFAULT;
+
+ gru_dbg(grudev, "address 0x%lx\n", excdet.cb);
+ gts = gru_find_lock_gts(excdet.cb);
+ if (!gts)
+ return -EINVAL;
+
+ if (gts->ts_gru) {
+ ucbnum = get_cb_number((void *)excdet.cb);
+ cbrnum = thread_cbr_number(gts, ucbnum);
+ cbe = get_cbe_by_index(gts->ts_gru, cbrnum);
+ excdet.opc = cbe->opccpy;
+ excdet.exopc = cbe->exopccpy;
+ excdet.ecause = cbe->ecause;
+ excdet.exceptdet0 = cbe->idef1upd;
+ excdet.exceptdet1 = cbe->idef3upd;
+ ret = 0;
+ } else {
+ ret = -EAGAIN;
+ }
+ gru_unlock_gts(gts);
+
+ gru_dbg(grudev, "address 0x%lx, ecause 0x%x\n", excdet.cb,
+ excdet.ecause);
+ if (!ret && copy_to_user((void __user *)arg, &excdet, sizeof(excdet)))
+ ret = -EFAULT;
+ return ret;
+}
+
+/*
+ * User request to unload a context. Content is saved for possible reload.
+ */
+int gru_user_unload_context(unsigned long arg)
+{
+ struct gru_thread_state *gts;
+ struct gru_unload_context_req req;
+
+ STAT(user_unload_context);
+ if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
+ return -EFAULT;
+
+ gru_dbg(grudev, "gseg 0x%lx\n", req.gseg);
+
+ gts = gru_find_lock_gts(req.gseg);
+ if (!gts)
+ return -EINVAL;
+
+ if (gts->ts_gru)
+ gru_unload_context(gts, 1);
+ gru_unlock_gts(gts);
+
+ return 0;
+}
+
+/*
+ * User request to flush a range of virtual addresses from the GRU TLB
+ * (Mainly for testing).
+ */
+int gru_user_flush_tlb(unsigned long arg)
+{
+ struct gru_thread_state *gts;
+ struct gru_flush_tlb_req req;
+
+ STAT(user_flush_tlb);
+ if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
+ return -EFAULT;
+
+ gru_dbg(grudev, "gseg 0x%lx, vaddr 0x%lx, len 0x%lx\n", req.gseg,
+ req.vaddr, req.len);
+
+ gts = gru_find_lock_gts(req.gseg);
+ if (!gts)
+ return -EINVAL;
+
+ gru_flush_tlb_range(gts->ts_gms, req.vaddr, req.vaddr + req.len);
+ gru_unlock_gts(gts);
+
+ return 0;
+}
+
+/*
+ * Register the current task as the user of the GSEG slice.
+ * Needed for TLB fault interrupt targeting.
+ */
+int gru_set_task_slice(long address)
+{
+ struct gru_thread_state *gts;
+
+ STAT(set_task_slice);
+ gru_dbg(grudev, "address 0x%lx\n", address);
+ gts = gru_alloc_locked_gts(address);
+ if (!gts)
+ return -EINVAL;
+
+ gts->ts_tgid_owner = current->tgid;
+ gru_unlock_gts(gts);
+
+ return 0;
+}
diff --git a/drivers/misc/sgi-gru/grufile.c b/drivers/misc/sgi-gru/grufile.c
new file mode 100644
index 00000000000..23c91f5f6b6
--- /dev/null
+++ b/drivers/misc/sgi-gru/grufile.c
@@ -0,0 +1,485 @@
+/*
+ * SN Platform GRU Driver
+ *
+ * FILE OPERATIONS & DRIVER INITIALIZATION
+ *
+ * This file supports the user system call for file open, close, mmap, etc.
+ * This also incudes the driver initialization code.
+ *
+ * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/io.h>
+#include <linux/smp_lock.h>
+#include <linux/spinlock.h>
+#include <linux/device.h>
+#include <linux/miscdevice.h>
+#include <linux/interrupt.h>
+#include <linux/proc_fs.h>
+#include <linux/uaccess.h>
+#include "gru.h"
+#include "grulib.h"
+#include "grutables.h"
+
+#if defined CONFIG_X86_64
+#include <asm/genapic.h>
+#include <asm/irq.h>
+#define IS_UV() is_uv_system()
+#elif defined CONFIG_IA64
+#include <asm/system.h>
+#include <asm/sn/simulator.h>
+/* temp support for running on hardware simulator */
+#define IS_UV() IS_MEDUSA() || ia64_platform_is("uv")
+#else
+#define IS_UV() 0
+#endif
+
+#include <asm/uv/uv_hub.h>
+#include <asm/uv/uv_mmrs.h>
+
+struct gru_blade_state *gru_base[GRU_MAX_BLADES] __read_mostly;
+unsigned long gru_start_paddr, gru_end_paddr __read_mostly;
+struct gru_stats_s gru_stats;
+
+/* Guaranteed user available resources on each node */
+static int max_user_cbrs, max_user_dsr_bytes;
+
+static struct file_operations gru_fops;
+static struct miscdevice gru_miscdev;
+
+
+/*
+ * gru_vma_close
+ *
+ * Called when unmapping a device mapping. Frees all gru resources
+ * and tables belonging to the vma.
+ */
+static void gru_vma_close(struct vm_area_struct *vma)
+{
+ struct gru_vma_data *vdata;
+ struct gru_thread_state *gts;
+ struct list_head *entry, *next;
+
+ if (!vma->vm_private_data)
+ return;
+
+ vdata = vma->vm_private_data;
+ vma->vm_private_data = NULL;
+ gru_dbg(grudev, "vma %p, file %p, vdata %p\n", vma, vma->vm_file,
+ vdata);
+ list_for_each_safe(entry, next, &vdata->vd_head) {
+ gts =
+ list_entry(entry, struct gru_thread_state, ts_next);
+ list_del(&gts->ts_next);
+ mutex_lock(&gts->ts_ctxlock);
+ if (gts->ts_gru)
+ gru_unload_context(gts, 0);
+ mutex_unlock(&gts->ts_ctxlock);
+ gts_drop(gts);
+ }
+ kfree(vdata);
+ STAT(vdata_free);
+}
+
+/*
+ * gru_file_mmap
+ *
+ * Called when mmaping the device. Initializes the vma with a fault handler
+ * and private data structure necessary to allocate, track, and free the
+ * underlying pages.
+ */
+static int gru_file_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ if ((vma->vm_flags & (VM_SHARED | VM_WRITE)) != (VM_SHARED | VM_WRITE))
+ return -EPERM;
+
+ if (vma->vm_start & (GRU_GSEG_PAGESIZE - 1) ||
+ vma->vm_end & (GRU_GSEG_PAGESIZE - 1))
+ return -EINVAL;
+
+ vma->vm_flags |=
+ (VM_IO | VM_DONTCOPY | VM_LOCKED | VM_DONTEXPAND | VM_PFNMAP |
+ VM_RESERVED);
+ vma->vm_page_prot = PAGE_SHARED;
+ vma->vm_ops = &gru_vm_ops;
+
+ vma->vm_private_data = gru_alloc_vma_data(vma, 0);
+ if (!vma->vm_private_data)
+ return -ENOMEM;
+
+ gru_dbg(grudev, "file %p, vaddr 0x%lx, vma %p, vdata %p\n",
+ file, vma->vm_start, vma, vma->vm_private_data);
+ return 0;
+}
+
+/*
+ * Create a new GRU context
+ */
+static int gru_create_new_context(unsigned long arg)
+{
+ struct gru_create_context_req req;
+ struct vm_area_struct *vma;
+ struct gru_vma_data *vdata;
+ int ret = -EINVAL;
+
+
+ if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
+ return -EFAULT;
+
+ if (req.data_segment_bytes == 0 ||
+ req.data_segment_bytes > max_user_dsr_bytes)
+ return -EINVAL;
+ if (!req.control_blocks || !req.maximum_thread_count ||
+ req.control_blocks > max_user_cbrs)
+ return -EINVAL;
+
+ if (!(req.options & GRU_OPT_MISS_MASK))
+ req.options |= GRU_OPT_MISS_FMM_INTR;
+
+ down_write(&current->mm->mmap_sem);
+ vma = gru_find_vma(req.gseg);
+ if (vma) {
+ vdata = vma->vm_private_data;
+ vdata->vd_user_options = req.options;
+ vdata->vd_dsr_au_count =
+ GRU_DS_BYTES_TO_AU(req.data_segment_bytes);
+ vdata->vd_cbr_au_count = GRU_CB_COUNT_TO_AU(req.control_blocks);
+ ret = 0;
+ }
+ up_write(&current->mm->mmap_sem);
+
+ return ret;
+}
+
+/*
+ * Get GRU configuration info (temp - for emulator testing)
+ */
+static long gru_get_config_info(unsigned long arg)
+{
+ struct gru_config_info info;
+ int nodesperblade;
+
+ if (num_online_nodes() > 1 &&
+ (uv_node_to_blade_id(1) == uv_node_to_blade_id(0)))
+ nodesperblade = 2;
+ else
+ nodesperblade = 1;
+ info.cpus = num_online_cpus();
+ info.nodes = num_online_nodes();
+ info.blades = info.nodes / nodesperblade;
+ info.chiplets = GRU_CHIPLETS_PER_BLADE * info.blades;
+
+ if (copy_to_user((void __user *)arg, &info, sizeof(info)))
+ return -EFAULT;
+ return 0;
+}
+
+/*
+ * Get GRU chiplet status
+ */
+static long gru_get_chiplet_status(unsigned long arg)
+{
+ struct gru_state *gru;
+ struct gru_chiplet_info info;
+
+ if (copy_from_user(&info, (void __user *)arg, sizeof(info)))
+ return -EFAULT;
+
+ if (info.node == -1)
+ info.node = numa_node_id();
+ if (info.node >= num_possible_nodes() ||
+ info.chiplet >= GRU_CHIPLETS_PER_HUB ||
+ info.node < 0 || info.chiplet < 0)
+ return -EINVAL;
+
+ info.blade = uv_node_to_blade_id(info.node);
+ gru = get_gru(info.blade, info.chiplet);
+
+ info.total_dsr_bytes = GRU_NUM_DSR_BYTES;
+ info.total_cbr = GRU_NUM_CB;
+ info.total_user_dsr_bytes = GRU_NUM_DSR_BYTES -
+ gru->gs_reserved_dsr_bytes;
+ info.total_user_cbr = GRU_NUM_CB - gru->gs_reserved_cbrs;
+ info.free_user_dsr_bytes = hweight64(gru->gs_dsr_map) *
+ GRU_DSR_AU_BYTES;
+ info.free_user_cbr = hweight64(gru->gs_cbr_map) * GRU_CBR_AU_SIZE;
+
+ if (copy_to_user((void __user *)arg, &info, sizeof(info)))
+ return -EFAULT;
+ return 0;
+}
+
+/*
+ * gru_file_unlocked_ioctl
+ *
+ * Called to update file attributes via IOCTL calls.
+ */
+static long gru_file_unlocked_ioctl(struct file *file, unsigned int req,
+ unsigned long arg)
+{
+ int err = -EBADRQC;
+
+ gru_dbg(grudev, "file %p\n", file);
+
+ switch (req) {
+ case GRU_CREATE_CONTEXT:
+ err = gru_create_new_context(arg);
+ break;
+ case GRU_SET_TASK_SLICE:
+ err = gru_set_task_slice(arg);
+ break;
+ case GRU_USER_GET_EXCEPTION_DETAIL:
+ err = gru_get_exception_detail(arg);
+ break;
+ case GRU_USER_UNLOAD_CONTEXT:
+ err = gru_user_unload_context(arg);
+ break;
+ case GRU_GET_CHIPLET_STATUS:
+ err = gru_get_chiplet_status(arg);
+ break;
+ case GRU_USER_FLUSH_TLB:
+ err = gru_user_flush_tlb(arg);
+ break;
+ case GRU_USER_CALL_OS:
+ err = gru_handle_user_call_os(arg);
+ break;
+ case GRU_GET_CONFIG_INFO:
+ err = gru_get_config_info(arg);
+ break;
+ }
+ return err;
+}
+
+/*
+ * Called at init time to build tables for all GRUs that are present in the
+ * system.
+ */
+static void gru_init_chiplet(struct gru_state *gru, unsigned long paddr,
+ void *vaddr, int nid, int bid, int grunum)
+{
+ spin_lock_init(&gru->gs_lock);
+ spin_lock_init(&gru->gs_asid_lock);
+ gru->gs_gru_base_paddr = paddr;
+ gru->gs_gru_base_vaddr = vaddr;
+ gru->gs_gid = bid * GRU_CHIPLETS_PER_BLADE + grunum;
+ gru->gs_blade = gru_base[bid];
+ gru->gs_blade_id = bid;
+ gru->gs_cbr_map = (GRU_CBR_AU == 64) ? ~0 : (1UL << GRU_CBR_AU) - 1;
+ gru->gs_dsr_map = (1UL << GRU_DSR_AU) - 1;
+ gru_tgh_flush_init(gru);
+ gru_dbg(grudev, "bid %d, nid %d, gru %x, vaddr %p (0x%lx)\n",
+ bid, nid, gru->gs_gid, gru->gs_gru_base_vaddr,
+ gru->gs_gru_base_paddr);
+ gru_kservices_init(gru);
+}
+
+static int gru_init_tables(unsigned long gru_base_paddr, void *gru_base_vaddr)
+{
+ int pnode, nid, bid, chip;
+ int cbrs, dsrbytes, n;
+ int order = get_order(sizeof(struct gru_blade_state));
+ struct page *page;
+ struct gru_state *gru;
+ unsigned long paddr;
+ void *vaddr;
+
+ max_user_cbrs = GRU_NUM_CB;
+ max_user_dsr_bytes = GRU_NUM_DSR_BYTES;
+ for_each_online_node(nid) {
+ bid = uv_node_to_blade_id(nid);
+ pnode = uv_node_to_pnode(nid);
+ if (gru_base[bid])
+ continue;
+ page = alloc_pages_node(nid, GFP_KERNEL, order);
+ if (!page)
+ goto fail;
+ gru_base[bid] = page_address(page);
+ memset(gru_base[bid], 0, sizeof(struct gru_blade_state));
+ gru_base[bid]->bs_lru_gru = &gru_base[bid]->bs_grus[0];
+ spin_lock_init(&gru_base[bid]->bs_lock);
+
+ dsrbytes = 0;
+ cbrs = 0;
+ for (gru = gru_base[bid]->bs_grus, chip = 0;
+ chip < GRU_CHIPLETS_PER_BLADE;
+ chip++, gru++) {
+ paddr = gru_chiplet_paddr(gru_base_paddr, pnode, chip);
+ vaddr = gru_chiplet_vaddr(gru_base_vaddr, pnode, chip);
+ gru_init_chiplet(gru, paddr, vaddr, bid, nid, chip);
+ n = hweight64(gru->gs_cbr_map) * GRU_CBR_AU_SIZE;
+ cbrs = max(cbrs, n);
+ n = hweight64(gru->gs_dsr_map) * GRU_DSR_AU_BYTES;
+ dsrbytes = max(dsrbytes, n);
+ }
+ max_user_cbrs = min(max_user_cbrs, cbrs);
+ max_user_dsr_bytes = min(max_user_dsr_bytes, dsrbytes);
+ }
+
+ return 0;
+
+fail:
+ for (nid--; nid >= 0; nid--)
+ free_pages((unsigned long)gru_base[nid], order);
+ return -ENOMEM;
+}
+
+#ifdef CONFIG_IA64
+
+static int get_base_irq(void)
+{
+ return IRQ_GRU;
+}
+
+#elif defined CONFIG_X86_64
+
+static void noop(unsigned int irq)
+{
+}
+
+static struct irq_chip gru_chip = {
+ .name = "gru",
+ .mask = noop,
+ .unmask = noop,
+ .ack = noop,
+};
+
+static int get_base_irq(void)
+{
+ set_irq_chip(IRQ_GRU, &gru_chip);
+ set_irq_chip(IRQ_GRU + 1, &gru_chip);
+ return IRQ_GRU;
+}
+#endif
+
+/*
+ * gru_init
+ *
+ * Called at boot or module load time to initialize the GRUs.
+ */
+static int __init gru_init(void)
+{
+ int ret, irq, chip;
+ char id[10];
+ void *gru_start_vaddr;
+
+ if (!IS_UV())
+ return 0;
+
+#if defined CONFIG_IA64
+ gru_start_paddr = 0xd000000000UL; /* ZZZZZZZZZZZZZZZZZZZ fixme */
+#else
+ gru_start_paddr = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR) &
+ 0x7fffffffffffUL;
+
+#endif
+ gru_start_vaddr = __va(gru_start_paddr);
+ gru_end_paddr = gru_start_paddr + MAX_NUMNODES * GRU_SIZE;
+ printk(KERN_INFO "GRU space: 0x%lx - 0x%lx\n",
+ gru_start_paddr, gru_end_paddr);
+ irq = get_base_irq();
+ for (chip = 0; chip < GRU_CHIPLETS_PER_BLADE; chip++) {
+ ret = request_irq(irq + chip, gru_intr, 0, id, NULL);
+ if (ret) {
+ printk(KERN_ERR "%s: request_irq failed\n",
+ GRU_DRIVER_ID_STR);
+ goto exit1;
+ }
+ }
+
+ ret = misc_register(&gru_miscdev);
+ if (ret) {
+ printk(KERN_ERR "%s: misc_register failed\n",
+ GRU_DRIVER_ID_STR);
+ goto exit1;
+ }
+
+ ret = gru_proc_init();
+ if (ret) {
+ printk(KERN_ERR "%s: proc init failed\n", GRU_DRIVER_ID_STR);
+ goto exit2;
+ }
+
+ ret = gru_init_tables(gru_start_paddr, gru_start_vaddr);
+ if (ret) {
+ printk(KERN_ERR "%s: init tables failed\n", GRU_DRIVER_ID_STR);
+ goto exit3;
+ }
+
+ printk(KERN_INFO "%s: v%s\n", GRU_DRIVER_ID_STR,
+ GRU_DRIVER_VERSION_STR);
+ return 0;
+
+exit3:
+ gru_proc_exit();
+exit2:
+ misc_deregister(&gru_miscdev);
+exit1:
+ for (--chip; chip >= 0; chip--)
+ free_irq(irq + chip, NULL);
+ return ret;
+
+}
+
+static void __exit gru_exit(void)
+{
+ int i, bid;
+ int order = get_order(sizeof(struct gru_state) *
+ GRU_CHIPLETS_PER_BLADE);
+
+ for (i = 0; i < GRU_CHIPLETS_PER_BLADE; i++)
+ free_irq(IRQ_GRU + i, NULL);
+
+ for (bid = 0; bid < GRU_MAX_BLADES; bid++)
+ free_pages((unsigned long)gru_base[bid], order);
+
+ misc_deregister(&gru_miscdev);
+ gru_proc_exit();
+}
+
+static struct file_operations gru_fops = {
+ .owner = THIS_MODULE,
+ .unlocked_ioctl = gru_file_unlocked_ioctl,
+ .mmap = gru_file_mmap,
+};
+
+static struct miscdevice gru_miscdev = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "gru",
+ .fops = &gru_fops,
+};
+
+struct vm_operations_struct gru_vm_ops = {
+ .close = gru_vma_close,
+ .fault = gru_fault,
+};
+
+module_init(gru_init);
+module_exit(gru_exit);
+
+module_param(gru_options, ulong, 0644);
+MODULE_PARM_DESC(gru_options, "Various debug options");
+
+MODULE_AUTHOR("Silicon Graphics, Inc.");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION(GRU_DRIVER_ID_STR GRU_DRIVER_VERSION_STR);
+MODULE_VERSION(GRU_DRIVER_VERSION_STR);
+
diff --git a/drivers/misc/sgi-gru/gruhandles.h b/drivers/misc/sgi-gru/gruhandles.h
new file mode 100644
index 00000000000..d16031d6267
--- /dev/null
+++ b/drivers/misc/sgi-gru/gruhandles.h
@@ -0,0 +1,663 @@
+/*
+ * SN Platform GRU Driver
+ *
+ * GRU HANDLE DEFINITION
+ *
+ * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef __GRUHANDLES_H__
+#define __GRUHANDLES_H__
+#include "gru_instructions.h"
+
+/*
+ * Manifest constants for GRU Memory Map
+ */
+#define GRU_GSEG0_BASE 0
+#define GRU_MCS_BASE (64 * 1024 * 1024)
+#define GRU_SIZE (128UL * 1024 * 1024)
+
+/* Handle & resource counts */
+#define GRU_NUM_CB 128
+#define GRU_NUM_DSR_BYTES (32 * 1024)
+#define GRU_NUM_TFM 16
+#define GRU_NUM_TGH 24
+#define GRU_NUM_CBE 128
+#define GRU_NUM_TFH 128
+#define GRU_NUM_CCH 16
+#define GRU_NUM_GSH 1
+
+/* Maximum resource counts that can be reserved by user programs */
+#define GRU_NUM_USER_CBR GRU_NUM_CBE
+#define GRU_NUM_USER_DSR_BYTES GRU_NUM_DSR_BYTES
+
+/* Bytes per handle & handle stride. Code assumes all cb, tfh, cbe handles
+ * are the same */
+#define GRU_HANDLE_BYTES 64
+#define GRU_HANDLE_STRIDE 256
+
+/* Base addresses of handles */
+#define GRU_TFM_BASE (GRU_MCS_BASE + 0x00000)
+#define GRU_TGH_BASE (GRU_MCS_BASE + 0x08000)
+#define GRU_CBE_BASE (GRU_MCS_BASE + 0x10000)
+#define GRU_TFH_BASE (GRU_MCS_BASE + 0x18000)
+#define GRU_CCH_BASE (GRU_MCS_BASE + 0x20000)
+#define GRU_GSH_BASE (GRU_MCS_BASE + 0x30000)
+
+/* User gseg constants */
+#define GRU_GSEG_STRIDE (4 * 1024 * 1024)
+#define GSEG_BASE(a) ((a) & ~(GRU_GSEG_PAGESIZE - 1))
+
+/* Data segment constants */
+#define GRU_DSR_AU_BYTES 1024
+#define GRU_DSR_CL (GRU_NUM_DSR_BYTES / GRU_CACHE_LINE_BYTES)
+#define GRU_DSR_AU_CL (GRU_DSR_AU_BYTES / GRU_CACHE_LINE_BYTES)
+#define GRU_DSR_AU (GRU_NUM_DSR_BYTES / GRU_DSR_AU_BYTES)
+
+/* Control block constants */
+#define GRU_CBR_AU_SIZE 2
+#define GRU_CBR_AU (GRU_NUM_CBE / GRU_CBR_AU_SIZE)
+
+/* Convert resource counts to the number of AU */
+#define GRU_DS_BYTES_TO_AU(n) DIV_ROUND_UP(n, GRU_DSR_AU_BYTES)
+#define GRU_CB_COUNT_TO_AU(n) DIV_ROUND_UP(n, GRU_CBR_AU_SIZE)
+
+/* UV limits */
+#define GRU_CHIPLETS_PER_HUB 2
+#define GRU_HUBS_PER_BLADE 1
+#define GRU_CHIPLETS_PER_BLADE (GRU_HUBS_PER_BLADE * GRU_CHIPLETS_PER_HUB)
+
+/* User GRU Gseg offsets */
+#define GRU_CB_BASE 0
+#define GRU_CB_LIMIT (GRU_CB_BASE + GRU_HANDLE_STRIDE * GRU_NUM_CBE)
+#define GRU_DS_BASE 0x20000
+#define GRU_DS_LIMIT (GRU_DS_BASE + GRU_NUM_DSR_BYTES)
+
+/* Convert a GRU physical address to the chiplet offset */
+#define GSEGPOFF(h) ((h) & (GRU_SIZE - 1))
+
+/* Convert an arbitrary handle address to the beginning of the GRU segment */
+#ifndef __PLUGIN__
+#define GRUBASE(h) ((void *)((unsigned long)(h) & ~(GRU_SIZE - 1)))
+#else
+extern void *gmu_grubase(void *h);
+#define GRUBASE(h) gmu_grubase(h)
+#endif
+
+/* General addressing macros. */
+static inline void *get_gseg_base_address(void *base, int ctxnum)
+{
+ return (void *)(base + GRU_GSEG0_BASE + GRU_GSEG_STRIDE * ctxnum);
+}
+
+static inline void *get_gseg_base_address_cb(void *base, int ctxnum, int line)
+{
+ return (void *)(get_gseg_base_address(base, ctxnum) +
+ GRU_CB_BASE + GRU_HANDLE_STRIDE * line);
+}
+
+static inline void *get_gseg_base_address_ds(void *base, int ctxnum, int line)
+{
+ return (void *)(get_gseg_base_address(base, ctxnum) + GRU_DS_BASE +
+ GRU_CACHE_LINE_BYTES * line);
+}
+
+static inline struct gru_tlb_fault_map *get_tfm(void *base, int ctxnum)
+{
+ return (struct gru_tlb_fault_map *)(base + GRU_TFM_BASE +
+ ctxnum * GRU_HANDLE_STRIDE);
+}
+
+static inline struct gru_tlb_global_handle *get_tgh(void *base, int ctxnum)
+{
+ return (struct gru_tlb_global_handle *)(base + GRU_TGH_BASE +
+ ctxnum * GRU_HANDLE_STRIDE);
+}
+
+static inline struct gru_control_block_extended *get_cbe(void *base, int ctxnum)
+{
+ return (struct gru_control_block_extended *)(base + GRU_CBE_BASE +
+ ctxnum * GRU_HANDLE_STRIDE);
+}
+
+static inline struct gru_tlb_fault_handle *get_tfh(void *base, int ctxnum)
+{
+ return (struct gru_tlb_fault_handle *)(base + GRU_TFH_BASE +
+ ctxnum * GRU_HANDLE_STRIDE);
+}
+
+static inline struct gru_context_configuration_handle *get_cch(void *base,
+ int ctxnum)
+{
+ return (struct gru_context_configuration_handle *)(base +
+ GRU_CCH_BASE + ctxnum * GRU_HANDLE_STRIDE);
+}
+
+static inline unsigned long get_cb_number(void *cb)
+{
+ return (((unsigned long)cb - GRU_CB_BASE) % GRU_GSEG_PAGESIZE) /
+ GRU_HANDLE_STRIDE;
+}
+
+/* byte offset to a specific GRU chiplet. (p=pnode, c=chiplet (0 or 1)*/
+static inline unsigned long gru_chiplet_paddr(unsigned long paddr, int pnode,
+ int chiplet)
+{
+ return paddr + GRU_SIZE * (2 * pnode + chiplet);
+}
+
+static inline void *gru_chiplet_vaddr(void *vaddr, int pnode, int chiplet)
+{
+ return vaddr + GRU_SIZE * (2 * pnode + chiplet);
+}
+
+
+
+/*
+ * Global TLB Fault Map
+ * Bitmap of outstanding TLB misses needing interrupt/polling service.
+ *
+ */
+struct gru_tlb_fault_map {
+ unsigned long fault_bits[BITS_TO_LONGS(GRU_NUM_CBE)];
+ unsigned long fill0[2];
+ unsigned long done_bits[BITS_TO_LONGS(GRU_NUM_CBE)];
+ unsigned long fill1[2];
+};
+
+/*
+ * TGH - TLB Global Handle
+ * Used for TLB flushing.
+ *
+ */
+struct gru_tlb_global_handle {
+ unsigned int cmd:1; /* DW 0 */
+ unsigned int delresp:1;
+ unsigned int opc:1;
+ unsigned int fill1:5;
+
+ unsigned int fill2:8;
+
+ unsigned int status:2;
+ unsigned long fill3:2;
+ unsigned int state:3;
+ unsigned long fill4:1;
+
+ unsigned int cause:3;
+ unsigned long fill5:37;
+
+ unsigned long vaddr:64; /* DW 1 */
+
+ unsigned int asid:24; /* DW 2 */
+ unsigned int fill6:8;
+
+ unsigned int pagesize:5;
+ unsigned int fill7:11;
+
+ unsigned int global:1;
+ unsigned int fill8:15;
+
+ unsigned long vaddrmask:39; /* DW 3 */
+ unsigned int fill9:9;
+ unsigned int n:10;
+ unsigned int fill10:6;
+
+ unsigned int ctxbitmap:16; /* DW4 */
+ unsigned long fill11[3];
+};
+
+enum gru_tgh_cmd {
+ TGHCMD_START
+};
+
+enum gru_tgh_opc {
+ TGHOP_TLBNOP,
+ TGHOP_TLBINV
+};
+
+enum gru_tgh_status {
+ TGHSTATUS_IDLE,
+ TGHSTATUS_EXCEPTION,
+ TGHSTATUS_ACTIVE
+};
+
+enum gru_tgh_state {
+ TGHSTATE_IDLE,
+ TGHSTATE_PE_INVAL,
+ TGHSTATE_INTERRUPT_INVAL,
+ TGHSTATE_WAITDONE,
+ TGHSTATE_RESTART_CTX,
+};
+
+/*
+ * TFH - TLB Global Handle
+ * Used for TLB dropins into the GRU TLB.
+ *
+ */
+struct gru_tlb_fault_handle {
+ unsigned int cmd:1; /* DW 0 - low 32*/
+ unsigned int delresp:1;
+ unsigned int fill0:2;
+ unsigned int opc:3;
+ unsigned int fill1:9;
+
+ unsigned int status:2;
+ unsigned int fill2:1;
+ unsigned int color:1;
+ unsigned int state:3;
+ unsigned int fill3:1;
+
+ unsigned int cause:7; /* DW 0 - high 32 */
+ unsigned int fill4:1;
+
+ unsigned int indexway:12;
+ unsigned int fill5:4;
+
+ unsigned int ctxnum:4;
+ unsigned int fill6:12;
+
+ unsigned long missvaddr:64; /* DW 1 */
+
+ unsigned int missasid:24; /* DW 2 */
+ unsigned int fill7:8;
+ unsigned int fillasid:24;
+ unsigned int dirty:1;
+ unsigned int gaa:2;
+ unsigned long fill8:5;
+
+ unsigned long pfn:41; /* DW 3 */
+ unsigned int fill9:7;
+ unsigned int pagesize:5;
+ unsigned int fill10:11;
+
+ unsigned long fillvaddr:64; /* DW 4 */
+
+ unsigned long fill11[3];
+};
+
+enum gru_tfh_opc {
+ TFHOP_NOOP,
+ TFHOP_RESTART,
+ TFHOP_WRITE_ONLY,
+ TFHOP_WRITE_RESTART,
+ TFHOP_EXCEPTION,
+ TFHOP_USER_POLLING_MODE = 7,
+};
+
+enum tfh_status {
+ TFHSTATUS_IDLE,
+ TFHSTATUS_EXCEPTION,
+ TFHSTATUS_ACTIVE,
+};
+
+enum tfh_state {
+ TFHSTATE_INACTIVE,
+ TFHSTATE_IDLE,
+ TFHSTATE_MISS_UPM,
+ TFHSTATE_MISS_FMM,
+ TFHSTATE_HW_ERR,
+ TFHSTATE_WRITE_TLB,
+ TFHSTATE_RESTART_CBR,
+};
+
+/* TFH cause bits */
+enum tfh_cause {
+ TFHCAUSE_NONE,
+ TFHCAUSE_TLB_MISS,
+ TFHCAUSE_TLB_MOD,
+ TFHCAUSE_HW_ERROR_RR,
+ TFHCAUSE_HW_ERROR_MAIN_ARRAY,
+ TFHCAUSE_HW_ERROR_VALID,
+ TFHCAUSE_HW_ERROR_PAGESIZE,
+ TFHCAUSE_INSTRUCTION_EXCEPTION,
+ TFHCAUSE_UNCORRECTIBLE_ERROR,
+};
+
+/* GAA values */
+#define GAA_RAM 0x0
+#define GAA_NCRAM 0x2
+#define GAA_MMIO 0x1
+#define GAA_REGISTER 0x3
+
+/* GRU paddr shift for pfn. (NOTE: shift is NOT by actual pagesize) */
+#define GRU_PADDR_SHIFT 12
+
+/*
+ * Context Configuration handle
+ * Used to allocate resources to a GSEG context.
+ *
+ */
+struct gru_context_configuration_handle {
+ unsigned int cmd:1; /* DW0 */
+ unsigned int delresp:1;
+ unsigned int opc:3;
+ unsigned int unmap_enable:1;
+ unsigned int req_slice_set_enable:1;
+ unsigned int req_slice:2;
+ unsigned int cb_int_enable:1;
+ unsigned int tlb_int_enable:1;
+ unsigned int tfm_fault_bit_enable:1;
+ unsigned int tlb_int_select:4;
+
+ unsigned int status:2;
+ unsigned int state:2;
+ unsigned int reserved2:4;
+
+ unsigned int cause:4;
+ unsigned int tfm_done_bit_enable:1;
+ unsigned int unused:3;
+
+ unsigned int dsr_allocation_map;
+
+ unsigned long cbr_allocation_map; /* DW1 */
+
+ unsigned int asid[8]; /* DW 2 - 5 */
+ unsigned short sizeavail[8]; /* DW 6 - 7 */
+} __attribute__ ((packed));
+
+enum gru_cch_opc {
+ CCHOP_START = 1,
+ CCHOP_ALLOCATE,
+ CCHOP_INTERRUPT,
+ CCHOP_DEALLOCATE,
+ CCHOP_INTERRUPT_SYNC,
+};
+
+enum gru_cch_status {
+ CCHSTATUS_IDLE,
+ CCHSTATUS_EXCEPTION,
+ CCHSTATUS_ACTIVE,
+};
+
+enum gru_cch_state {
+ CCHSTATE_INACTIVE,
+ CCHSTATE_MAPPED,
+ CCHSTATE_ACTIVE,
+ CCHSTATE_INTERRUPTED,
+};
+
+/* CCH Exception cause */
+enum gru_cch_cause {
+ CCHCAUSE_REGION_REGISTER_WRITE_ERROR = 1,
+ CCHCAUSE_ILLEGAL_OPCODE = 2,
+ CCHCAUSE_INVALID_START_REQUEST = 3,
+ CCHCAUSE_INVALID_ALLOCATION_REQUEST = 4,
+ CCHCAUSE_INVALID_DEALLOCATION_REQUEST = 5,
+ CCHCAUSE_INVALID_INTERRUPT_REQUEST = 6,
+ CCHCAUSE_CCH_BUSY = 7,
+ CCHCAUSE_NO_CBRS_TO_ALLOCATE = 8,
+ CCHCAUSE_BAD_TFM_CONFIG = 9,
+ CCHCAUSE_CBR_RESOURCES_OVERSUBSCRIPED = 10,
+ CCHCAUSE_DSR_RESOURCES_OVERSUBSCRIPED = 11,
+ CCHCAUSE_CBR_DEALLOCATION_ERROR = 12,
+};
+/*
+ * CBE - Control Block Extended
+ * Maintains internal GRU state for active CBs.
+ *
+ */
+struct gru_control_block_extended {
+ unsigned int reserved0:1; /* DW 0 - low */
+ unsigned int imacpy:3;
+ unsigned int reserved1:4;
+ unsigned int xtypecpy:3;
+ unsigned int iaa0cpy:2;
+ unsigned int iaa1cpy:2;
+ unsigned int reserved2:1;
+ unsigned int opccpy:8;
+ unsigned int exopccpy:8;
+
+ unsigned int idef2cpy:22; /* DW 0 - high */
+ unsigned int reserved3:10;
+
+ unsigned int idef4cpy:22; /* DW 1 */
+ unsigned int reserved4:10;
+ unsigned int idef4upd:22;
+ unsigned int reserved5:10;
+
+ unsigned long idef1upd:64; /* DW 2 */
+
+ unsigned long idef5cpy:64; /* DW 3 */
+
+ unsigned long idef6cpy:64; /* DW 4 */
+
+ unsigned long idef3upd:64; /* DW 5 */
+
+ unsigned long idef5upd:64; /* DW 6 */
+
+ unsigned int idef2upd:22; /* DW 7 */
+ unsigned int reserved6:10;
+
+ unsigned int ecause:20;
+ unsigned int cbrstate:4;
+ unsigned int cbrexecstatus:8;
+};
+
+enum gru_cbr_state {
+ CBRSTATE_INACTIVE,
+ CBRSTATE_IDLE,
+ CBRSTATE_PE_CHECK,
+ CBRSTATE_QUEUED,
+ CBRSTATE_WAIT_RESPONSE,
+ CBRSTATE_INTERRUPTED,
+ CBRSTATE_INTERRUPTED_MISS_FMM,
+ CBRSTATE_BUSY_INTERRUPT_MISS_FMM,
+ CBRSTATE_INTERRUPTED_MISS_UPM,
+ CBRSTATE_BUSY_INTERRUPTED_MISS_UPM,
+ CBRSTATE_REQUEST_ISSUE,
+ CBRSTATE_BUSY_INTERRUPT,
+};
+
+/* CBE cbrexecstatus bits */
+#define CBR_EXS_ABORT_OCC_BIT 0
+#define CBR_EXS_INT_OCC_BIT 1
+#define CBR_EXS_PENDING_BIT 2
+#define CBR_EXS_QUEUED_BIT 3
+#define CBR_EXS_TLBHW_BIT 4
+#define CBR_EXS_EXCEPTION_BIT 5
+
+#define CBR_EXS_ABORT_OCC (1 << CBR_EXS_ABORT_OCC_BIT)
+#define CBR_EXS_INT_OCC (1 << CBR_EXS_INT_OCC_BIT)
+#define CBR_EXS_PENDING (1 << CBR_EXS_PENDING_BIT)
+#define CBR_EXS_QUEUED (1 << CBR_EXS_QUEUED_BIT)
+#define CBR_EXS_TLBHW (1 << CBR_EXS_TLBHW_BIT)
+#define CBR_EXS_EXCEPTION (1 << CBR_EXS_EXCEPTION_BIT)
+
+/* CBE ecause bits - defined in gru_instructions.h */
+
+/*
+ * Convert a processor pagesize into the strange encoded pagesize used by the
+ * GRU. Processor pagesize is encoded as log of bytes per page. (or PAGE_SHIFT)
+ * pagesize log pagesize grupagesize
+ * 4k 12 0
+ * 16k 14 1
+ * 64k 16 2
+ * 256k 18 3
+ * 1m 20 4
+ * 2m 21 5
+ * 4m 22 6
+ * 16m 24 7
+ * 64m 26 8
+ * ...
+ */
+#define GRU_PAGESIZE(sh) ((((sh) > 20 ? (sh) + 2: (sh)) >> 1) - 6)
+#define GRU_SIZEAVAIL(sh) (1UL << GRU_PAGESIZE(sh))
+
+/* minimum TLB purge count to ensure a full purge */
+#define GRUMAXINVAL 1024UL
+
+
+/* Extract the status field from a kernel handle */
+#define GET_MSEG_HANDLE_STATUS(h) (((*(unsigned long *)(h)) >> 16) & 3)
+
+static inline void start_instruction(void *h)
+{
+ unsigned long *w0 = h;
+
+ wmb(); /* setting CMD bit must be last */
+ *w0 = *w0 | 1;
+ gru_flush_cache(h);
+}
+
+static inline int wait_instruction_complete(void *h)
+{
+ int status;
+
+ do {
+ cpu_relax();
+ barrier();
+ status = GET_MSEG_HANDLE_STATUS(h);
+ } while (status == CCHSTATUS_ACTIVE);
+ return status;
+}
+
+#if defined CONFIG_IA64
+static inline void cch_allocate_set_asids(
+ struct gru_context_configuration_handle *cch, int asidval)
+{
+ int i;
+
+ for (i = 0; i <= RGN_HPAGE; i++) { /* assume HPAGE is last region */
+ cch->asid[i] = (asidval++);
+#if 0
+ /* ZZZ hugepages not supported yet */
+ if (i == RGN_HPAGE)
+ cch->sizeavail[i] = GRU_SIZEAVAIL(hpage_shift);
+ else
+#endif
+ cch->sizeavail[i] = GRU_SIZEAVAIL(PAGE_SHIFT);
+ }
+}
+#elif defined CONFIG_X86_64
+static inline void cch_allocate_set_asids(
+ struct gru_context_configuration_handle *cch, int asidval)
+{
+ int i;
+
+ for (i = 0; i < 8; i++) {
+ cch->asid[i] = asidval++;
+ cch->sizeavail[i] = GRU_SIZEAVAIL(PAGE_SHIFT) |
+ GRU_SIZEAVAIL(21);
+ }
+}
+#endif
+
+static inline int cch_allocate(struct gru_context_configuration_handle *cch,
+ int asidval, unsigned long cbrmap,
+ unsigned long dsrmap)
+{
+ cch_allocate_set_asids(cch, asidval);
+ cch->dsr_allocation_map = dsrmap;
+ cch->cbr_allocation_map = cbrmap;
+ cch->opc = CCHOP_ALLOCATE;
+ start_instruction(cch);
+ return wait_instruction_complete(cch);
+}
+
+static inline int cch_start(struct gru_context_configuration_handle *cch)
+{
+ cch->opc = CCHOP_START;
+ start_instruction(cch);
+ return wait_instruction_complete(cch);
+}
+
+static inline int cch_interrupt(struct gru_context_configuration_handle *cch)
+{
+ cch->opc = CCHOP_INTERRUPT;
+ start_instruction(cch);
+ return wait_instruction_complete(cch);
+}
+
+static inline int cch_deallocate(struct gru_context_configuration_handle *cch)
+{
+ cch->opc = CCHOP_DEALLOCATE;
+ start_instruction(cch);
+ return wait_instruction_complete(cch);
+}
+
+static inline int cch_interrupt_sync(struct gru_context_configuration_handle
+ *cch)
+{
+ cch->opc = CCHOP_INTERRUPT_SYNC;
+ start_instruction(cch);
+ return wait_instruction_complete(cch);
+}
+
+static inline int tgh_invalidate(struct gru_tlb_global_handle *tgh,
+ unsigned long vaddr, unsigned long vaddrmask,
+ int asid, int pagesize, int global, int n,
+ unsigned short ctxbitmap)
+{
+ tgh->vaddr = vaddr;
+ tgh->asid = asid;
+ tgh->pagesize = pagesize;
+ tgh->n = n;
+ tgh->global = global;
+ tgh->vaddrmask = vaddrmask;
+ tgh->ctxbitmap = ctxbitmap;
+ tgh->opc = TGHOP_TLBINV;
+ start_instruction(tgh);
+ return wait_instruction_complete(tgh);
+}
+
+static inline void tfh_write_only(struct gru_tlb_fault_handle *tfh,
+ unsigned long pfn, unsigned long vaddr,
+ int asid, int dirty, int pagesize)
+{
+ tfh->fillasid = asid;
+ tfh->fillvaddr = vaddr;
+ tfh->pfn = pfn;
+ tfh->dirty = dirty;
+ tfh->pagesize = pagesize;
+ tfh->opc = TFHOP_WRITE_ONLY;
+ start_instruction(tfh);
+}
+
+static inline void tfh_write_restart(struct gru_tlb_fault_handle *tfh,
+ unsigned long paddr, int gaa,
+ unsigned long vaddr, int asid, int dirty,
+ int pagesize)
+{
+ tfh->fillasid = asid;
+ tfh->fillvaddr = vaddr;
+ tfh->pfn = paddr >> GRU_PADDR_SHIFT;
+ tfh->gaa = gaa;
+ tfh->dirty = dirty;
+ tfh->pagesize = pagesize;
+ tfh->opc = TFHOP_WRITE_RESTART;
+ start_instruction(tfh);
+}
+
+static inline void tfh_restart(struct gru_tlb_fault_handle *tfh)
+{
+ tfh->opc = TFHOP_RESTART;
+ start_instruction(tfh);
+}
+
+static inline void tfh_user_polling_mode(struct gru_tlb_fault_handle *tfh)
+{
+ tfh->opc = TFHOP_USER_POLLING_MODE;
+ start_instruction(tfh);
+}
+
+static inline void tfh_exception(struct gru_tlb_fault_handle *tfh)
+{
+ tfh->opc = TFHOP_EXCEPTION;
+ start_instruction(tfh);
+}
+
+#endif /* __GRUHANDLES_H__ */
diff --git a/drivers/misc/sgi-gru/grukservices.c b/drivers/misc/sgi-gru/grukservices.c
new file mode 100644
index 00000000000..dfd49af0fe1
--- /dev/null
+++ b/drivers/misc/sgi-gru/grukservices.c
@@ -0,0 +1,679 @@
+/*
+ * SN Platform GRU Driver
+ *
+ * KERNEL SERVICES THAT USE THE GRU
+ *
+ * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/smp_lock.h>
+#include <linux/spinlock.h>
+#include <linux/device.h>
+#include <linux/miscdevice.h>
+#include <linux/proc_fs.h>
+#include <linux/interrupt.h>
+#include <linux/uaccess.h>
+#include "gru.h"
+#include "grulib.h"
+#include "grutables.h"
+#include "grukservices.h"
+#include "gru_instructions.h"
+#include <asm/uv/uv_hub.h>
+
+/*
+ * Kernel GRU Usage
+ *
+ * The following is an interim algorithm for management of kernel GRU
+ * resources. This will likely be replaced when we better understand the
+ * kernel/user requirements.
+ *
+ * At boot time, the kernel permanently reserves a fixed number of
+ * CBRs/DSRs for each cpu to use. The resources are all taken from
+ * the GRU chiplet 1 on the blade. This leaves the full set of resources
+ * of chiplet 0 available to be allocated to a single user.
+ */
+
+/* Blade percpu resources PERMANENTLY reserved for kernel use */
+#define GRU_NUM_KERNEL_CBR 1
+#define GRU_NUM_KERNEL_DSR_BYTES 256
+#define KERNEL_CTXNUM 15
+
+/* GRU instruction attributes for all instructions */
+#define IMA IMA_CB_DELAY
+
+/* GRU cacheline size is always 64 bytes - even on arches with 128 byte lines */
+#define __gru_cacheline_aligned__ \
+ __attribute__((__aligned__(GRU_CACHE_LINE_BYTES)))
+
+#define MAGIC 0x1234567887654321UL
+
+/* Default retry count for GRU errors on kernel instructions */
+#define EXCEPTION_RETRY_LIMIT 3
+
+/* Status of message queue sections */
+#define MQS_EMPTY 0
+#define MQS_FULL 1
+#define MQS_NOOP 2
+
+/*----------------- RESOURCE MANAGEMENT -------------------------------------*/
+/* optimized for x86_64 */
+struct message_queue {
+ union gru_mesqhead head __gru_cacheline_aligned__; /* CL 0 */
+ int qlines; /* DW 1 */
+ long hstatus[2];
+ void *next __gru_cacheline_aligned__;/* CL 1 */
+ void *limit;
+ void *start;
+ void *start2;
+ char data ____cacheline_aligned; /* CL 2 */
+};
+
+/* First word in every message - used by mesq interface */
+struct message_header {
+ char present;
+ char present2;
+ char lines;
+ char fill;
+};
+
+#define QLINES(mq) ((mq) + offsetof(struct message_queue, qlines))
+#define HSTATUS(mq, h) ((mq) + offsetof(struct message_queue, hstatus[h]))
+
+static int gru_get_cpu_resources(int dsr_bytes, void **cb, void **dsr)
+{
+ struct gru_blade_state *bs;
+ int lcpu;
+
+ BUG_ON(dsr_bytes > GRU_NUM_KERNEL_DSR_BYTES);
+ preempt_disable();
+ bs = gru_base[uv_numa_blade_id()];
+ lcpu = uv_blade_processor_id();
+ *cb = bs->kernel_cb + lcpu * GRU_HANDLE_STRIDE;
+ *dsr = bs->kernel_dsr + lcpu * GRU_NUM_KERNEL_DSR_BYTES;
+ return 0;
+}
+
+static void gru_free_cpu_resources(void *cb, void *dsr)
+{
+ preempt_enable();
+}
+
+int gru_get_cb_exception_detail(void *cb,
+ struct control_block_extended_exc_detail *excdet)
+{
+ struct gru_control_block_extended *cbe;
+
+ cbe = get_cbe(GRUBASE(cb), get_cb_number(cb));
+ excdet->opc = cbe->opccpy;
+ excdet->exopc = cbe->exopccpy;
+ excdet->ecause = cbe->ecause;
+ excdet->exceptdet0 = cbe->idef1upd;
+ excdet->exceptdet1 = cbe->idef3upd;
+ return 0;
+}
+
+char *gru_get_cb_exception_detail_str(int ret, void *cb,
+ char *buf, int size)
+{
+ struct gru_control_block_status *gen = (void *)cb;
+ struct control_block_extended_exc_detail excdet;
+
+ if (ret > 0 && gen->istatus == CBS_EXCEPTION) {
+ gru_get_cb_exception_detail(cb, &excdet);
+ snprintf(buf, size,
+ "GRU exception: cb %p, opc %d, exopc %d, ecause 0x%x,"
+ "excdet0 0x%lx, excdet1 0x%x",
+ gen, excdet.opc, excdet.exopc, excdet.ecause,
+ excdet.exceptdet0, excdet.exceptdet1);
+ } else {
+ snprintf(buf, size, "No exception");
+ }
+ return buf;
+}
+
+static int gru_wait_idle_or_exception(struct gru_control_block_status *gen)
+{
+ while (gen->istatus >= CBS_ACTIVE) {
+ cpu_relax();
+ barrier();
+ }
+ return gen->istatus;
+}
+
+static int gru_retry_exception(void *cb)
+{
+ struct gru_control_block_status *gen = (void *)cb;
+ struct control_block_extended_exc_detail excdet;
+ int retry = EXCEPTION_RETRY_LIMIT;
+
+ while (1) {
+ if (gru_get_cb_message_queue_substatus(cb))
+ break;
+ if (gru_wait_idle_or_exception(gen) == CBS_IDLE)
+ return CBS_IDLE;
+
+ gru_get_cb_exception_detail(cb, &excdet);
+ if (excdet.ecause & ~EXCEPTION_RETRY_BITS)
+ break;
+ if (retry-- == 0)
+ break;
+ gen->icmd = 1;
+ gru_flush_cache(gen);
+ }
+ return CBS_EXCEPTION;
+}
+
+int gru_check_status_proc(void *cb)
+{
+ struct gru_control_block_status *gen = (void *)cb;
+ int ret;
+
+ ret = gen->istatus;
+ if (ret != CBS_EXCEPTION)
+ return ret;
+ return gru_retry_exception(cb);
+
+}
+
+int gru_wait_proc(void *cb)
+{
+ struct gru_control_block_status *gen = (void *)cb;
+ int ret;
+
+ ret = gru_wait_idle_or_exception(gen);
+ if (ret == CBS_EXCEPTION)
+ ret = gru_retry_exception(cb);
+
+ return ret;
+}
+
+void gru_abort(int ret, void *cb, char *str)
+{
+ char buf[GRU_EXC_STR_SIZE];
+
+ panic("GRU FATAL ERROR: %s - %s\n", str,
+ gru_get_cb_exception_detail_str(ret, cb, buf, sizeof(buf)));
+}
+
+void gru_wait_abort_proc(void *cb)
+{
+ int ret;
+
+ ret = gru_wait_proc(cb);
+ if (ret)
+ gru_abort(ret, cb, "gru_wait_abort");
+}
+
+
+/*------------------------------ MESSAGE QUEUES -----------------------------*/
+
+/* Internal status . These are NOT returned to the user. */
+#define MQIE_AGAIN -1 /* try again */
+
+
+/*
+ * Save/restore the "present" flag that is in the second line of 2-line
+ * messages
+ */
+static inline int get_present2(void *p)
+{
+ struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES;
+ return mhdr->present;
+}
+
+static inline void restore_present2(void *p, int val)
+{
+ struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES;
+ mhdr->present = val;
+}
+
+/*
+ * Create a message queue.
+ * qlines - message queue size in cache lines. Includes 2-line header.
+ */
+int gru_create_message_queue(void *p, unsigned int bytes)
+{
+ struct message_queue *mq = p;
+ unsigned int qlines;
+
+ qlines = bytes / GRU_CACHE_LINE_BYTES - 2;
+ memset(mq, 0, bytes);
+ mq->start = &mq->data;
+ mq->start2 = &mq->data + (qlines / 2 - 1) * GRU_CACHE_LINE_BYTES;
+ mq->next = &mq->data;
+ mq->limit = &mq->data + (qlines - 2) * GRU_CACHE_LINE_BYTES;
+ mq->qlines = qlines;
+ mq->hstatus[0] = 0;
+ mq->hstatus[1] = 1;
+ mq->head = gru_mesq_head(2, qlines / 2 + 1);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(gru_create_message_queue);
+
+/*
+ * Send a NOOP message to a message queue
+ * Returns:
+ * 0 - if queue is full after the send. This is the normal case
+ * but various races can change this.
+ * -1 - if mesq sent successfully but queue not full
+ * >0 - unexpected error. MQE_xxx returned
+ */
+static int send_noop_message(void *cb,
+ unsigned long mq, void *mesg)
+{
+ const struct message_header noop_header = {
+ .present = MQS_NOOP, .lines = 1};
+ unsigned long m;
+ int substatus, ret;
+ struct message_header save_mhdr, *mhdr = mesg;
+
+ STAT(mesq_noop);
+ save_mhdr = *mhdr;
+ *mhdr = noop_header;
+ gru_mesq(cb, mq, gru_get_tri(mhdr), 1, IMA);
+ ret = gru_wait(cb);
+
+ if (ret) {
+ substatus = gru_get_cb_message_queue_substatus(cb);
+ switch (substatus) {
+ case CBSS_NO_ERROR:
+ STAT(mesq_noop_unexpected_error);
+ ret = MQE_UNEXPECTED_CB_ERR;
+ break;
+ case CBSS_LB_OVERFLOWED:
+ STAT(mesq_noop_lb_overflow);
+ ret = MQE_CONGESTION;
+ break;
+ case CBSS_QLIMIT_REACHED:
+ STAT(mesq_noop_qlimit_reached);
+ ret = 0;
+ break;
+ case CBSS_AMO_NACKED:
+ STAT(mesq_noop_amo_nacked);
+ ret = MQE_CONGESTION;
+ break;
+ case CBSS_PUT_NACKED:
+ STAT(mesq_noop_put_nacked);
+ m = mq + (gru_get_amo_value_head(cb) << 6);
+ gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, 1, 1,
+ IMA);
+ if (gru_wait(cb) == CBS_IDLE)
+ ret = MQIE_AGAIN;
+ else
+ ret = MQE_UNEXPECTED_CB_ERR;
+ break;
+ case CBSS_PAGE_OVERFLOW:
+ default:
+ BUG();
+ }
+ }
+ *mhdr = save_mhdr;
+ return ret;
+}
+
+/*
+ * Handle a gru_mesq full.
+ */
+static int send_message_queue_full(void *cb,
+ unsigned long mq, void *mesg, int lines)
+{
+ union gru_mesqhead mqh;
+ unsigned int limit, head;
+ unsigned long avalue;
+ int half, qlines, save;
+
+ /* Determine if switching to first/second half of q */
+ avalue = gru_get_amo_value(cb);
+ head = gru_get_amo_value_head(cb);
+ limit = gru_get_amo_value_limit(cb);
+
+ /*
+ * Fetch "qlines" from the queue header. Since the queue may be
+ * in memory that can't be accessed using socket addresses, use
+ * the GRU to access the data. Use DSR space from the message.
+ */
+ save = *(int *)mesg;
+ gru_vload(cb, QLINES(mq), gru_get_tri(mesg), XTYPE_W, 1, 1, IMA);
+ if (gru_wait(cb) != CBS_IDLE)
+ goto cberr;
+ qlines = *(int *)mesg;
+ *(int *)mesg = save;
+ half = (limit != qlines);
+
+ if (half)
+ mqh = gru_mesq_head(qlines / 2 + 1, qlines);
+ else
+ mqh = gru_mesq_head(2, qlines / 2 + 1);
+
+ /* Try to get lock for switching head pointer */
+ gru_gamir(cb, EOP_IR_CLR, HSTATUS(mq, half), XTYPE_DW, IMA);
+ if (gru_wait(cb) != CBS_IDLE)
+ goto cberr;
+ if (!gru_get_amo_value(cb)) {
+ STAT(mesq_qf_locked);
+ return MQE_QUEUE_FULL;
+ }
+
+ /* Got the lock. Send optional NOP if queue not full, */
+ if (head != limit) {
+ if (send_noop_message(cb, mq, mesg)) {
+ gru_gamir(cb, EOP_IR_INC, HSTATUS(mq, half),
+ XTYPE_DW, IMA);
+ if (gru_wait(cb) != CBS_IDLE)
+ goto cberr;
+ STAT(mesq_qf_noop_not_full);
+ return MQIE_AGAIN;
+ }
+ avalue++;
+ }
+
+ /* Then flip queuehead to other half of queue. */
+ gru_gamer(cb, EOP_ERR_CSWAP, mq, XTYPE_DW, mqh.val, avalue, IMA);
+ if (gru_wait(cb) != CBS_IDLE)
+ goto cberr;
+
+ /* If not successfully in swapping queue head, clear the hstatus lock */
+ if (gru_get_amo_value(cb) != avalue) {
+ STAT(mesq_qf_switch_head_failed);
+ gru_gamir(cb, EOP_IR_INC, HSTATUS(mq, half), XTYPE_DW, IMA);
+ if (gru_wait(cb) != CBS_IDLE)
+ goto cberr;
+ }
+ return MQIE_AGAIN;
+cberr:
+ STAT(mesq_qf_unexpected_error);
+ return MQE_UNEXPECTED_CB_ERR;
+}
+
+
+/*
+ * Handle a gru_mesq failure. Some of these failures are software recoverable
+ * or retryable.
+ */
+static int send_message_failure(void *cb,
+ unsigned long mq,
+ void *mesg,
+ int lines)
+{
+ int substatus, ret = 0;
+ unsigned long m;
+
+ substatus = gru_get_cb_message_queue_substatus(cb);
+ switch (substatus) {
+ case CBSS_NO_ERROR:
+ STAT(mesq_send_unexpected_error);
+ ret = MQE_UNEXPECTED_CB_ERR;
+ break;
+ case CBSS_LB_OVERFLOWED:
+ STAT(mesq_send_lb_overflow);
+ ret = MQE_CONGESTION;
+ break;
+ case CBSS_QLIMIT_REACHED:
+ STAT(mesq_send_qlimit_reached);
+ ret = send_message_queue_full(cb, mq, mesg, lines);
+ break;
+ case CBSS_AMO_NACKED:
+ STAT(mesq_send_amo_nacked);
+ ret = MQE_CONGESTION;
+ break;
+ case CBSS_PUT_NACKED:
+ STAT(mesq_send_put_nacked);
+ m =mq + (gru_get_amo_value_head(cb) << 6);
+ gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, lines, 1, IMA);
+ if (gru_wait(cb) == CBS_IDLE)
+ ret = MQE_OK;
+ else
+ ret = MQE_UNEXPECTED_CB_ERR;
+ break;
+ default:
+ BUG();
+ }
+ return ret;
+}
+
+/*
+ * Send a message to a message queue
+ * cb GRU control block to use to send message
+ * mq message queue
+ * mesg message. ust be vaddr within a GSEG
+ * bytes message size (<= 2 CL)
+ */
+int gru_send_message_gpa(unsigned long mq, void *mesg, unsigned int bytes)
+{
+ struct message_header *mhdr;
+ void *cb;
+ void *dsr;
+ int istatus, clines, ret;
+
+ STAT(mesq_send);
+ BUG_ON(bytes < sizeof(int) || bytes > 2 * GRU_CACHE_LINE_BYTES);
+
+ clines = (bytes + GRU_CACHE_LINE_BYTES - 1) / GRU_CACHE_LINE_BYTES;
+ if (gru_get_cpu_resources(bytes, &cb, &dsr))
+ return MQE_BUG_NO_RESOURCES;
+ memcpy(dsr, mesg, bytes);
+ mhdr = dsr;
+ mhdr->present = MQS_FULL;
+ mhdr->lines = clines;
+ if (clines == 2) {
+ mhdr->present2 = get_present2(mhdr);
+ restore_present2(mhdr, MQS_FULL);
+ }
+
+ do {
+ ret = MQE_OK;
+ gru_mesq(cb, mq, gru_get_tri(mhdr), clines, IMA);
+ istatus = gru_wait(cb);
+ if (istatus != CBS_IDLE)
+ ret = send_message_failure(cb, mq, dsr, clines);
+ } while (ret == MQIE_AGAIN);
+ gru_free_cpu_resources(cb, dsr);
+
+ if (ret)
+ STAT(mesq_send_failed);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(gru_send_message_gpa);
+
+/*
+ * Advance the receive pointer for the queue to the next message.
+ */
+void gru_free_message(void *rmq, void *mesg)
+{
+ struct message_queue *mq = rmq;
+ struct message_header *mhdr = mq->next;
+ void *next, *pnext;
+ int half = -1;
+ int lines = mhdr->lines;
+
+ if (lines == 2)
+ restore_present2(mhdr, MQS_EMPTY);
+ mhdr->present = MQS_EMPTY;
+
+ pnext = mq->next;
+ next = pnext + GRU_CACHE_LINE_BYTES * lines;
+ if (next == mq->limit) {
+ next = mq->start;
+ half = 1;
+ } else if (pnext < mq->start2 && next >= mq->start2) {
+ half = 0;
+ }
+
+ if (half >= 0)
+ mq->hstatus[half] = 1;
+ mq->next = next;
+}
+EXPORT_SYMBOL_GPL(gru_free_message);
+
+/*
+ * Get next message from message queue. Return NULL if no message
+ * present. User must call next_message() to move to next message.
+ * rmq message queue
+ */
+void *gru_get_next_message(void *rmq)
+{
+ struct message_queue *mq = rmq;
+ struct message_header *mhdr = mq->next;
+ int present = mhdr->present;
+
+ /* skip NOOP messages */
+ STAT(mesq_receive);
+ while (present == MQS_NOOP) {
+ gru_free_message(rmq, mhdr);
+ mhdr = mq->next;
+ present = mhdr->present;
+ }
+
+ /* Wait for both halves of 2 line messages */
+ if (present == MQS_FULL && mhdr->lines == 2 &&
+ get_present2(mhdr) == MQS_EMPTY)
+ present = MQS_EMPTY;
+
+ if (!present) {
+ STAT(mesq_receive_none);
+ return NULL;
+ }
+
+ if (mhdr->lines == 2)
+ restore_present2(mhdr, mhdr->present2);
+
+ return mhdr;
+}
+EXPORT_SYMBOL_GPL(gru_get_next_message);
+
+/* ---------------------- GRU DATA COPY FUNCTIONS ---------------------------*/
+
+/*
+ * Copy a block of data using the GRU resources
+ */
+int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa,
+ unsigned int bytes)
+{
+ void *cb;
+ void *dsr;
+ int ret;
+
+ STAT(copy_gpa);
+ if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr))
+ return MQE_BUG_NO_RESOURCES;
+ gru_bcopy(cb, src_gpa, dest_gpa, gru_get_tri(dsr),
+ XTYPE_B, bytes, GRU_NUM_KERNEL_DSR_BYTES, IMA);
+ ret = gru_wait(cb);
+ gru_free_cpu_resources(cb, dsr);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(gru_copy_gpa);
+
+/* ------------------- KERNEL QUICKTESTS RUN AT STARTUP ----------------*/
+/* Temp - will delete after we gain confidence in the GRU */
+static __cacheline_aligned unsigned long word0;
+static __cacheline_aligned unsigned long word1;
+
+static int quicktest(struct gru_state *gru)
+{
+ void *cb;
+ void *ds;
+ unsigned long *p;
+
+ cb = get_gseg_base_address_cb(gru->gs_gru_base_vaddr, KERNEL_CTXNUM, 0);
+ ds = get_gseg_base_address_ds(gru->gs_gru_base_vaddr, KERNEL_CTXNUM, 0);
+ p = ds;
+ word0 = MAGIC;
+
+ gru_vload(cb, uv_gpa(&word0), 0, XTYPE_DW, 1, 1, IMA);
+ if (gru_wait(cb) != CBS_IDLE)
+ BUG();
+
+ if (*(unsigned long *)ds != MAGIC)
+ BUG();
+ gru_vstore(cb, uv_gpa(&word1), 0, XTYPE_DW, 1, 1, IMA);
+ if (gru_wait(cb) != CBS_IDLE)
+ BUG();
+
+ if (word0 != word1 || word0 != MAGIC) {
+ printk
+ ("GRU quicktest err: gru %d, found 0x%lx, expected 0x%lx\n",
+ gru->gs_gid, word1, MAGIC);
+ BUG(); /* ZZZ should not be fatal */
+ }
+
+ return 0;
+}
+
+
+int gru_kservices_init(struct gru_state *gru)
+{
+ struct gru_blade_state *bs;
+ struct gru_context_configuration_handle *cch;
+ unsigned long cbr_map, dsr_map;
+ int err, num, cpus_possible;
+
+ /*
+ * Currently, resources are reserved ONLY on the second chiplet
+ * on each blade. This leaves ALL resources on chiplet 0 available
+ * for user code.
+ */
+ bs = gru->gs_blade;
+ if (gru != &bs->bs_grus[1])
+ return 0;
+
+ cpus_possible = uv_blade_nr_possible_cpus(gru->gs_blade_id);
+
+ num = GRU_NUM_KERNEL_CBR * cpus_possible;
+ cbr_map = gru_reserve_cb_resources(gru, GRU_CB_COUNT_TO_AU(num), NULL);
+ gru->gs_reserved_cbrs += num;
+
+ num = GRU_NUM_KERNEL_DSR_BYTES * cpus_possible;
+ dsr_map = gru_reserve_ds_resources(gru, GRU_DS_BYTES_TO_AU(num), NULL);
+ gru->gs_reserved_dsr_bytes += num;
+
+ gru->gs_active_contexts++;
+ __set_bit(KERNEL_CTXNUM, &gru->gs_context_map);
+ cch = get_cch(gru->gs_gru_base_vaddr, KERNEL_CTXNUM);
+
+ bs->kernel_cb = get_gseg_base_address_cb(gru->gs_gru_base_vaddr,
+ KERNEL_CTXNUM, 0);
+ bs->kernel_dsr = get_gseg_base_address_ds(gru->gs_gru_base_vaddr,
+ KERNEL_CTXNUM, 0);
+
+ lock_cch_handle(cch);
+ cch->tfm_fault_bit_enable = 0;
+ cch->tlb_int_enable = 0;
+ cch->tfm_done_bit_enable = 0;
+ cch->unmap_enable = 1;
+ err = cch_allocate(cch, 0, cbr_map, dsr_map);
+ if (err) {
+ gru_dbg(grudev,
+ "Unable to allocate kernel CCH: gru %d, err %d\n",
+ gru->gs_gid, err);
+ BUG();
+ }
+ if (cch_start(cch)) {
+ gru_dbg(grudev, "Unable to start kernel CCH: gru %d, err %d\n",
+ gru->gs_gid, err);
+ BUG();
+ }
+ unlock_cch_handle(cch);
+
+ if (gru_options & GRU_QUICKLOOK)
+ quicktest(gru);
+ return 0;
+}
diff --git a/drivers/misc/sgi-gru/grukservices.h b/drivers/misc/sgi-gru/grukservices.h
new file mode 100644
index 00000000000..eb17e0a3ac6
--- /dev/null
+++ b/drivers/misc/sgi-gru/grukservices.h
@@ -0,0 +1,134 @@
+
+/*
+ * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#ifndef __GRU_KSERVICES_H_
+#define __GRU_KSERVICES_H_
+
+
+/*
+ * Message queues using the GRU to send/receive messages.
+ *
+ * These function allow the user to create a message queue for
+ * sending/receiving 1 or 2 cacheline messages using the GRU.
+ *
+ * Processes SENDING messages will use a kernel CBR/DSR to send
+ * the message. This is transparent to the caller.
+ *
+ * The receiver does not use any GRU resources.
+ *
+ * The functions support:
+ * - single receiver
+ * - multiple senders
+ * - cross partition message
+ *
+ * Missing features ZZZ:
+ * - user options for dealing with timeouts, queue full, etc.
+ * - gru_create_message_queue() needs interrupt vector info
+ */
+
+/*
+ * Initialize a user allocated chunk of memory to be used as
+ * a message queue. The caller must ensure that the queue is
+ * in contiguous physical memory and is cacheline aligned.
+ *
+ * Message queue size is the total number of bytes allocated
+ * to the queue including a 2 cacheline header that is used
+ * to manage the queue.
+ *
+ * Input:
+ * p pointer to user allocated memory.
+ * bytes size of message queue in bytes
+ *
+ * Errors:
+ * 0 OK
+ * >0 error
+ */
+extern int gru_create_message_queue(void *p, unsigned int bytes);
+
+/*
+ * Send a message to a message queue.
+ *
+ * Note: The message queue transport mechanism uses the first 32
+ * bits of the message. Users should avoid using these bits.
+ *
+ *
+ * Input:
+ * xmq message queue - must be a UV global physical address
+ * mesg pointer to message. Must be 64-bit aligned
+ * bytes size of message in bytes
+ *
+ * Output:
+ * 0 message sent
+ * >0 Send failure - see error codes below
+ *
+ */
+extern int gru_send_message_gpa(unsigned long mq_gpa, void *mesg,
+ unsigned int bytes);
+
+/* Status values for gru_send_message() */
+#define MQE_OK 0 /* message sent successfully */
+#define MQE_CONGESTION 1 /* temporary congestion, try again */
+#define MQE_QUEUE_FULL 2 /* queue is full */
+#define MQE_UNEXPECTED_CB_ERR 3 /* unexpected CB error */
+#define MQE_PAGE_OVERFLOW 10 /* BUG - queue overflowed a page */
+#define MQE_BUG_NO_RESOURCES 11 /* BUG - could not alloc GRU cb/dsr */
+
+/*
+ * Advance the receive pointer for the message queue to the next message.
+ * Note: current API requires messages to be gotten & freed in order. Future
+ * API extensions may allow for out-of-order freeing.
+ *
+ * Input
+ * mq message queue
+ * mesq message being freed
+ */
+extern void gru_free_message(void *mq, void *mesq);
+
+/*
+ * Get next message from message queue. Returns pointer to
+ * message OR NULL if no message present.
+ * User must call gru_free_message() after message is processed
+ * in order to move the queue pointers to next message.
+ *
+ * Input
+ * mq message queue
+ *
+ * Output:
+ * p pointer to message
+ * NULL no message available
+ */
+extern void *gru_get_next_message(void *mq);
+
+
+/*
+ * Copy data using the GRU. Source or destination can be located in a remote
+ * partition.
+ *
+ * Input:
+ * dest_gpa destination global physical address
+ * src_gpa source global physical address
+ * bytes number of bytes to copy
+ *
+ * Output:
+ * 0 OK
+ * >0 error
+ */
+extern int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa,
+ unsigned int bytes);
+
+#endif /* __GRU_KSERVICES_H_ */
diff --git a/drivers/misc/sgi-gru/grulib.h b/drivers/misc/sgi-gru/grulib.h
new file mode 100644
index 00000000000..e56e196a699
--- /dev/null
+++ b/drivers/misc/sgi-gru/grulib.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef __GRULIB_H__
+#define __GRULIB_H__
+
+#define GRU_BASENAME "gru"
+#define GRU_FULLNAME "/dev/gru"
+#define GRU_IOCTL_NUM 'G'
+
+/*
+ * Maximum number of GRU segments that a user can have open
+ * ZZZ temp - set high for testing. Revisit.
+ */
+#define GRU_MAX_OPEN_CONTEXTS 32
+
+/* Set Number of Request Blocks */
+#define GRU_CREATE_CONTEXT _IOWR(GRU_IOCTL_NUM, 1, void *)
+
+/* Register task as using the slice */
+#define GRU_SET_TASK_SLICE _IOWR(GRU_IOCTL_NUM, 5, void *)
+
+/* Fetch exception detail */
+#define GRU_USER_GET_EXCEPTION_DETAIL _IOWR(GRU_IOCTL_NUM, 6, void *)
+
+/* For user call_os handling - normally a TLB fault */
+#define GRU_USER_CALL_OS _IOWR(GRU_IOCTL_NUM, 8, void *)
+
+/* For user unload context */
+#define GRU_USER_UNLOAD_CONTEXT _IOWR(GRU_IOCTL_NUM, 9, void *)
+
+/* For fetching GRU chiplet status */
+#define GRU_GET_CHIPLET_STATUS _IOWR(GRU_IOCTL_NUM, 10, void *)
+
+/* For user TLB flushing (primarily for tests) */
+#define GRU_USER_FLUSH_TLB _IOWR(GRU_IOCTL_NUM, 50, void *)
+
+/* Get some config options (primarily for tests & emulator) */
+#define GRU_GET_CONFIG_INFO _IOWR(GRU_IOCTL_NUM, 51, void *)
+
+#define CONTEXT_WINDOW_BYTES(th) (GRU_GSEG_PAGESIZE * (th))
+#define THREAD_POINTER(p, th) (p + GRU_GSEG_PAGESIZE * (th))
+
+/*
+ * Structure used to pass TLB flush parameters to the driver
+ */
+struct gru_create_context_req {
+ unsigned long gseg;
+ unsigned int data_segment_bytes;
+ unsigned int control_blocks;
+ unsigned int maximum_thread_count;
+ unsigned int options;
+};
+
+/*
+ * Structure used to pass unload context parameters to the driver
+ */
+struct gru_unload_context_req {
+ unsigned long gseg;
+};
+
+/*
+ * Structure used to pass TLB flush parameters to the driver
+ */
+struct gru_flush_tlb_req {
+ unsigned long gseg;
+ unsigned long vaddr;
+ size_t len;
+};
+
+/*
+ * GRU configuration info (temp - for testing)
+ */
+struct gru_config_info {
+ int cpus;
+ int blades;
+ int nodes;
+ int chiplets;
+ int fill[16];
+};
+
+#endif /* __GRULIB_H__ */
diff --git a/drivers/misc/sgi-gru/grumain.c b/drivers/misc/sgi-gru/grumain.c
new file mode 100644
index 00000000000..0eeb8dddd2f
--- /dev/null
+++ b/drivers/misc/sgi-gru/grumain.c
@@ -0,0 +1,802 @@
+/*
+ * SN Platform GRU Driver
+ *
+ * DRIVER TABLE MANAGER + GRU CONTEXT LOAD/UNLOAD
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/spinlock.h>
+#include <linux/sched.h>
+#include <linux/device.h>
+#include <linux/list.h>
+#include <asm/uv/uv_hub.h>
+#include "gru.h"
+#include "grutables.h"
+#include "gruhandles.h"
+
+unsigned long gru_options __read_mostly;
+
+static struct device_driver gru_driver = {
+ .name = "gru"
+};
+
+static struct device gru_device = {
+ .bus_id = {0},
+ .driver = &gru_driver,
+};
+
+struct device *grudev = &gru_device;
+
+/*
+ * Select a gru fault map to be used by the current cpu. Note that
+ * multiple cpus may be using the same map.
+ * ZZZ should "shift" be used?? Depends on HT cpu numbering
+ * ZZZ should be inline but did not work on emulator
+ */
+int gru_cpu_fault_map_id(void)
+{
+ return uv_blade_processor_id() % GRU_NUM_TFM;
+}
+
+/*--------- ASID Management -------------------------------------------
+ *
+ * Initially, assign asids sequentially from MIN_ASID .. MAX_ASID.
+ * Once MAX is reached, flush the TLB & start over. However,
+ * some asids may still be in use. There won't be many (percentage wise) still
+ * in use. Search active contexts & determine the value of the first
+ * asid in use ("x"s below). Set "limit" to this value.
+ * This defines a block of assignable asids.
+ *
+ * When "limit" is reached, search forward from limit+1 and determine the
+ * next block of assignable asids.
+ *
+ * Repeat until MAX_ASID is reached, then start over again.
+ *
+ * Each time MAX_ASID is reached, increment the asid generation. Since
+ * the search for in-use asids only checks contexts with GRUs currently
+ * assigned, asids in some contexts will be missed. Prior to loading
+ * a context, the asid generation of the GTS asid is rechecked. If it
+ * doesn't match the current generation, a new asid will be assigned.
+ *
+ * 0---------------x------------x---------------------x----|
+ * ^-next ^-limit ^-MAX_ASID
+ *
+ * All asid manipulation & context loading/unloading is protected by the
+ * gs_lock.
+ */
+
+/* Hit the asid limit. Start over */
+static int gru_wrap_asid(struct gru_state *gru)
+{
+ gru_dbg(grudev, "gru %p\n", gru);
+ STAT(asid_wrap);
+ gru->gs_asid_gen++;
+ gru_flush_all_tlb(gru);
+ return MIN_ASID;
+}
+
+/* Find the next chunk of unused asids */
+static int gru_reset_asid_limit(struct gru_state *gru, int asid)
+{
+ int i, gid, inuse_asid, limit;
+
+ gru_dbg(grudev, "gru %p, asid 0x%x\n", gru, asid);
+ STAT(asid_next);
+ limit = MAX_ASID;
+ if (asid >= limit)
+ asid = gru_wrap_asid(gru);
+ gid = gru->gs_gid;
+again:
+ for (i = 0; i < GRU_NUM_CCH; i++) {
+ if (!gru->gs_gts[i])
+ continue;
+ inuse_asid = gru->gs_gts[i]->ts_gms->ms_asids[gid].mt_asid;
+ gru_dbg(grudev, "gru %p, inuse_asid 0x%x, cxtnum %d, gts %p\n",
+ gru, inuse_asid, i, gru->gs_gts[i]);
+ if (inuse_asid == asid) {
+ asid += ASID_INC;
+ if (asid >= limit) {
+ /*
+ * empty range: reset the range limit and
+ * start over
+ */
+ limit = MAX_ASID;
+ if (asid >= MAX_ASID)
+ asid = gru_wrap_asid(gru);
+ goto again;
+ }
+ }
+
+ if ((inuse_asid > asid) && (inuse_asid < limit))
+ limit = inuse_asid;
+ }
+ gru->gs_asid_limit = limit;
+ gru->gs_asid = asid;
+ gru_dbg(grudev, "gru %p, new asid 0x%x, new_limit 0x%x\n", gru, asid,
+ limit);
+ return asid;
+}
+
+/* Assign a new ASID to a thread context. */
+static int gru_assign_asid(struct gru_state *gru)
+{
+ int asid;
+
+ spin_lock(&gru->gs_asid_lock);
+ gru->gs_asid += ASID_INC;
+ asid = gru->gs_asid;
+ if (asid >= gru->gs_asid_limit)
+ asid = gru_reset_asid_limit(gru, asid);
+ spin_unlock(&gru->gs_asid_lock);
+
+ gru_dbg(grudev, "gru %p, asid 0x%x\n", gru, asid);
+ return asid;
+}
+
+/*
+ * Clear n bits in a word. Return a word indicating the bits that were cleared.
+ * Optionally, build an array of chars that contain the bit numbers allocated.
+ */
+static unsigned long reserve_resources(unsigned long *p, int n, int mmax,
+ char *idx)
+{
+ unsigned long bits = 0;
+ int i;
+
+ do {
+ i = find_first_bit(p, mmax);
+ if (i == mmax)
+ BUG();
+ __clear_bit(i, p);
+ __set_bit(i, &bits);
+ if (idx)
+ *idx++ = i;
+ } while (--n);
+ return bits;
+}
+
+unsigned long gru_reserve_cb_resources(struct gru_state *gru, int cbr_au_count,
+ char *cbmap)
+{
+ return reserve_resources(&gru->gs_cbr_map, cbr_au_count, GRU_CBR_AU,
+ cbmap);
+}
+
+unsigned long gru_reserve_ds_resources(struct gru_state *gru, int dsr_au_count,
+ char *dsmap)
+{
+ return reserve_resources(&gru->gs_dsr_map, dsr_au_count, GRU_DSR_AU,
+ dsmap);
+}
+
+static void reserve_gru_resources(struct gru_state *gru,
+ struct gru_thread_state *gts)
+{
+ gru->gs_active_contexts++;
+ gts->ts_cbr_map =
+ gru_reserve_cb_resources(gru, gts->ts_cbr_au_count,
+ gts->ts_cbr_idx);
+ gts->ts_dsr_map =
+ gru_reserve_ds_resources(gru, gts->ts_dsr_au_count, NULL);
+}
+
+static void free_gru_resources(struct gru_state *gru,
+ struct gru_thread_state *gts)
+{
+ gru->gs_active_contexts--;
+ gru->gs_cbr_map |= gts->ts_cbr_map;
+ gru->gs_dsr_map |= gts->ts_dsr_map;
+}
+
+/*
+ * Check if a GRU has sufficient free resources to satisfy an allocation
+ * request. Note: GRU locks may or may not be held when this is called. If
+ * not held, recheck after acquiring the appropriate locks.
+ *
+ * Returns 1 if sufficient resources, 0 if not
+ */
+static int check_gru_resources(struct gru_state *gru, int cbr_au_count,
+ int dsr_au_count, int max_active_contexts)
+{
+ return hweight64(gru->gs_cbr_map) >= cbr_au_count
+ && hweight64(gru->gs_dsr_map) >= dsr_au_count
+ && gru->gs_active_contexts < max_active_contexts;
+}
+
+/*
+ * TLB manangment requires tracking all GRU chiplets that have loaded a GSEG
+ * context.
+ */
+static int gru_load_mm_tracker(struct gru_state *gru, struct gru_mm_struct *gms,
+ int ctxnum)
+{
+ struct gru_mm_tracker *asids = &gms->ms_asids[gru->gs_gid];
+ unsigned short ctxbitmap = (1 << ctxnum);
+ int asid;
+
+ spin_lock(&gms->ms_asid_lock);
+ asid = asids->mt_asid;
+
+ if (asid == 0 || asids->mt_asid_gen != gru->gs_asid_gen) {
+ asid = gru_assign_asid(gru);
+ asids->mt_asid = asid;
+ asids->mt_asid_gen = gru->gs_asid_gen;
+ STAT(asid_new);
+ } else {
+ STAT(asid_reuse);
+ }
+
+ BUG_ON(asids->mt_ctxbitmap & ctxbitmap);
+ asids->mt_ctxbitmap |= ctxbitmap;
+ if (!test_bit(gru->gs_gid, gms->ms_asidmap))
+ __set_bit(gru->gs_gid, gms->ms_asidmap);
+ spin_unlock(&gms->ms_asid_lock);
+
+ gru_dbg(grudev,
+ "gru %x, gms %p, ctxnum 0x%d, asid 0x%x, asidmap 0x%lx\n",
+ gru->gs_gid, gms, ctxnum, asid, gms->ms_asidmap[0]);
+ return asid;
+}
+
+static void gru_unload_mm_tracker(struct gru_state *gru,
+ struct gru_mm_struct *gms, int ctxnum)
+{
+ struct gru_mm_tracker *asids;
+ unsigned short ctxbitmap;
+
+ asids = &gms->ms_asids[gru->gs_gid];
+ ctxbitmap = (1 << ctxnum);
+ spin_lock(&gms->ms_asid_lock);
+ BUG_ON((asids->mt_ctxbitmap & ctxbitmap) != ctxbitmap);
+ asids->mt_ctxbitmap ^= ctxbitmap;
+ gru_dbg(grudev, "gru %x, gms %p, ctxnum 0x%d, asidmap 0x%lx\n",
+ gru->gs_gid, gms, ctxnum, gms->ms_asidmap[0]);
+ spin_unlock(&gms->ms_asid_lock);
+}
+
+/*
+ * Decrement the reference count on a GTS structure. Free the structure
+ * if the reference count goes to zero.
+ */
+void gts_drop(struct gru_thread_state *gts)
+{
+ if (gts && atomic_dec_return(&gts->ts_refcnt) == 0) {
+ gru_drop_mmu_notifier(gts->ts_gms);
+ kfree(gts);
+ STAT(gts_free);
+ }
+}
+
+/*
+ * Locate the GTS structure for the current thread.
+ */
+static struct gru_thread_state *gru_find_current_gts_nolock(struct gru_vma_data
+ *vdata, int tsid)
+{
+ struct gru_thread_state *gts;
+
+ list_for_each_entry(gts, &vdata->vd_head, ts_next)
+ if (gts->ts_tsid == tsid)
+ return gts;
+ return NULL;
+}
+
+/*
+ * Allocate a thread state structure.
+ */
+static struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma,
+ struct gru_vma_data *vdata,
+ int tsid)
+{
+ struct gru_thread_state *gts;
+ int bytes;
+
+ bytes = DSR_BYTES(vdata->vd_dsr_au_count) +
+ CBR_BYTES(vdata->vd_cbr_au_count);
+ bytes += sizeof(struct gru_thread_state);
+ gts = kzalloc(bytes, GFP_KERNEL);
+ if (!gts)
+ return NULL;
+
+ STAT(gts_alloc);
+ atomic_set(&gts->ts_refcnt, 1);
+ mutex_init(&gts->ts_ctxlock);
+ gts->ts_cbr_au_count = vdata->vd_cbr_au_count;
+ gts->ts_dsr_au_count = vdata->vd_dsr_au_count;
+ gts->ts_user_options = vdata->vd_user_options;
+ gts->ts_tsid = tsid;
+ gts->ts_user_options = vdata->vd_user_options;
+ gts->ts_ctxnum = NULLCTX;
+ gts->ts_mm = current->mm;
+ gts->ts_vma = vma;
+ gts->ts_tlb_int_select = -1;
+ gts->ts_gms = gru_register_mmu_notifier();
+ if (!gts->ts_gms)
+ goto err;
+
+ gru_dbg(grudev, "alloc vdata %p, new gts %p\n", vdata, gts);
+ return gts;
+
+err:
+ gts_drop(gts);
+ return NULL;
+}
+
+/*
+ * Allocate a vma private data structure.
+ */
+struct gru_vma_data *gru_alloc_vma_data(struct vm_area_struct *vma, int tsid)
+{
+ struct gru_vma_data *vdata = NULL;
+
+ vdata = kmalloc(sizeof(*vdata), GFP_KERNEL);
+ if (!vdata)
+ return NULL;
+
+ INIT_LIST_HEAD(&vdata->vd_head);
+ spin_lock_init(&vdata->vd_lock);
+ gru_dbg(grudev, "alloc vdata %p\n", vdata);
+ return vdata;
+}
+
+/*
+ * Find the thread state structure for the current thread.
+ */
+struct gru_thread_state *gru_find_thread_state(struct vm_area_struct *vma,
+ int tsid)
+{
+ struct gru_vma_data *vdata = vma->vm_private_data;
+ struct gru_thread_state *gts;
+
+ spin_lock(&vdata->vd_lock);
+ gts = gru_find_current_gts_nolock(vdata, tsid);
+ spin_unlock(&vdata->vd_lock);
+ gru_dbg(grudev, "vma %p, gts %p\n", vma, gts);
+ return gts;
+}
+
+/*
+ * Allocate a new thread state for a GSEG. Note that races may allow
+ * another thread to race to create a gts.
+ */
+struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct *vma,
+ int tsid)
+{
+ struct gru_vma_data *vdata = vma->vm_private_data;
+ struct gru_thread_state *gts, *ngts;
+
+ gts = gru_alloc_gts(vma, vdata, tsid);
+ if (!gts)
+ return NULL;
+
+ spin_lock(&vdata->vd_lock);
+ ngts = gru_find_current_gts_nolock(vdata, tsid);
+ if (ngts) {
+ gts_drop(gts);
+ gts = ngts;
+ STAT(gts_double_allocate);
+ } else {
+ list_add(&gts->ts_next, &vdata->vd_head);
+ }
+ spin_unlock(&vdata->vd_lock);
+ gru_dbg(grudev, "vma %p, gts %p\n", vma, gts);
+ return gts;
+}
+
+/*
+ * Free the GRU context assigned to the thread state.
+ */
+static void gru_free_gru_context(struct gru_thread_state *gts)
+{
+ struct gru_state *gru;
+
+ gru = gts->ts_gru;
+ gru_dbg(grudev, "gts %p, gru %p\n", gts, gru);
+
+ spin_lock(&gru->gs_lock);
+ gru->gs_gts[gts->ts_ctxnum] = NULL;
+ free_gru_resources(gru, gts);
+ BUG_ON(test_bit(gts->ts_ctxnum, &gru->gs_context_map) == 0);
+ __clear_bit(gts->ts_ctxnum, &gru->gs_context_map);
+ gts->ts_ctxnum = NULLCTX;
+ gts->ts_gru = NULL;
+ spin_unlock(&gru->gs_lock);
+
+ gts_drop(gts);
+ STAT(free_context);
+}
+
+/*
+ * Prefetching cachelines help hardware performance.
+ * (Strictly a performance enhancement. Not functionally required).
+ */
+static void prefetch_data(void *p, int num, int stride)
+{
+ while (num-- > 0) {
+ prefetchw(p);
+ p += stride;
+ }
+}
+
+static inline long gru_copy_handle(void *d, void *s)
+{
+ memcpy(d, s, GRU_HANDLE_BYTES);
+ return GRU_HANDLE_BYTES;
+}
+
+/* rewrite in assembly & use lots of prefetch */
+static void gru_load_context_data(void *save, void *grubase, int ctxnum,
+ unsigned long cbrmap, unsigned long dsrmap)
+{
+ void *gseg, *cb, *cbe;
+ unsigned long length;
+ int i, scr;
+
+ gseg = grubase + ctxnum * GRU_GSEG_STRIDE;
+ length = hweight64(dsrmap) * GRU_DSR_AU_BYTES;
+ prefetch_data(gseg + GRU_DS_BASE, length / GRU_CACHE_LINE_BYTES,
+ GRU_CACHE_LINE_BYTES);
+
+ cb = gseg + GRU_CB_BASE;
+ cbe = grubase + GRU_CBE_BASE;
+ for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
+ prefetch_data(cb, 1, GRU_CACHE_LINE_BYTES);
+ prefetch_data(cbe + i * GRU_HANDLE_STRIDE, 1,
+ GRU_CACHE_LINE_BYTES);
+ cb += GRU_HANDLE_STRIDE;
+ }
+
+ cb = gseg + GRU_CB_BASE;
+ for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
+ save += gru_copy_handle(cb, save);
+ save += gru_copy_handle(cbe + i * GRU_HANDLE_STRIDE, save);
+ cb += GRU_HANDLE_STRIDE;
+ }
+
+ memcpy(gseg + GRU_DS_BASE, save, length);
+}
+
+static void gru_unload_context_data(void *save, void *grubase, int ctxnum,
+ unsigned long cbrmap, unsigned long dsrmap)
+{
+ void *gseg, *cb, *cbe;
+ unsigned long length;
+ int i, scr;
+
+ gseg = grubase + ctxnum * GRU_GSEG_STRIDE;
+
+ cb = gseg + GRU_CB_BASE;
+ cbe = grubase + GRU_CBE_BASE;
+ for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
+ save += gru_copy_handle(save, cb);
+ save += gru_copy_handle(save, cbe + i * GRU_HANDLE_STRIDE);
+ cb += GRU_HANDLE_STRIDE;
+ }
+ length = hweight64(dsrmap) * GRU_DSR_AU_BYTES;
+ memcpy(save, gseg + GRU_DS_BASE, length);
+}
+
+void gru_unload_context(struct gru_thread_state *gts, int savestate)
+{
+ struct gru_state *gru = gts->ts_gru;
+ struct gru_context_configuration_handle *cch;
+ int ctxnum = gts->ts_ctxnum;
+
+ zap_vma_ptes(gts->ts_vma, UGRUADDR(gts), GRU_GSEG_PAGESIZE);
+ cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);
+
+ lock_cch_handle(cch);
+ if (cch_interrupt_sync(cch))
+ BUG();
+ gru_dbg(grudev, "gts %p\n", gts);
+
+ gru_unload_mm_tracker(gru, gts->ts_gms, gts->ts_ctxnum);
+ if (savestate)
+ gru_unload_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr,
+ ctxnum, gts->ts_cbr_map,
+ gts->ts_dsr_map);
+
+ if (cch_deallocate(cch))
+ BUG();
+ gts->ts_force_unload = 0; /* ts_force_unload locked by CCH lock */
+ unlock_cch_handle(cch);
+
+ gru_free_gru_context(gts);
+ STAT(unload_context);
+}
+
+/*
+ * Load a GRU context by copying it from the thread data structure in memory
+ * to the GRU.
+ */
+static void gru_load_context(struct gru_thread_state *gts)
+{
+ struct gru_state *gru = gts->ts_gru;
+ struct gru_context_configuration_handle *cch;
+ int err, asid, ctxnum = gts->ts_ctxnum;
+
+ gru_dbg(grudev, "gts %p\n", gts);
+ cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);
+
+ lock_cch_handle(cch);
+ asid = gru_load_mm_tracker(gru, gts->ts_gms, gts->ts_ctxnum);
+ cch->tfm_fault_bit_enable =
+ (gts->ts_user_options == GRU_OPT_MISS_FMM_POLL
+ || gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
+ cch->tlb_int_enable = (gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
+ if (cch->tlb_int_enable) {
+ gts->ts_tlb_int_select = gru_cpu_fault_map_id();
+ cch->tlb_int_select = gts->ts_tlb_int_select;
+ }
+ cch->tfm_done_bit_enable = 0;
+ err = cch_allocate(cch, asid, gts->ts_cbr_map, gts->ts_dsr_map);
+ if (err) {
+ gru_dbg(grudev,
+ "err %d: cch %p, gts %p, cbr 0x%lx, dsr 0x%lx\n",
+ err, cch, gts, gts->ts_cbr_map, gts->ts_dsr_map);
+ BUG();
+ }
+
+ gru_load_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr, ctxnum,
+ gts->ts_cbr_map, gts->ts_dsr_map);
+
+ if (cch_start(cch))
+ BUG();
+ unlock_cch_handle(cch);
+
+ STAT(load_context);
+}
+
+/*
+ * Update fields in an active CCH:
+ * - retarget interrupts on local blade
+ * - force a delayed context unload by clearing the CCH asids. This
+ * forces TLB misses for new GRU instructions. The context is unloaded
+ * when the next TLB miss occurs.
+ */
+static int gru_update_cch(struct gru_thread_state *gts, int int_select)
+{
+ struct gru_context_configuration_handle *cch;
+ struct gru_state *gru = gts->ts_gru;
+ int i, ctxnum = gts->ts_ctxnum, ret = 0;
+
+ cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);
+
+ lock_cch_handle(cch);
+ if (cch->state == CCHSTATE_ACTIVE) {
+ if (gru->gs_gts[gts->ts_ctxnum] != gts)
+ goto exit;
+ if (cch_interrupt(cch))
+ BUG();
+ if (int_select >= 0) {
+ gts->ts_tlb_int_select = int_select;
+ cch->tlb_int_select = int_select;
+ } else {
+ for (i = 0; i < 8; i++)
+ cch->asid[i] = 0;
+ cch->tfm_fault_bit_enable = 0;
+ cch->tlb_int_enable = 0;
+ gts->ts_force_unload = 1;
+ }
+ if (cch_start(cch))
+ BUG();
+ ret = 1;
+ }
+exit:
+ unlock_cch_handle(cch);
+ return ret;
+}
+
+/*
+ * Update CCH tlb interrupt select. Required when all the following is true:
+ * - task's GRU context is loaded into a GRU
+ * - task is using interrupt notification for TLB faults
+ * - task has migrated to a different cpu on the same blade where
+ * it was previously running.
+ */
+static int gru_retarget_intr(struct gru_thread_state *gts)
+{
+ if (gts->ts_tlb_int_select < 0
+ || gts->ts_tlb_int_select == gru_cpu_fault_map_id())
+ return 0;
+
+ gru_dbg(grudev, "retarget from %d to %d\n", gts->ts_tlb_int_select,
+ gru_cpu_fault_map_id());
+ return gru_update_cch(gts, gru_cpu_fault_map_id());
+}
+
+
+/*
+ * Insufficient GRU resources available on the local blade. Steal a context from
+ * a process. This is a hack until a _real_ resource scheduler is written....
+ */
+#define next_ctxnum(n) ((n) < GRU_NUM_CCH - 2 ? (n) + 1 : 0)
+#define next_gru(b, g) (((g) < &(b)->bs_grus[GRU_CHIPLETS_PER_BLADE - 1]) ? \
+ ((g)+1) : &(b)->bs_grus[0])
+
+static void gru_steal_context(struct gru_thread_state *gts)
+{
+ struct gru_blade_state *blade;
+ struct gru_state *gru, *gru0;
+ struct gru_thread_state *ngts = NULL;
+ int ctxnum, ctxnum0, flag = 0, cbr, dsr;
+
+ cbr = gts->ts_cbr_au_count;
+ dsr = gts->ts_dsr_au_count;
+
+ preempt_disable();
+ blade = gru_base[uv_numa_blade_id()];
+ spin_lock(&blade->bs_lock);
+
+ ctxnum = next_ctxnum(blade->bs_lru_ctxnum);
+ gru = blade->bs_lru_gru;
+ if (ctxnum == 0)
+ gru = next_gru(blade, gru);
+ ctxnum0 = ctxnum;
+ gru0 = gru;
+ while (1) {
+ if (check_gru_resources(gru, cbr, dsr, GRU_NUM_CCH))
+ break;
+ spin_lock(&gru->gs_lock);
+ for (; ctxnum < GRU_NUM_CCH; ctxnum++) {
+ if (flag && gru == gru0 && ctxnum == ctxnum0)
+ break;
+ ngts = gru->gs_gts[ctxnum];
+ /*
+ * We are grabbing locks out of order, so trylock is
+ * needed. GTSs are usually not locked, so the odds of
+ * success are high. If trylock fails, try to steal a
+ * different GSEG.
+ */
+ if (ngts && mutex_trylock(&ngts->ts_ctxlock))
+ break;
+ ngts = NULL;
+ flag = 1;
+ }
+ spin_unlock(&gru->gs_lock);
+ if (ngts || (flag && gru == gru0 && ctxnum == ctxnum0))
+ break;
+ ctxnum = 0;
+ gru = next_gru(blade, gru);
+ }
+ blade->bs_lru_gru = gru;
+ blade->bs_lru_ctxnum = ctxnum;
+ spin_unlock(&blade->bs_lock);
+ preempt_enable();
+
+ if (ngts) {
+ STAT(steal_context);
+ ngts->ts_steal_jiffies = jiffies;
+ gru_unload_context(ngts, 1);
+ mutex_unlock(&ngts->ts_ctxlock);
+ } else {
+ STAT(steal_context_failed);
+ }
+ gru_dbg(grudev,
+ "stole gru %x, ctxnum %d from gts %p. Need cb %d, ds %d;"
+ " avail cb %ld, ds %ld\n",
+ gru->gs_gid, ctxnum, ngts, cbr, dsr, hweight64(gru->gs_cbr_map),
+ hweight64(gru->gs_dsr_map));
+}
+
+/*
+ * Scan the GRUs on the local blade & assign a GRU context.
+ */
+static struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts)
+{
+ struct gru_state *gru, *grux;
+ int i, max_active_contexts;
+
+ preempt_disable();
+
+again:
+ gru = NULL;
+ max_active_contexts = GRU_NUM_CCH;
+ for_each_gru_on_blade(grux, uv_numa_blade_id(), i) {
+ if (check_gru_resources(grux, gts->ts_cbr_au_count,
+ gts->ts_dsr_au_count,
+ max_active_contexts)) {
+ gru = grux;
+ max_active_contexts = grux->gs_active_contexts;
+ if (max_active_contexts == 0)
+ break;
+ }
+ }
+
+ if (gru) {
+ spin_lock(&gru->gs_lock);
+ if (!check_gru_resources(gru, gts->ts_cbr_au_count,
+ gts->ts_dsr_au_count, GRU_NUM_CCH)) {
+ spin_unlock(&gru->gs_lock);
+ goto again;
+ }
+ reserve_gru_resources(gru, gts);
+ gts->ts_gru = gru;
+ gts->ts_ctxnum =
+ find_first_zero_bit(&gru->gs_context_map, GRU_NUM_CCH);
+ BUG_ON(gts->ts_ctxnum == GRU_NUM_CCH);
+ atomic_inc(&gts->ts_refcnt);
+ gru->gs_gts[gts->ts_ctxnum] = gts;
+ __set_bit(gts->ts_ctxnum, &gru->gs_context_map);
+ spin_unlock(&gru->gs_lock);
+
+ STAT(assign_context);
+ gru_dbg(grudev,
+ "gseg %p, gts %p, gru %x, ctx %d, cbr %d, dsr %d\n",
+ gseg_virtual_address(gts->ts_gru, gts->ts_ctxnum), gts,
+ gts->ts_gru->gs_gid, gts->ts_ctxnum,
+ gts->ts_cbr_au_count, gts->ts_dsr_au_count);
+ } else {
+ gru_dbg(grudev, "failed to allocate a GTS %s\n", "");
+ STAT(assign_context_failed);
+ }
+
+ preempt_enable();
+ return gru;
+}
+
+/*
+ * gru_nopage
+ *
+ * Map the user's GRU segment
+ *
+ * Note: gru segments alway mmaped on GRU_GSEG_PAGESIZE boundaries.
+ */
+int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ struct gru_thread_state *gts;
+ unsigned long paddr, vaddr;
+
+ vaddr = (unsigned long)vmf->virtual_address;
+ gru_dbg(grudev, "vma %p, vaddr 0x%lx (0x%lx)\n",
+ vma, vaddr, GSEG_BASE(vaddr));
+ STAT(nopfn);
+
+ /* The following check ensures vaddr is a valid address in the VMA */
+ gts = gru_find_thread_state(vma, TSID(vaddr, vma));
+ if (!gts)
+ return VM_FAULT_SIGBUS;
+
+again:
+ preempt_disable();
+ mutex_lock(&gts->ts_ctxlock);
+ if (gts->ts_gru) {
+ if (gts->ts_gru->gs_blade_id != uv_numa_blade_id()) {
+ STAT(migrated_nopfn_unload);
+ gru_unload_context(gts, 1);
+ } else {
+ if (gru_retarget_intr(gts))
+ STAT(migrated_nopfn_retarget);
+ }
+ }
+
+ if (!gts->ts_gru) {
+ if (!gru_assign_gru_context(gts)) {
+ mutex_unlock(&gts->ts_ctxlock);
+ preempt_enable();
+ schedule_timeout(GRU_ASSIGN_DELAY); /* true hack ZZZ */
+ if (gts->ts_steal_jiffies + GRU_STEAL_DELAY < jiffies)
+ gru_steal_context(gts);
+ goto again;
+ }
+ gru_load_context(gts);
+ paddr = gseg_physical_address(gts->ts_gru, gts->ts_ctxnum);
+ remap_pfn_range(vma, vaddr & ~(GRU_GSEG_PAGESIZE - 1),
+ paddr >> PAGE_SHIFT, GRU_GSEG_PAGESIZE,
+ vma->vm_page_prot);
+ }
+
+ mutex_unlock(&gts->ts_ctxlock);
+ preempt_enable();
+
+ return VM_FAULT_NOPAGE;
+}
+
diff --git a/drivers/misc/sgi-gru/gruprocfs.c b/drivers/misc/sgi-gru/gruprocfs.c
new file mode 100644
index 00000000000..533923f83f1
--- /dev/null
+++ b/drivers/misc/sgi-gru/gruprocfs.c
@@ -0,0 +1,336 @@
+/*
+ * SN Platform GRU Driver
+ *
+ * PROC INTERFACES
+ *
+ * This file supports the /proc interfaces for the GRU driver
+ *
+ * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/proc_fs.h>
+#include <linux/device.h>
+#include <linux/seq_file.h>
+#include <linux/uaccess.h>
+#include "gru.h"
+#include "grulib.h"
+#include "grutables.h"
+
+#define printstat(s, f) printstat_val(s, &gru_stats.f, #f)
+
+static void printstat_val(struct seq_file *s, atomic_long_t *v, char *id)
+{
+ unsigned long val = atomic_long_read(v);
+
+ if (val)
+ seq_printf(s, "%16lu %s\n", val, id);
+}
+
+static int statistics_show(struct seq_file *s, void *p)
+{
+ printstat(s, vdata_alloc);
+ printstat(s, vdata_free);
+ printstat(s, gts_alloc);
+ printstat(s, gts_free);
+ printstat(s, vdata_double_alloc);
+ printstat(s, gts_double_allocate);
+ printstat(s, assign_context);
+ printstat(s, assign_context_failed);
+ printstat(s, free_context);
+ printstat(s, load_context);
+ printstat(s, unload_context);
+ printstat(s, steal_context);
+ printstat(s, steal_context_failed);
+ printstat(s, nopfn);
+ printstat(s, break_cow);
+ printstat(s, asid_new);
+ printstat(s, asid_next);
+ printstat(s, asid_wrap);
+ printstat(s, asid_reuse);
+ printstat(s, intr);
+ printstat(s, call_os);
+ printstat(s, call_os_check_for_bug);
+ printstat(s, call_os_wait_queue);
+ printstat(s, user_flush_tlb);
+ printstat(s, user_unload_context);
+ printstat(s, user_exception);
+ printstat(s, set_task_slice);
+ printstat(s, migrate_check);
+ printstat(s, migrated_retarget);
+ printstat(s, migrated_unload);
+ printstat(s, migrated_unload_delay);
+ printstat(s, migrated_nopfn_retarget);
+ printstat(s, migrated_nopfn_unload);
+ printstat(s, tlb_dropin);
+ printstat(s, tlb_dropin_fail_no_asid);
+ printstat(s, tlb_dropin_fail_upm);
+ printstat(s, tlb_dropin_fail_invalid);
+ printstat(s, tlb_dropin_fail_range_active);
+ printstat(s, tlb_dropin_fail_idle);
+ printstat(s, tlb_dropin_fail_fmm);
+ printstat(s, mmu_invalidate_range);
+ printstat(s, mmu_invalidate_page);
+ printstat(s, mmu_clear_flush_young);
+ printstat(s, flush_tlb);
+ printstat(s, flush_tlb_gru);
+ printstat(s, flush_tlb_gru_tgh);
+ printstat(s, flush_tlb_gru_zero_asid);
+ printstat(s, copy_gpa);
+ printstat(s, mesq_receive);
+ printstat(s, mesq_receive_none);
+ printstat(s, mesq_send);
+ printstat(s, mesq_send_failed);
+ printstat(s, mesq_noop);
+ printstat(s, mesq_send_unexpected_error);
+ printstat(s, mesq_send_lb_overflow);
+ printstat(s, mesq_send_qlimit_reached);
+ printstat(s, mesq_send_amo_nacked);
+ printstat(s, mesq_send_put_nacked);
+ printstat(s, mesq_qf_not_full);
+ printstat(s, mesq_qf_locked);
+ printstat(s, mesq_qf_noop_not_full);
+ printstat(s, mesq_qf_switch_head_failed);
+ printstat(s, mesq_qf_unexpected_error);
+ printstat(s, mesq_noop_unexpected_error);
+ printstat(s, mesq_noop_lb_overflow);
+ printstat(s, mesq_noop_qlimit_reached);
+ printstat(s, mesq_noop_amo_nacked);
+ printstat(s, mesq_noop_put_nacked);
+ return 0;
+}
+
+static ssize_t statistics_write(struct file *file, const char __user *userbuf,
+ size_t count, loff_t *data)
+{
+ memset(&gru_stats, 0, sizeof(gru_stats));
+ return count;
+}
+
+static int options_show(struct seq_file *s, void *p)
+{
+ seq_printf(s, "0x%lx\n", gru_options);
+ return 0;
+}
+
+static ssize_t options_write(struct file *file, const char __user *userbuf,
+ size_t count, loff_t *data)
+{
+ unsigned long val;
+ char buf[80];
+
+ if (copy_from_user
+ (buf, userbuf, count < sizeof(buf) ? count : sizeof(buf)))
+ return -EFAULT;
+ if (!strict_strtoul(buf, 10, &val))
+ gru_options = val;
+
+ return count;
+}
+
+static int cch_seq_show(struct seq_file *file, void *data)
+{
+ long gid = *(long *)data;
+ int i;
+ struct gru_state *gru = GID_TO_GRU(gid);
+ struct gru_thread_state *ts;
+ const char *mode[] = { "??", "UPM", "INTR", "OS_POLL" };
+
+ if (gid == 0)
+ seq_printf(file, "#%5s%5s%6s%9s%6s%8s%8s\n", "gid", "bid",
+ "ctx#", "pid", "cbrs", "dsbytes", "mode");
+ if (gru)
+ for (i = 0; i < GRU_NUM_CCH; i++) {
+ ts = gru->gs_gts[i];
+ if (!ts)
+ continue;
+ seq_printf(file, " %5d%5d%6d%9d%6d%8d%8s\n",
+ gru->gs_gid, gru->gs_blade_id, i,
+ ts->ts_tgid_owner,
+ ts->ts_cbr_au_count * GRU_CBR_AU_SIZE,
+ ts->ts_cbr_au_count * GRU_DSR_AU_BYTES,
+ mode[ts->ts_user_options &
+ GRU_OPT_MISS_MASK]);
+ }
+
+ return 0;
+}
+
+static int gru_seq_show(struct seq_file *file, void *data)
+{
+ long gid = *(long *)data, ctxfree, cbrfree, dsrfree;
+ struct gru_state *gru = GID_TO_GRU(gid);
+
+ if (gid == 0) {
+ seq_printf(file, "#%5s%5s%7s%6s%6s%8s%6s%6s\n", "gid", "nid",
+ "ctx", "cbr", "dsr", "ctx", "cbr", "dsr");
+ seq_printf(file, "#%5s%5s%7s%6s%6s%8s%6s%6s\n", "", "", "busy",
+ "busy", "busy", "free", "free", "free");
+ }
+ if (gru) {
+ ctxfree = GRU_NUM_CCH - gru->gs_active_contexts;
+ cbrfree = hweight64(gru->gs_cbr_map) * GRU_CBR_AU_SIZE;
+ dsrfree = hweight64(gru->gs_dsr_map) * GRU_DSR_AU_BYTES;
+ seq_printf(file, " %5d%5d%7ld%6ld%6ld%8ld%6ld%6ld\n",
+ gru->gs_gid, gru->gs_blade_id, GRU_NUM_CCH - ctxfree,
+ GRU_NUM_CBE - cbrfree, GRU_NUM_DSR_BYTES - dsrfree,
+ ctxfree, cbrfree, dsrfree);
+ }
+
+ return 0;
+}
+
+static void seq_stop(struct seq_file *file, void *data)
+{
+}
+
+static void *seq_start(struct seq_file *file, loff_t *gid)
+{
+ if (*gid < GRU_MAX_GRUS)
+ return gid;
+ return NULL;
+}
+
+static void *seq_next(struct seq_file *file, void *data, loff_t *gid)
+{
+ (*gid)++;
+ if (*gid < GRU_MAX_GRUS)
+ return gid;
+ return NULL;
+}
+
+static const struct seq_operations cch_seq_ops = {
+ .start = seq_start,
+ .next = seq_next,
+ .stop = seq_stop,
+ .show = cch_seq_show
+};
+
+static const struct seq_operations gru_seq_ops = {
+ .start = seq_start,
+ .next = seq_next,
+ .stop = seq_stop,
+ .show = gru_seq_show
+};
+
+static int statistics_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, statistics_show, NULL);
+}
+
+static int options_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, options_show, NULL);
+}
+
+static int cch_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &cch_seq_ops);
+}
+
+static int gru_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &gru_seq_ops);
+}
+
+/* *INDENT-OFF* */
+static const struct file_operations statistics_fops = {
+ .open = statistics_open,
+ .read = seq_read,
+ .write = statistics_write,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static const struct file_operations options_fops = {
+ .open = options_open,
+ .read = seq_read,
+ .write = options_write,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static const struct file_operations cch_fops = {
+ .open = cch_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+static const struct file_operations gru_fops = {
+ .open = gru_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+static struct proc_entry {
+ char *name;
+ int mode;
+ const struct file_operations *fops;
+ struct proc_dir_entry *entry;
+} proc_files[] = {
+ {"statistics", 0644, &statistics_fops},
+ {"debug_options", 0644, &options_fops},
+ {"cch_status", 0444, &cch_fops},
+ {"gru_status", 0444, &gru_fops},
+ {NULL}
+};
+/* *INDENT-ON* */
+
+static struct proc_dir_entry *proc_gru __read_mostly;
+
+static int create_proc_file(struct proc_entry *p)
+{
+ p->entry = create_proc_entry(p->name, p->mode, proc_gru);
+ if (!p->entry)
+ return -1;
+ p->entry->proc_fops = p->fops;
+ return 0;
+}
+
+static void delete_proc_files(void)
+{
+ struct proc_entry *p;
+
+ if (proc_gru) {
+ for (p = proc_files; p->name; p++)
+ if (p->entry)
+ remove_proc_entry(p->name, proc_gru);
+ remove_proc_entry("gru", NULL);
+ }
+}
+
+int gru_proc_init(void)
+{
+ struct proc_entry *p;
+
+ proc_mkdir("sgi_uv", NULL);
+ proc_gru = proc_mkdir("sgi_uv/gru", NULL);
+
+ for (p = proc_files; p->name; p++)
+ if (create_proc_file(p))
+ goto err;
+ return 0;
+
+err:
+ delete_proc_files();
+ return -1;
+}
+
+void gru_proc_exit(void)
+{
+ delete_proc_files();
+}
diff --git a/drivers/misc/sgi-gru/grutables.h b/drivers/misc/sgi-gru/grutables.h
new file mode 100644
index 00000000000..4251018f70f
--- /dev/null
+++ b/drivers/misc/sgi-gru/grutables.h
@@ -0,0 +1,609 @@
+/*
+ * SN Platform GRU Driver
+ *
+ * GRU DRIVER TABLES, MACROS, externs, etc
+ *
+ * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef __GRUTABLES_H__
+#define __GRUTABLES_H__
+
+/*
+ * GRU Chiplet:
+ * The GRU is a user addressible memory accelerator. It provides
+ * several forms of load, store, memset, bcopy instructions. In addition, it
+ * contains special instructions for AMOs, sending messages to message
+ * queues, etc.
+ *
+ * The GRU is an integral part of the node controller. It connects
+ * directly to the cpu socket. In its current implementation, there are 2
+ * GRU chiplets in the node controller on each blade (~node).
+ *
+ * The entire GRU memory space is fully coherent and cacheable by the cpus.
+ *
+ * Each GRU chiplet has a physical memory map that looks like the following:
+ *
+ * +-----------------+
+ * |/////////////////|
+ * |/////////////////|
+ * |/////////////////|
+ * |/////////////////|
+ * |/////////////////|
+ * |/////////////////|
+ * |/////////////////|
+ * |/////////////////|
+ * +-----------------+
+ * | system control |
+ * +-----------------+ _______ +-------------+
+ * |/////////////////| / | |
+ * |/////////////////| / | |
+ * |/////////////////| / | instructions|
+ * |/////////////////| / | |
+ * |/////////////////| / | |
+ * |/////////////////| / |-------------|
+ * |/////////////////| / | |
+ * +-----------------+ | |
+ * | context 15 | | data |
+ * +-----------------+ | |
+ * | ...... | \ | |
+ * +-----------------+ \____________ +-------------+
+ * | context 1 |
+ * +-----------------+
+ * | context 0 |
+ * +-----------------+
+ *
+ * Each of the "contexts" is a chunk of memory that can be mmaped into user
+ * space. The context consists of 2 parts:
+ *
+ * - an instruction space that can be directly accessed by the user
+ * to issue GRU instructions and to check instruction status.
+ *
+ * - a data area that acts as normal RAM.
+ *
+ * User instructions contain virtual addresses of data to be accessed by the
+ * GRU. The GRU contains a TLB that is used to convert these user virtual
+ * addresses to physical addresses.
+ *
+ * The "system control" area of the GRU chiplet is used by the kernel driver
+ * to manage user contexts and to perform functions such as TLB dropin and
+ * purging.
+ *
+ * One context may be reserved for the kernel and used for cross-partition
+ * communication. The GRU will also be used to asynchronously zero out
+ * large blocks of memory (not currently implemented).
+ *
+ *
+ * Tables:
+ *
+ * VDATA-VMA Data - Holds a few parameters. Head of linked list of
+ * GTS tables for threads using the GSEG
+ * GTS - Gru Thread State - contains info for managing a GSEG context. A
+ * GTS is allocated for each thread accessing a
+ * GSEG.
+ * GTD - GRU Thread Data - contains shadow copy of GRU data when GSEG is
+ * not loaded into a GRU
+ * GMS - GRU Memory Struct - Used to manage TLB shootdowns. Tracks GRUs
+ * where a GSEG has been loaded. Similar to
+ * an mm_struct but for GRU.
+ *
+ * GS - GRU State - Used to manage the state of a GRU chiplet
+ * BS - Blade State - Used to manage state of all GRU chiplets
+ * on a blade
+ *
+ *
+ * Normal task tables for task using GRU.
+ * - 2 threads in process
+ * - 2 GSEGs open in process
+ * - GSEG1 is being used by both threads
+ * - GSEG2 is used only by thread 2
+ *
+ * task -->|
+ * task ---+---> mm ->------ (notifier) -------+-> gms
+ * | |
+ * |--> vma -> vdata ---> gts--->| GSEG1 (thread1)
+ * | | |
+ * | +-> gts--->| GSEG1 (thread2)
+ * | |
+ * |--> vma -> vdata ---> gts--->| GSEG2 (thread2)
+ * .
+ * .
+ *
+ * GSEGs are marked DONTCOPY on fork
+ *
+ * At open
+ * file.private_data -> NULL
+ *
+ * At mmap,
+ * vma -> vdata
+ *
+ * After gseg reference
+ * vma -> vdata ->gts
+ *
+ * After fork
+ * parent
+ * vma -> vdata -> gts
+ * child
+ * (vma is not copied)
+ *
+ */
+
+#include <linux/rmap.h>
+#include <linux/interrupt.h>
+#include <linux/mutex.h>
+#include <linux/wait.h>
+#include <linux/mmu_notifier.h>
+#include "gru.h"
+#include "gruhandles.h"
+
+extern struct gru_stats_s gru_stats;
+extern struct gru_blade_state *gru_base[];
+extern unsigned long gru_start_paddr, gru_end_paddr;
+
+#define GRU_MAX_BLADES MAX_NUMNODES
+#define GRU_MAX_GRUS (GRU_MAX_BLADES * GRU_CHIPLETS_PER_BLADE)
+
+#define GRU_DRIVER_ID_STR "SGI GRU Device Driver"
+#define GRU_DRIVER_VERSION_STR "0.80"
+
+/*
+ * GRU statistics.
+ */
+struct gru_stats_s {
+ atomic_long_t vdata_alloc;
+ atomic_long_t vdata_free;
+ atomic_long_t gts_alloc;
+ atomic_long_t gts_free;
+ atomic_long_t vdata_double_alloc;
+ atomic_long_t gts_double_allocate;
+ atomic_long_t assign_context;
+ atomic_long_t assign_context_failed;
+ atomic_long_t free_context;
+ atomic_long_t load_context;
+ atomic_long_t unload_context;
+ atomic_long_t steal_context;
+ atomic_long_t steal_context_failed;
+ atomic_long_t nopfn;
+ atomic_long_t break_cow;
+ atomic_long_t asid_new;
+ atomic_long_t asid_next;
+ atomic_long_t asid_wrap;
+ atomic_long_t asid_reuse;
+ atomic_long_t intr;
+ atomic_long_t call_os;
+ atomic_long_t call_os_check_for_bug;
+ atomic_long_t call_os_wait_queue;
+ atomic_long_t user_flush_tlb;
+ atomic_long_t user_unload_context;
+ atomic_long_t user_exception;
+ atomic_long_t set_task_slice;
+ atomic_long_t migrate_check;
+ atomic_long_t migrated_retarget;
+ atomic_long_t migrated_unload;
+ atomic_long_t migrated_unload_delay;
+ atomic_long_t migrated_nopfn_retarget;
+ atomic_long_t migrated_nopfn_unload;
+ atomic_long_t tlb_dropin;
+ atomic_long_t tlb_dropin_fail_no_asid;
+ atomic_long_t tlb_dropin_fail_upm;
+ atomic_long_t tlb_dropin_fail_invalid;
+ atomic_long_t tlb_dropin_fail_range_active;
+ atomic_long_t tlb_dropin_fail_idle;
+ atomic_long_t tlb_dropin_fail_fmm;
+ atomic_long_t mmu_invalidate_range;
+ atomic_long_t mmu_invalidate_page;
+ atomic_long_t mmu_clear_flush_young;
+ atomic_long_t flush_tlb;
+ atomic_long_t flush_tlb_gru;
+ atomic_long_t flush_tlb_gru_tgh;
+ atomic_long_t flush_tlb_gru_zero_asid;
+
+ atomic_long_t copy_gpa;
+
+ atomic_long_t mesq_receive;
+ atomic_long_t mesq_receive_none;
+ atomic_long_t mesq_send;
+ atomic_long_t mesq_send_failed;
+ atomic_long_t mesq_noop;
+ atomic_long_t mesq_send_unexpected_error;
+ atomic_long_t mesq_send_lb_overflow;
+ atomic_long_t mesq_send_qlimit_reached;
+ atomic_long_t mesq_send_amo_nacked;
+ atomic_long_t mesq_send_put_nacked;
+ atomic_long_t mesq_qf_not_full;
+ atomic_long_t mesq_qf_locked;
+ atomic_long_t mesq_qf_noop_not_full;
+ atomic_long_t mesq_qf_switch_head_failed;
+ atomic_long_t mesq_qf_unexpected_error;
+ atomic_long_t mesq_noop_unexpected_error;
+ atomic_long_t mesq_noop_lb_overflow;
+ atomic_long_t mesq_noop_qlimit_reached;
+ atomic_long_t mesq_noop_amo_nacked;
+ atomic_long_t mesq_noop_put_nacked;
+
+};
+
+#define OPT_DPRINT 1
+#define OPT_STATS 2
+#define GRU_QUICKLOOK 4
+
+
+#define IRQ_GRU 110 /* Starting IRQ number for interrupts */
+
+/* Delay in jiffies between attempts to assign a GRU context */
+#define GRU_ASSIGN_DELAY ((HZ * 20) / 1000)
+
+/*
+ * If a process has it's context stolen, min delay in jiffies before trying to
+ * steal a context from another process.
+ */
+#define GRU_STEAL_DELAY ((HZ * 200) / 1000)
+
+#define STAT(id) do { \
+ if (gru_options & OPT_STATS) \
+ atomic_long_inc(&gru_stats.id); \
+ } while (0)
+
+#ifdef CONFIG_SGI_GRU_DEBUG
+#define gru_dbg(dev, fmt, x...) \
+ do { \
+ if (gru_options & OPT_DPRINT) \
+ dev_dbg(dev, "%s: " fmt, __func__, x); \
+ } while (0)
+#else
+#define gru_dbg(x...)
+#endif
+
+/*-----------------------------------------------------------------------------
+ * ASID management
+ */
+#define MAX_ASID 0xfffff0
+#define MIN_ASID 8
+#define ASID_INC 8 /* number of regions */
+
+/* Generate a GRU asid value from a GRU base asid & a virtual address. */
+#if defined CONFIG_IA64
+#define VADDR_HI_BIT 64
+#define GRUREGION(addr) ((addr) >> (VADDR_HI_BIT - 3) & 3)
+#elif defined __x86_64
+#define VADDR_HI_BIT 48
+#define GRUREGION(addr) (0) /* ZZZ could do better */
+#else
+#error "Unsupported architecture"
+#endif
+#define GRUASID(asid, addr) ((asid) + GRUREGION(addr))
+
+/*------------------------------------------------------------------------------
+ * File & VMS Tables
+ */
+
+struct gru_state;
+
+/*
+ * This structure is pointed to from the mmstruct via the notifier pointer.
+ * There is one of these per address space.
+ */
+struct gru_mm_tracker {
+ unsigned int mt_asid_gen; /* ASID wrap count */
+ int mt_asid; /* current base ASID for gru */
+ unsigned short mt_ctxbitmap; /* bitmap of contexts using
+ asid */
+};
+
+struct gru_mm_struct {
+ struct mmu_notifier ms_notifier;
+ atomic_t ms_refcnt;
+ spinlock_t ms_asid_lock; /* protects ASID assignment */
+ atomic_t ms_range_active;/* num range_invals active */
+ char ms_released;
+ wait_queue_head_t ms_wait_queue;
+ DECLARE_BITMAP(ms_asidmap, GRU_MAX_GRUS);
+ struct gru_mm_tracker ms_asids[GRU_MAX_GRUS];
+};
+
+/*
+ * One of these structures is allocated when a GSEG is mmaped. The
+ * structure is pointed to by the vma->vm_private_data field in the vma struct.
+ */
+struct gru_vma_data {
+ spinlock_t vd_lock; /* Serialize access to vma */
+ struct list_head vd_head; /* head of linked list of gts */
+ long vd_user_options;/* misc user option flags */
+ int vd_cbr_au_count;
+ int vd_dsr_au_count;
+};
+
+/*
+ * One of these is allocated for each thread accessing a mmaped GRU. A linked
+ * list of these structure is hung off the struct gru_vma_data in the mm_struct.
+ */
+struct gru_thread_state {
+ struct list_head ts_next; /* list - head at vma-private */
+ struct mutex ts_ctxlock; /* load/unload CTX lock */
+ struct mm_struct *ts_mm; /* mm currently mapped to
+ context */
+ struct vm_area_struct *ts_vma; /* vma of GRU context */
+ struct gru_state *ts_gru; /* GRU where the context is
+ loaded */
+ struct gru_mm_struct *ts_gms; /* asid & ioproc struct */
+ unsigned long ts_cbr_map; /* map of allocated CBRs */
+ unsigned long ts_dsr_map; /* map of allocated DATA
+ resources */
+ unsigned long ts_steal_jiffies;/* jiffies when context last
+ stolen */
+ long ts_user_options;/* misc user option flags */
+ pid_t ts_tgid_owner; /* task that is using the
+ context - for migration */
+ int ts_tsid; /* thread that owns the
+ structure */
+ int ts_tlb_int_select;/* target cpu if interrupts
+ enabled */
+ int ts_ctxnum; /* context number where the
+ context is loaded */
+ atomic_t ts_refcnt; /* reference count GTS */
+ unsigned char ts_dsr_au_count;/* Number of DSR resources
+ required for contest */
+ unsigned char ts_cbr_au_count;/* Number of CBR resources
+ required for contest */
+ char ts_force_unload;/* force context to be unloaded
+ after migration */
+ char ts_cbr_idx[GRU_CBR_AU];/* CBR numbers of each
+ allocated CB */
+ unsigned long ts_gdata[0]; /* save area for GRU data (CB,
+ DS, CBE) */
+};
+
+/*
+ * Threaded programs actually allocate an array of GSEGs when a context is
+ * created. Each thread uses a separate GSEG. TSID is the index into the GSEG
+ * array.
+ */
+#define TSID(a, v) (((a) - (v)->vm_start) / GRU_GSEG_PAGESIZE)
+#define UGRUADDR(gts) ((gts)->ts_vma->vm_start + \
+ (gts)->ts_tsid * GRU_GSEG_PAGESIZE)
+
+#define NULLCTX (-1) /* if context not loaded into GRU */
+
+/*-----------------------------------------------------------------------------
+ * GRU State Tables
+ */
+
+/*
+ * One of these exists for each GRU chiplet.
+ */
+struct gru_state {
+ struct gru_blade_state *gs_blade; /* GRU state for entire
+ blade */
+ unsigned long gs_gru_base_paddr; /* Physical address of
+ gru segments (64) */
+ void *gs_gru_base_vaddr; /* Virtual address of
+ gru segments (64) */
+ unsigned char gs_gid; /* unique GRU number */
+ unsigned char gs_tgh_local_shift; /* used to pick TGH for
+ local flush */
+ unsigned char gs_tgh_first_remote; /* starting TGH# for
+ remote flush */
+ unsigned short gs_blade_id; /* blade of GRU */
+ spinlock_t gs_asid_lock; /* lock used for
+ assigning asids */
+ spinlock_t gs_lock; /* lock used for
+ assigning contexts */
+
+ /* -- the following are protected by the gs_asid_lock spinlock ---- */
+ unsigned int gs_asid; /* Next availe ASID */
+ unsigned int gs_asid_limit; /* Limit of available
+ ASIDs */
+ unsigned int gs_asid_gen; /* asid generation.
+ Inc on wrap */
+
+ /* --- the following fields are protected by the gs_lock spinlock --- */
+ unsigned long gs_context_map; /* bitmap to manage
+ contexts in use */
+ unsigned long gs_cbr_map; /* bitmap to manage CB
+ resources */
+ unsigned long gs_dsr_map; /* bitmap used to manage
+ DATA resources */
+ unsigned int gs_reserved_cbrs; /* Number of kernel-
+ reserved cbrs */
+ unsigned int gs_reserved_dsr_bytes; /* Bytes of kernel-
+ reserved dsrs */
+ unsigned short gs_active_contexts; /* number of contexts
+ in use */
+ struct gru_thread_state *gs_gts[GRU_NUM_CCH]; /* GTS currently using
+ the context */
+};
+
+/*
+ * This structure contains the GRU state for all the GRUs on a blade.
+ */
+struct gru_blade_state {
+ void *kernel_cb; /* First kernel
+ reserved cb */
+ void *kernel_dsr; /* First kernel
+ reserved DSR */
+ /* ---- the following are protected by the bs_lock spinlock ---- */
+ spinlock_t bs_lock; /* lock used for
+ stealing contexts */
+ int bs_lru_ctxnum; /* STEAL - last context
+ stolen */
+ struct gru_state *bs_lru_gru; /* STEAL - last gru
+ stolen */
+
+ struct gru_state bs_grus[GRU_CHIPLETS_PER_BLADE];
+};
+
+/*-----------------------------------------------------------------------------
+ * Address Primitives
+ */
+#define get_tfm_for_cpu(g, c) \
+ ((struct gru_tlb_fault_map *)get_tfm((g)->gs_gru_base_vaddr, (c)))
+#define get_tfh_by_index(g, i) \
+ ((struct gru_tlb_fault_handle *)get_tfh((g)->gs_gru_base_vaddr, (i)))
+#define get_tgh_by_index(g, i) \
+ ((struct gru_tlb_global_handle *)get_tgh((g)->gs_gru_base_vaddr, (i)))
+#define get_cbe_by_index(g, i) \
+ ((struct gru_control_block_extended *)get_cbe((g)->gs_gru_base_vaddr,\
+ (i)))
+
+/*-----------------------------------------------------------------------------
+ * Useful Macros
+ */
+
+/* Given a blade# & chiplet#, get a pointer to the GRU */
+#define get_gru(b, c) (&gru_base[b]->bs_grus[c])
+
+/* Number of bytes to save/restore when unloading/loading GRU contexts */
+#define DSR_BYTES(dsr) ((dsr) * GRU_DSR_AU_BYTES)
+#define CBR_BYTES(cbr) ((cbr) * GRU_HANDLE_BYTES * GRU_CBR_AU_SIZE * 2)
+
+/* Convert a user CB number to the actual CBRNUM */
+#define thread_cbr_number(gts, n) ((gts)->ts_cbr_idx[(n) / GRU_CBR_AU_SIZE] \
+ * GRU_CBR_AU_SIZE + (n) % GRU_CBR_AU_SIZE)
+
+/* Convert a gid to a pointer to the GRU */
+#define GID_TO_GRU(gid) \
+ (gru_base[(gid) / GRU_CHIPLETS_PER_BLADE] ? \
+ (&gru_base[(gid) / GRU_CHIPLETS_PER_BLADE]-> \
+ bs_grus[(gid) % GRU_CHIPLETS_PER_BLADE]) : \
+ NULL)
+
+/* Scan all active GRUs in a GRU bitmap */
+#define for_each_gru_in_bitmap(gid, map) \
+ for ((gid) = find_first_bit((map), GRU_MAX_GRUS); (gid) < GRU_MAX_GRUS;\
+ (gid)++, (gid) = find_next_bit((map), GRU_MAX_GRUS, (gid)))
+
+/* Scan all active GRUs on a specific blade */
+#define for_each_gru_on_blade(gru, nid, i) \
+ for ((gru) = gru_base[nid]->bs_grus, (i) = 0; \
+ (i) < GRU_CHIPLETS_PER_BLADE; \
+ (i)++, (gru)++)
+
+/* Scan all active GTSs on a gru. Note: must hold ss_lock to use this macro. */
+#define for_each_gts_on_gru(gts, gru, ctxnum) \
+ for ((ctxnum) = 0; (ctxnum) < GRU_NUM_CCH; (ctxnum)++) \
+ if (((gts) = (gru)->gs_gts[ctxnum]))
+
+/* Scan each CBR whose bit is set in a TFM (or copy of) */
+#define for_each_cbr_in_tfm(i, map) \
+ for ((i) = find_first_bit(map, GRU_NUM_CBE); \
+ (i) < GRU_NUM_CBE; \
+ (i)++, (i) = find_next_bit(map, GRU_NUM_CBE, i))
+
+/* Scan each CBR in a CBR bitmap. Note: multiple CBRs in an allocation unit */
+#define for_each_cbr_in_allocation_map(i, map, k) \
+ for ((k) = find_first_bit(map, GRU_CBR_AU); (k) < GRU_CBR_AU; \
+ (k) = find_next_bit(map, GRU_CBR_AU, (k) + 1)) \
+ for ((i) = (k)*GRU_CBR_AU_SIZE; \
+ (i) < ((k) + 1) * GRU_CBR_AU_SIZE; (i)++)
+
+/* Scan each DSR in a DSR bitmap. Note: multiple DSRs in an allocation unit */
+#define for_each_dsr_in_allocation_map(i, map, k) \
+ for ((k) = find_first_bit((const unsigned long *)map, GRU_DSR_AU);\
+ (k) < GRU_DSR_AU; \
+ (k) = find_next_bit((const unsigned long *)map, \
+ GRU_DSR_AU, (k) + 1)) \
+ for ((i) = (k) * GRU_DSR_AU_CL; \
+ (i) < ((k) + 1) * GRU_DSR_AU_CL; (i)++)
+
+#define gseg_physical_address(gru, ctxnum) \
+ ((gru)->gs_gru_base_paddr + ctxnum * GRU_GSEG_STRIDE)
+#define gseg_virtual_address(gru, ctxnum) \
+ ((gru)->gs_gru_base_vaddr + ctxnum * GRU_GSEG_STRIDE)
+
+/*-----------------------------------------------------------------------------
+ * Lock / Unlock GRU handles
+ * Use the "delresp" bit in the handle as a "lock" bit.
+ */
+
+/* Lock hierarchy checking enabled only in emulator */
+
+static inline void __lock_handle(void *h)
+{
+ while (test_and_set_bit(1, h))
+ cpu_relax();
+}
+
+static inline void __unlock_handle(void *h)
+{
+ clear_bit(1, h);
+}
+
+static inline void lock_cch_handle(struct gru_context_configuration_handle *cch)
+{
+ __lock_handle(cch);
+}
+
+static inline void unlock_cch_handle(struct gru_context_configuration_handle
+ *cch)
+{
+ __unlock_handle(cch);
+}
+
+static inline void lock_tgh_handle(struct gru_tlb_global_handle *tgh)
+{
+ __lock_handle(tgh);
+}
+
+static inline void unlock_tgh_handle(struct gru_tlb_global_handle *tgh)
+{
+ __unlock_handle(tgh);
+}
+
+/*-----------------------------------------------------------------------------
+ * Function prototypes & externs
+ */
+struct gru_unload_context_req;
+
+extern struct vm_operations_struct gru_vm_ops;
+extern struct device *grudev;
+
+extern struct gru_vma_data *gru_alloc_vma_data(struct vm_area_struct *vma,
+ int tsid);
+extern struct gru_thread_state *gru_find_thread_state(struct vm_area_struct
+ *vma, int tsid);
+extern struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct
+ *vma, int tsid);
+extern void gru_unload_context(struct gru_thread_state *gts, int savestate);
+extern void gts_drop(struct gru_thread_state *gts);
+extern void gru_tgh_flush_init(struct gru_state *gru);
+extern int gru_kservices_init(struct gru_state *gru);
+extern irqreturn_t gru_intr(int irq, void *dev_id);
+extern int gru_handle_user_call_os(unsigned long address);
+extern int gru_user_flush_tlb(unsigned long arg);
+extern int gru_user_unload_context(unsigned long arg);
+extern int gru_get_exception_detail(unsigned long arg);
+extern int gru_set_task_slice(long address);
+extern int gru_cpu_fault_map_id(void);
+extern struct vm_area_struct *gru_find_vma(unsigned long vaddr);
+extern void gru_flush_all_tlb(struct gru_state *gru);
+extern int gru_proc_init(void);
+extern void gru_proc_exit(void);
+
+extern unsigned long gru_reserve_cb_resources(struct gru_state *gru,
+ int cbr_au_count, char *cbmap);
+extern unsigned long gru_reserve_ds_resources(struct gru_state *gru,
+ int dsr_au_count, char *dsmap);
+extern int gru_fault(struct vm_area_struct *, struct vm_fault *vmf);
+extern struct gru_mm_struct *gru_register_mmu_notifier(void);
+extern void gru_drop_mmu_notifier(struct gru_mm_struct *gms);
+
+extern void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start,
+ unsigned long len);
+
+extern unsigned long gru_options;
+
+#endif /* __GRUTABLES_H__ */
diff --git a/drivers/misc/sgi-gru/grutlbpurge.c b/drivers/misc/sgi-gru/grutlbpurge.c
new file mode 100644
index 00000000000..bcfd5425e2e
--- /dev/null
+++ b/drivers/misc/sgi-gru/grutlbpurge.c
@@ -0,0 +1,372 @@
+/*
+ * SN Platform GRU Driver
+ *
+ * MMUOPS callbacks + TLB flushing
+ *
+ * This file handles emu notifier callbacks from the core kernel. The callbacks
+ * are used to update the TLB in the GRU as a result of changes in the
+ * state of a process address space. This file also handles TLB invalidates
+ * from the GRU driver.
+ *
+ * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/device.h>
+#include <linux/hugetlb.h>
+#include <linux/delay.h>
+#include <linux/timex.h>
+#include <linux/delay.h>
+#include <linux/srcu.h>
+#include <asm/processor.h>
+#include "gru.h"
+#include "grutables.h"
+#include <asm/uv/uv_hub.h>
+
+#define gru_random() get_cycles()
+
+/* ---------------------------------- TLB Invalidation functions --------
+ * get_tgh_handle
+ *
+ * Find a TGH to use for issuing a TLB invalidate. For GRUs that are on the
+ * local blade, use a fixed TGH that is a function of the blade-local cpu
+ * number. Normally, this TGH is private to the cpu & no contention occurs for
+ * the TGH. For offblade GRUs, select a random TGH in the range above the
+ * private TGHs. A spinlock is required to access this TGH & the lock must be
+ * released when the invalidate is completes. This sucks, but it is the best we
+ * can do.
+ *
+ * Note that the spinlock is IN the TGH handle so locking does not involve
+ * additional cache lines.
+ *
+ */
+static inline int get_off_blade_tgh(struct gru_state *gru)
+{
+ int n;
+
+ n = GRU_NUM_TGH - gru->gs_tgh_first_remote;
+ n = gru_random() % n;
+ n += gru->gs_tgh_first_remote;
+ return n;
+}
+
+static inline int get_on_blade_tgh(struct gru_state *gru)
+{
+ return uv_blade_processor_id() >> gru->gs_tgh_local_shift;
+}
+
+static struct gru_tlb_global_handle *get_lock_tgh_handle(struct gru_state
+ *gru)
+{
+ struct gru_tlb_global_handle *tgh;
+ int n;
+
+ preempt_disable();
+ if (uv_numa_blade_id() == gru->gs_blade_id)
+ n = get_on_blade_tgh(gru);
+ else
+ n = get_off_blade_tgh(gru);
+ tgh = get_tgh_by_index(gru, n);
+ lock_tgh_handle(tgh);
+
+ return tgh;
+}
+
+static void get_unlock_tgh_handle(struct gru_tlb_global_handle *tgh)
+{
+ unlock_tgh_handle(tgh);
+ preempt_enable();
+}
+
+/*
+ * gru_flush_tlb_range
+ *
+ * General purpose TLB invalidation function. This function scans every GRU in
+ * the ENTIRE system (partition) looking for GRUs where the specified MM has
+ * been accessed by the GRU. For each GRU found, the TLB must be invalidated OR
+ * the ASID invalidated. Invalidating an ASID causes a new ASID to be assigned
+ * on the next fault. This effectively flushes the ENTIRE TLB for the MM at the
+ * cost of (possibly) a large number of future TLBmisses.
+ *
+ * The current algorithm is optimized based on the following (somewhat true)
+ * assumptions:
+ * - GRU contexts are not loaded into a GRU unless a reference is made to
+ * the data segment or control block (this is true, not an assumption).
+ * If a DS/CB is referenced, the user will also issue instructions that
+ * cause TLBmisses. It is not necessary to optimize for the case where
+ * contexts are loaded but no instructions cause TLB misses. (I know
+ * this will happen but I'm not optimizing for it).
+ * - GRU instructions to invalidate TLB entries are SLOOOOWWW - normally
+ * a few usec but in unusual cases, it could be longer. Avoid if
+ * possible.
+ * - intrablade process migration between cpus is not frequent but is
+ * common.
+ * - a GRU context is not typically migrated to a different GRU on the
+ * blade because of intrablade migration
+ * - interblade migration is rare. Processes migrate their GRU context to
+ * the new blade.
+ * - if interblade migration occurs, migration back to the original blade
+ * is very very rare (ie., no optimization for this case)
+ * - most GRU instruction operate on a subset of the user REGIONS. Code
+ * & shared library regions are not likely targets of GRU instructions.
+ *
+ * To help improve the efficiency of TLB invalidation, the GMS data
+ * structure is maintained for EACH address space (MM struct). The GMS is
+ * also the structure that contains the pointer to the mmu callout
+ * functions. This structure is linked to the mm_struct for the address space
+ * using the mmu "register" function. The mmu interfaces are used to
+ * provide the callbacks for TLB invalidation. The GMS contains:
+ *
+ * - asid[maxgrus] array. ASIDs are assigned to a GRU when a context is
+ * loaded into the GRU.
+ * - asidmap[maxgrus]. bitmap to make it easier to find non-zero asids in
+ * the above array
+ * - ctxbitmap[maxgrus]. Indicates the contexts that are currently active
+ * in the GRU for the address space. This bitmap must be passed to the
+ * GRU to do an invalidate.
+ *
+ * The current algorithm for invalidating TLBs is:
+ * - scan the asidmap for GRUs where the context has been loaded, ie,
+ * asid is non-zero.
+ * - for each gru found:
+ * - if the ctxtmap is non-zero, there are active contexts in the
+ * GRU. TLB invalidate instructions must be issued to the GRU.
+ * - if the ctxtmap is zero, no context is active. Set the ASID to
+ * zero to force a full TLB invalidation. This is fast but will
+ * cause a lot of TLB misses if the context is reloaded onto the
+ * GRU
+ *
+ */
+
+void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start,
+ unsigned long len)
+{
+ struct gru_state *gru;
+ struct gru_mm_tracker *asids;
+ struct gru_tlb_global_handle *tgh;
+ unsigned long num;
+ int grupagesize, pagesize, pageshift, gid, asid;
+
+ /* ZZZ TODO - handle huge pages */
+ pageshift = PAGE_SHIFT;
+ pagesize = (1UL << pageshift);
+ grupagesize = GRU_PAGESIZE(pageshift);
+ num = min(((len + pagesize - 1) >> pageshift), GRUMAXINVAL);
+
+ STAT(flush_tlb);
+ gru_dbg(grudev, "gms %p, start 0x%lx, len 0x%lx, asidmap 0x%lx\n", gms,
+ start, len, gms->ms_asidmap[0]);
+
+ spin_lock(&gms->ms_asid_lock);
+ for_each_gru_in_bitmap(gid, gms->ms_asidmap) {
+ STAT(flush_tlb_gru);
+ gru = GID_TO_GRU(gid);
+ asids = gms->ms_asids + gid;
+ asid = asids->mt_asid;
+ if (asids->mt_ctxbitmap && asid) {
+ STAT(flush_tlb_gru_tgh);
+ asid = GRUASID(asid, start);
+ gru_dbg(grudev,
+ " FLUSH gruid %d, asid 0x%x, num %ld, cbmap 0x%x\n",
+ gid, asid, num, asids->mt_ctxbitmap);
+ tgh = get_lock_tgh_handle(gru);
+ tgh_invalidate(tgh, start, 0, asid, grupagesize, 0,
+ num - 1, asids->mt_ctxbitmap);
+ get_unlock_tgh_handle(tgh);
+ } else {
+ STAT(flush_tlb_gru_zero_asid);
+ asids->mt_asid = 0;
+ __clear_bit(gru->gs_gid, gms->ms_asidmap);
+ gru_dbg(grudev,
+ " CLEARASID gruid %d, asid 0x%x, cbtmap 0x%x, asidmap 0x%lx\n",
+ gid, asid, asids->mt_ctxbitmap,
+ gms->ms_asidmap[0]);
+ }
+ }
+ spin_unlock(&gms->ms_asid_lock);
+}
+
+/*
+ * Flush the entire TLB on a chiplet.
+ */
+void gru_flush_all_tlb(struct gru_state *gru)
+{
+ struct gru_tlb_global_handle *tgh;
+
+ gru_dbg(grudev, "gru %p, gid %d\n", gru, gru->gs_gid);
+ tgh = get_lock_tgh_handle(gru);
+ tgh_invalidate(tgh, 0, ~0, 0, 1, 1, GRUMAXINVAL - 1, 0);
+ get_unlock_tgh_handle(tgh);
+ preempt_enable();
+}
+
+/*
+ * MMUOPS notifier callout functions
+ */
+static void gru_invalidate_range_start(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long start, unsigned long end)
+{
+ struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
+ ms_notifier);
+
+ STAT(mmu_invalidate_range);
+ atomic_inc(&gms->ms_range_active);
+ gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx, act %d\n", gms,
+ start, end, atomic_read(&gms->ms_range_active));
+ gru_flush_tlb_range(gms, start, end - start);
+}
+
+static void gru_invalidate_range_end(struct mmu_notifier *mn,
+ struct mm_struct *mm, unsigned long start,
+ unsigned long end)
+{
+ struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
+ ms_notifier);
+
+ /* ..._and_test() provides needed barrier */
+ (void)atomic_dec_and_test(&gms->ms_range_active);
+
+ wake_up_all(&gms->ms_wait_queue);
+ gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx\n", gms, start, end);
+}
+
+static void gru_invalidate_page(struct mmu_notifier *mn, struct mm_struct *mm,
+ unsigned long address)
+{
+ struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
+ ms_notifier);
+
+ STAT(mmu_invalidate_page);
+ gru_flush_tlb_range(gms, address, PAGE_SIZE);
+ gru_dbg(grudev, "gms %p, address 0x%lx\n", gms, address);
+}
+
+static void gru_release(struct mmu_notifier *mn, struct mm_struct *mm)
+{
+ struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
+ ms_notifier);
+
+ gms->ms_released = 1;
+ gru_dbg(grudev, "gms %p\n", gms);
+}
+
+
+static const struct mmu_notifier_ops gru_mmuops = {
+ .invalidate_page = gru_invalidate_page,
+ .invalidate_range_start = gru_invalidate_range_start,
+ .invalidate_range_end = gru_invalidate_range_end,
+ .release = gru_release,
+};
+
+/* Move this to the basic mmu_notifier file. But for now... */
+static struct mmu_notifier *mmu_find_ops(struct mm_struct *mm,
+ const struct mmu_notifier_ops *ops)
+{
+ struct mmu_notifier *mn, *gru_mn = NULL;
+ struct hlist_node *n;
+
+ if (mm->mmu_notifier_mm) {
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list,
+ hlist)
+ if (mn->ops == ops) {
+ gru_mn = mn;
+ break;
+ }
+ rcu_read_unlock();
+ }
+ return gru_mn;
+}
+
+struct gru_mm_struct *gru_register_mmu_notifier(void)
+{
+ struct gru_mm_struct *gms;
+ struct mmu_notifier *mn;
+
+ mn = mmu_find_ops(current->mm, &gru_mmuops);
+ if (mn) {
+ gms = container_of(mn, struct gru_mm_struct, ms_notifier);
+ atomic_inc(&gms->ms_refcnt);
+ } else {
+ gms = kzalloc(sizeof(*gms), GFP_KERNEL);
+ if (gms) {
+ spin_lock_init(&gms->ms_asid_lock);
+ gms->ms_notifier.ops = &gru_mmuops;
+ atomic_set(&gms->ms_refcnt, 1);
+ init_waitqueue_head(&gms->ms_wait_queue);
+ __mmu_notifier_register(&gms->ms_notifier, current->mm);
+ }
+ }
+ gru_dbg(grudev, "gms %p, refcnt %d\n", gms,
+ atomic_read(&gms->ms_refcnt));
+ return gms;
+}
+
+void gru_drop_mmu_notifier(struct gru_mm_struct *gms)
+{
+ gru_dbg(grudev, "gms %p, refcnt %d, released %d\n", gms,
+ atomic_read(&gms->ms_refcnt), gms->ms_released);
+ if (atomic_dec_return(&gms->ms_refcnt) == 0) {
+ if (!gms->ms_released)
+ mmu_notifier_unregister(&gms->ms_notifier, current->mm);
+ kfree(gms);
+ }
+}
+
+/*
+ * Setup TGH parameters. There are:
+ * - 24 TGH handles per GRU chiplet
+ * - a portion (MAX_LOCAL_TGH) of the handles are reserved for
+ * use by blade-local cpus
+ * - the rest are used by off-blade cpus. This usage is
+ * less frequent than blade-local usage.
+ *
+ * For now, use 16 handles for local flushes, 8 for remote flushes. If the blade
+ * has less tan or equal to 16 cpus, each cpu has a unique handle that it can
+ * use.
+ */
+#define MAX_LOCAL_TGH 16
+
+void gru_tgh_flush_init(struct gru_state *gru)
+{
+ int cpus, shift = 0, n;
+
+ cpus = uv_blade_nr_possible_cpus(gru->gs_blade_id);
+
+ /* n = cpus rounded up to next power of 2 */
+ if (cpus) {
+ n = 1 << fls(cpus - 1);
+
+ /*
+ * shift count for converting local cpu# to TGH index
+ * 0 if cpus <= MAX_LOCAL_TGH,
+ * 1 if cpus <= 2*MAX_LOCAL_TGH,
+ * etc
+ */
+ shift = max(0, fls(n - 1) - fls(MAX_LOCAL_TGH - 1));
+ }
+ gru->gs_tgh_local_shift = shift;
+
+ /* first starting TGH index to use for remote purges */
+ gru->gs_tgh_first_remote = (cpus + (1 << shift) - 1) >> shift;
+
+}
diff --git a/drivers/misc/sgi-xp/Makefile b/drivers/misc/sgi-xp/Makefile
index b6e40a7958c..35ce2857807 100644
--- a/drivers/misc/sgi-xp/Makefile
+++ b/drivers/misc/sgi-xp/Makefile
@@ -3,9 +3,17 @@
#
obj-$(CONFIG_SGI_XP) += xp.o
-xp-y := xp_main.o xp_nofault.o
+xp-y := xp_main.o
+xp-$(CONFIG_IA64_SGI_SN2) += xp_sn2.o xp_nofault.o
+xp-$(CONFIG_IA64_GENERIC) += xp_sn2.o xp_nofault.o xp_uv.o
+xp-$(CONFIG_IA64_SGI_UV) += xp_uv.o
+xp-$(CONFIG_X86_64) += xp_uv.o
obj-$(CONFIG_SGI_XP) += xpc.o
xpc-y := xpc_main.o xpc_channel.o xpc_partition.o
+xpc-$(CONFIG_IA64_SGI_SN2) += xpc_sn2.o
+xpc-$(CONFIG_IA64_GENERIC) += xpc_sn2.o xpc_uv.o
+xpc-$(CONFIG_IA64_SGI_UV) += xpc_uv.o
+xpc-$(CONFIG_X86_64) += xpc_uv.o
obj-$(CONFIG_SGI_XP) += xpnet.o
diff --git a/drivers/misc/sgi-xp/xp.h b/drivers/misc/sgi-xp/xp.h
index 03a87a307e3..859a5281c61 100644
--- a/drivers/misc/sgi-xp/xp.h
+++ b/drivers/misc/sgi-xp/xp.h
@@ -13,11 +13,34 @@
#ifndef _DRIVERS_MISC_SGIXP_XP_H
#define _DRIVERS_MISC_SGIXP_XP_H
-#include <linux/cache.h>
-#include <linux/hardirq.h>
#include <linux/mutex.h>
-#include <asm/sn/types.h>
-#include <asm/sn/bte.h>
+
+#ifdef CONFIG_IA64
+#include <asm/system.h>
+#include <asm/sn/arch.h> /* defines is_shub1() and is_shub2() */
+#define is_shub() ia64_platform_is("sn2")
+#define is_uv() ia64_platform_is("uv")
+#endif
+#ifdef CONFIG_X86_64
+#include <asm/genapic.h>
+#define is_uv() is_uv_system()
+#endif
+
+#ifndef is_shub1
+#define is_shub1() 0
+#endif
+
+#ifndef is_shub2
+#define is_shub2() 0
+#endif
+
+#ifndef is_shub
+#define is_shub() 0
+#endif
+
+#ifndef is_uv
+#define is_uv() 0
+#endif
#ifdef USE_DBUG_ON
#define DBUG_ON(condition) BUG_ON(condition)
@@ -26,133 +49,56 @@
#endif
/*
- * Define the maximum number of logically defined partitions the system
- * can support. It is constrained by the maximum number of hardware
- * partitionable regions. The term 'region' in this context refers to the
- * minimum number of nodes that can comprise an access protection grouping.
- * The access protection is in regards to memory, IPI and IOI.
+ * Define the maximum number of partitions the system can possibly support.
+ * It is based on the maximum number of hardware partitionable regions. The
+ * term 'region' in this context refers to the minimum number of nodes that
+ * can comprise an access protection grouping. The access protection is in
+ * regards to memory, IPI and IOI.
*
* The maximum number of hardware partitionable regions is equal to the
* maximum number of nodes in the entire system divided by the minimum number
* of nodes that comprise an access protection grouping.
*/
-#define XP_MAX_PARTITIONS 64
-
-/*
- * Define the number of u64s required to represent all the C-brick nasids
- * as a bitmap. The cross-partition kernel modules deal only with
- * C-brick nasids, thus the need for bitmaps which don't account for
- * odd-numbered (non C-brick) nasids.
- */
-#define XP_MAX_PHYSNODE_ID (MAX_NUMALINK_NODES / 2)
-#define XP_NASID_MASK_BYTES ((XP_MAX_PHYSNODE_ID + 7) / 8)
-#define XP_NASID_MASK_WORDS ((XP_MAX_PHYSNODE_ID + 63) / 64)
-
-/*
- * Wrapper for bte_copy() that should it return a failure status will retry
- * the bte_copy() once in the hope that the failure was due to a temporary
- * aberration (i.e., the link going down temporarily).
- *
- * src - physical address of the source of the transfer.
- * vdst - virtual address of the destination of the transfer.
- * len - number of bytes to transfer from source to destination.
- * mode - see bte_copy() for definition.
- * notification - see bte_copy() for definition.
- *
- * Note: xp_bte_copy() should never be called while holding a spinlock.
- */
-static inline bte_result_t
-xp_bte_copy(u64 src, u64 vdst, u64 len, u64 mode, void *notification)
-{
- bte_result_t ret;
- u64 pdst = ia64_tpa(vdst);
-
- /*
- * Ensure that the physically mapped memory is contiguous.
- *
- * We do this by ensuring that the memory is from region 7 only.
- * If the need should arise to use memory from one of the other
- * regions, then modify the BUG_ON() statement to ensure that the
- * memory from that region is always physically contiguous.
- */
- BUG_ON(REGION_NUMBER(vdst) != RGN_KERNEL);
-
- ret = bte_copy(src, pdst, len, mode, notification);
- if ((ret != BTE_SUCCESS) && BTE_ERROR_RETRY(ret)) {
- if (!in_interrupt())
- cond_resched();
-
- ret = bte_copy(src, pdst, len, mode, notification);
- }
-
- return ret;
-}
+#define XP_MAX_NPARTITIONS_SN2 64
+#define XP_MAX_NPARTITIONS_UV 256
/*
* XPC establishes channel connections between the local partition and any
* other partition that is currently up. Over these channels, kernel-level
* `users' can communicate with their counterparts on the other partitions.
*
- * The maxinum number of channels is limited to eight. For performance reasons,
- * the internal cross partition structures require sixteen bytes per channel,
- * and eight allows all of this interface-shared info to fit in one cache line.
- *
- * XPC_NCHANNELS reflects the total number of channels currently defined.
* If the need for additional channels arises, one can simply increase
- * XPC_NCHANNELS accordingly. If the day should come where that number
- * exceeds the MAXIMUM number of channels allowed (eight), then one will need
- * to make changes to the XPC code to allow for this.
+ * XPC_MAX_NCHANNELS accordingly. If the day should come where that number
+ * exceeds the absolute MAXIMUM number of channels possible (eight), then one
+ * will need to make changes to the XPC code to accommodate for this.
+ *
+ * The absolute maximum number of channels possible is limited to eight for
+ * performance reasons on sn2 hardware. The internal cross partition structures
+ * require sixteen bytes per channel, and eight allows all of this
+ * interface-shared info to fit in one 128-byte cacheline.
*/
#define XPC_MEM_CHANNEL 0 /* memory channel number */
#define XPC_NET_CHANNEL 1 /* network channel number */
-#define XPC_NCHANNELS 2 /* #of defined channels */
-#define XPC_MAX_NCHANNELS 8 /* max #of channels allowed */
+#define XPC_MAX_NCHANNELS 2 /* max #of channels allowed */
-#if XPC_NCHANNELS > XPC_MAX_NCHANNELS
-#error XPC_NCHANNELS exceeds MAXIMUM allowed.
+#if XPC_MAX_NCHANNELS > 8
+#error XPC_MAX_NCHANNELS exceeds absolute MAXIMUM possible.
#endif
/*
- * The format of an XPC message is as follows:
- *
- * +-------+--------------------------------+
- * | flags |////////////////////////////////|
- * +-------+--------------------------------+
- * | message # |
- * +----------------------------------------+
- * | payload (user-defined message) |
- * | |
- * :
- * | |
- * +----------------------------------------+
- *
- * The size of the payload is defined by the user via xpc_connect(). A user-
- * defined message resides in the payload area.
- *
- * The user should have no dealings with the message header, but only the
- * message's payload. When a message entry is allocated (via xpc_allocate())
- * a pointer to the payload area is returned and not the actual beginning of
- * the XPC message. The user then constructs a message in the payload area
- * and passes that pointer as an argument on xpc_send() or xpc_send_notify().
- *
- * The size of a message entry (within a message queue) must be a cacheline
- * sized multiple in order to facilitate the BTE transfer of messages from one
- * message queue to another. A macro, XPC_MSG_SIZE(), is provided for the user
+ * Define macro, XPC_MSG_SIZE(), is provided for the user
* that wants to fit as many msg entries as possible in a given memory size
* (e.g. a memory page).
*/
-struct xpc_msg {
- u8 flags; /* FOR XPC INTERNAL USE ONLY */
- u8 reserved[7]; /* FOR XPC INTERNAL USE ONLY */
- s64 number; /* FOR XPC INTERNAL USE ONLY */
-
- u64 payload; /* user defined portion of message */
-};
+#define XPC_MSG_MAX_SIZE 128
+#define XPC_MSG_HDR_MAX_SIZE 16
+#define XPC_MSG_PAYLOAD_MAX_SIZE (XPC_MSG_MAX_SIZE - XPC_MSG_HDR_MAX_SIZE)
-#define XPC_MSG_PAYLOAD_OFFSET (u64) (&((struct xpc_msg *)0)->payload)
#define XPC_MSG_SIZE(_payload_size) \
- L1_CACHE_ALIGN(XPC_MSG_PAYLOAD_OFFSET + (_payload_size))
+ ALIGN(XPC_MSG_HDR_MAX_SIZE + (_payload_size), \
+ is_uv() ? 64 : 128)
+
/*
* Define the return values and values passed to user's callout functions.
@@ -233,8 +179,20 @@ enum xp_retval {
xpDisconnected, /* 51: channel disconnected (closed) */
xpBteCopyError, /* 52: bte_copy() returned error */
+ xpSalError, /* 53: sn SAL error */
+ xpRsvdPageNotSet, /* 54: the reserved page is not set up */
+ xpPayloadTooBig, /* 55: payload too large for message slot */
+
+ xpUnsupported, /* 56: unsupported functionality or resource */
+ xpNeedMoreInfo, /* 57: more info is needed by SAL */
- xpUnknownReason /* 53: unknown reason - must be last in enum */
+ xpGruCopyError, /* 58: gru_copy_gru() returned error */
+ xpGruSendMqError, /* 59: gru send message queue related error */
+
+ xpBadChannelNumber, /* 60: invalid channel number */
+ xpBadMsgType, /* 60: invalid message type */
+
+ xpUnknownReason /* 61: unknown reason - must be last in enum */
};
/*
@@ -285,6 +243,9 @@ typedef void (*xpc_channel_func) (enum xp_retval reason, short partid,
* calling xpc_received().
*
* All other reason codes indicate failure.
+ *
+ * NOTE: The user defined function must be callable by an interrupt handler
+ * and thus cannot block.
*/
typedef void (*xpc_notify_func) (enum xp_retval reason, short partid,
int ch_number, void *key);
@@ -308,23 +269,22 @@ struct xpc_registration {
xpc_channel_func func; /* function to call */
void *key; /* pointer to user's key */
u16 nentries; /* #of msg entries in local msg queue */
- u16 msg_size; /* message queue's message size */
+ u16 entry_size; /* message queue's message entry size */
u32 assigned_limit; /* limit on #of assigned kthreads */
u32 idle_limit; /* limit on #of idle kthreads */
} ____cacheline_aligned;
#define XPC_CHANNEL_REGISTERED(_c) (xpc_registrations[_c].func != NULL)
-/* the following are valid xpc_allocate() flags */
+/* the following are valid xpc_send() or xpc_send_notify() flags */
#define XPC_WAIT 0 /* wait flag */
#define XPC_NOWAIT 1 /* no wait flag */
struct xpc_interface {
void (*connect) (int);
void (*disconnect) (int);
- enum xp_retval (*allocate) (short, int, u32, void **);
- enum xp_retval (*send) (short, int, void *);
- enum xp_retval (*send_notify) (short, int, void *,
+ enum xp_retval (*send) (short, int, u32, void *, u16);
+ enum xp_retval (*send_notify) (short, int, u32, void *, u16,
xpc_notify_func, void *);
void (*received) (short, int, void *);
enum xp_retval (*partid_to_nasids) (short, void *);
@@ -334,10 +294,9 @@ extern struct xpc_interface xpc_interface;
extern void xpc_set_interface(void (*)(int),
void (*)(int),
- enum xp_retval (*)(short, int, u32, void **),
- enum xp_retval (*)(short, int, void *),
- enum xp_retval (*)(short, int, void *,
- xpc_notify_func, void *),
+ enum xp_retval (*)(short, int, u32, void *, u16),
+ enum xp_retval (*)(short, int, u32, void *, u16,
+ xpc_notify_func, void *),
void (*)(short, int, void *),
enum xp_retval (*)(short, void *));
extern void xpc_clear_interface(void);
@@ -347,22 +306,19 @@ extern enum xp_retval xpc_connect(int, xpc_channel_func, void *, u16,
extern void xpc_disconnect(int);
static inline enum xp_retval
-xpc_allocate(short partid, int ch_number, u32 flags, void **payload)
-{
- return xpc_interface.allocate(partid, ch_number, flags, payload);
-}
-
-static inline enum xp_retval
-xpc_send(short partid, int ch_number, void *payload)
+xpc_send(short partid, int ch_number, u32 flags, void *payload,
+ u16 payload_size)
{
- return xpc_interface.send(partid, ch_number, payload);
+ return xpc_interface.send(partid, ch_number, flags, payload,
+ payload_size);
}
static inline enum xp_retval
-xpc_send_notify(short partid, int ch_number, void *payload,
- xpc_notify_func func, void *key)
+xpc_send_notify(short partid, int ch_number, u32 flags, void *payload,
+ u16 payload_size, xpc_notify_func func, void *key)
{
- return xpc_interface.send_notify(partid, ch_number, payload, func, key);
+ return xpc_interface.send_notify(partid, ch_number, flags, payload,
+ payload_size, func, key);
}
static inline void
@@ -377,8 +333,23 @@ xpc_partid_to_nasids(short partid, void *nasids)
return xpc_interface.partid_to_nasids(partid, nasids);
}
+extern short xp_max_npartitions;
+extern short xp_partition_id;
+extern u8 xp_region_size;
+
+extern unsigned long (*xp_pa) (void *);
+extern enum xp_retval (*xp_remote_memcpy) (unsigned long, const unsigned long,
+ size_t);
+extern int (*xp_cpu_to_nasid) (int);
+
extern u64 xp_nofault_PIOR_target;
extern int xp_nofault_PIOR(void *);
extern int xp_error_PIOR(void);
+extern struct device *xp;
+extern enum xp_retval xp_init_sn2(void);
+extern enum xp_retval xp_init_uv(void);
+extern void xp_exit_sn2(void);
+extern void xp_exit_uv(void);
+
#endif /* _DRIVERS_MISC_SGIXP_XP_H */
diff --git a/drivers/misc/sgi-xp/xp_main.c b/drivers/misc/sgi-xp/xp_main.c
index 196480b691a..66a1d19e08a 100644
--- a/drivers/misc/sgi-xp/xp_main.c
+++ b/drivers/misc/sgi-xp/xp_main.c
@@ -14,29 +14,48 @@
*
*/
-#include <linux/kernel.h>
-#include <linux/interrupt.h>
#include <linux/module.h>
-#include <linux/mutex.h>
-#include <asm/sn/intr.h>
-#include <asm/sn/sn_sal.h>
+#include <linux/device.h>
#include "xp.h"
-/*
- * The export of xp_nofault_PIOR needs to happen here since it is defined
- * in drivers/misc/sgi-xp/xp_nofault.S. The target of the nofault read is
- * defined here.
- */
-EXPORT_SYMBOL_GPL(xp_nofault_PIOR);
+/* define the XP debug device structures to be used with dev_dbg() et al */
+
+struct device_driver xp_dbg_name = {
+ .name = "xp"
+};
+
+struct device xp_dbg_subname = {
+ .bus_id = {0}, /* set to "" */
+ .driver = &xp_dbg_name
+};
+
+struct device *xp = &xp_dbg_subname;
+
+/* max #of partitions possible */
+short xp_max_npartitions;
+EXPORT_SYMBOL_GPL(xp_max_npartitions);
+
+short xp_partition_id;
+EXPORT_SYMBOL_GPL(xp_partition_id);
+
+u8 xp_region_size;
+EXPORT_SYMBOL_GPL(xp_region_size);
+
+unsigned long (*xp_pa) (void *addr);
+EXPORT_SYMBOL_GPL(xp_pa);
+
+enum xp_retval (*xp_remote_memcpy) (unsigned long dst_gpa,
+ const unsigned long src_gpa, size_t len);
+EXPORT_SYMBOL_GPL(xp_remote_memcpy);
-u64 xp_nofault_PIOR_target;
-EXPORT_SYMBOL_GPL(xp_nofault_PIOR_target);
+int (*xp_cpu_to_nasid) (int cpuid);
+EXPORT_SYMBOL_GPL(xp_cpu_to_nasid);
/*
* xpc_registrations[] keeps track of xpc_connect()'s done by the kernel-level
* users of XPC.
*/
-struct xpc_registration xpc_registrations[XPC_NCHANNELS];
+struct xpc_registration xpc_registrations[XPC_MAX_NCHANNELS];
EXPORT_SYMBOL_GPL(xpc_registrations);
/*
@@ -51,10 +70,9 @@ xpc_notloaded(void)
struct xpc_interface xpc_interface = {
(void (*)(int))xpc_notloaded,
(void (*)(int))xpc_notloaded,
- (enum xp_retval(*)(short, int, u32, void **))xpc_notloaded,
- (enum xp_retval(*)(short, int, void *))xpc_notloaded,
- (enum xp_retval(*)(short, int, void *, xpc_notify_func, void *))
- xpc_notloaded,
+ (enum xp_retval(*)(short, int, u32, void *, u16))xpc_notloaded,
+ (enum xp_retval(*)(short, int, u32, void *, u16, xpc_notify_func,
+ void *))xpc_notloaded,
(void (*)(short, int, void *))xpc_notloaded,
(enum xp_retval(*)(short, void *))xpc_notloaded
};
@@ -66,16 +84,14 @@ EXPORT_SYMBOL_GPL(xpc_interface);
void
xpc_set_interface(void (*connect) (int),
void (*disconnect) (int),
- enum xp_retval (*allocate) (short, int, u32, void **),
- enum xp_retval (*send) (short, int, void *),
- enum xp_retval (*send_notify) (short, int, void *,
+ enum xp_retval (*send) (short, int, u32, void *, u16),
+ enum xp_retval (*send_notify) (short, int, u32, void *, u16,
xpc_notify_func, void *),
void (*received) (short, int, void *),
enum xp_retval (*partid_to_nasids) (short, void *))
{
xpc_interface.connect = connect;
xpc_interface.disconnect = disconnect;
- xpc_interface.allocate = allocate;
xpc_interface.send = send;
xpc_interface.send_notify = send_notify;
xpc_interface.received = received;
@@ -91,13 +107,11 @@ xpc_clear_interface(void)
{
xpc_interface.connect = (void (*)(int))xpc_notloaded;
xpc_interface.disconnect = (void (*)(int))xpc_notloaded;
- xpc_interface.allocate = (enum xp_retval(*)(short, int, u32,
- void **))xpc_notloaded;
- xpc_interface.send = (enum xp_retval(*)(short, int, void *))
+ xpc_interface.send = (enum xp_retval(*)(short, int, u32, void *, u16))
xpc_notloaded;
- xpc_interface.send_notify = (enum xp_retval(*)(short, int, void *,
- xpc_notify_func,
- void *))xpc_notloaded;
+ xpc_interface.send_notify = (enum xp_retval(*)(short, int, u32, void *,
+ u16, xpc_notify_func,
+ void *))xpc_notloaded;
xpc_interface.received = (void (*)(short, int, void *))
xpc_notloaded;
xpc_interface.partid_to_nasids = (enum xp_retval(*)(short, void *))
@@ -135,11 +149,14 @@ xpc_connect(int ch_number, xpc_channel_func func, void *key, u16 payload_size,
{
struct xpc_registration *registration;
- DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS);
+ DBUG_ON(ch_number < 0 || ch_number >= XPC_MAX_NCHANNELS);
DBUG_ON(payload_size == 0 || nentries == 0);
DBUG_ON(func == NULL);
DBUG_ON(assigned_limit == 0 || idle_limit > assigned_limit);
+ if (XPC_MSG_SIZE(payload_size) > XPC_MSG_MAX_SIZE)
+ return xpPayloadTooBig;
+
registration = &xpc_registrations[ch_number];
if (mutex_lock_interruptible(&registration->mutex) != 0)
@@ -152,7 +169,7 @@ xpc_connect(int ch_number, xpc_channel_func func, void *key, u16 payload_size,
}
/* register the channel for connection */
- registration->msg_size = XPC_MSG_SIZE(payload_size);
+ registration->entry_size = XPC_MSG_SIZE(payload_size);
registration->nentries = nentries;
registration->assigned_limit = assigned_limit;
registration->idle_limit = idle_limit;
@@ -185,7 +202,7 @@ xpc_disconnect(int ch_number)
{
struct xpc_registration *registration;
- DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS);
+ DBUG_ON(ch_number < 0 || ch_number >= XPC_MAX_NCHANNELS);
registration = &xpc_registrations[ch_number];
@@ -206,7 +223,7 @@ xpc_disconnect(int ch_number)
registration->func = NULL;
registration->key = NULL;
registration->nentries = 0;
- registration->msg_size = 0;
+ registration->entry_size = 0;
registration->assigned_limit = 0;
registration->idle_limit = 0;
@@ -221,39 +238,21 @@ EXPORT_SYMBOL_GPL(xpc_disconnect);
int __init
xp_init(void)
{
- int ret, ch_number;
- u64 func_addr = *(u64 *)xp_nofault_PIOR;
- u64 err_func_addr = *(u64 *)xp_error_PIOR;
-
- if (!ia64_platform_is("sn2"))
- return -ENODEV;
+ enum xp_retval ret;
+ int ch_number;
- /*
- * Register a nofault code region which performs a cross-partition
- * PIO read. If the PIO read times out, the MCA handler will consume
- * the error and return to a kernel-provided instruction to indicate
- * an error. This PIO read exists because it is guaranteed to timeout
- * if the destination is down (AMO operations do not timeout on at
- * least some CPUs on Shubs <= v1.2, which unfortunately we have to
- * work around).
- */
- ret = sn_register_nofault_code(func_addr, err_func_addr, err_func_addr,
- 1, 1);
- if (ret != 0) {
- printk(KERN_ERR "XP: can't register nofault code, error=%d\n",
- ret);
- }
- /*
- * Setup the nofault PIO read target. (There is no special reason why
- * SH_IPI_ACCESS was selected.)
- */
- if (is_shub2())
- xp_nofault_PIOR_target = SH2_IPI_ACCESS0;
+ if (is_shub())
+ ret = xp_init_sn2();
+ else if (is_uv())
+ ret = xp_init_uv();
else
- xp_nofault_PIOR_target = SH1_IPI_ACCESS;
+ ret = xpUnsupported;
+
+ if (ret != xpSuccess)
+ return -ENODEV;
/* initialize the connection registration mutex */
- for (ch_number = 0; ch_number < XPC_NCHANNELS; ch_number++)
+ for (ch_number = 0; ch_number < XPC_MAX_NCHANNELS; ch_number++)
mutex_init(&xpc_registrations[ch_number].mutex);
return 0;
@@ -264,12 +263,10 @@ module_init(xp_init);
void __exit
xp_exit(void)
{
- u64 func_addr = *(u64 *)xp_nofault_PIOR;
- u64 err_func_addr = *(u64 *)xp_error_PIOR;
-
- /* unregister the PIO read nofault code region */
- (void)sn_register_nofault_code(func_addr, err_func_addr,
- err_func_addr, 1, 0);
+ if (is_shub())
+ xp_exit_sn2();
+ else if (is_uv())
+ xp_exit_uv();
}
module_exit(xp_exit);
diff --git a/drivers/misc/sgi-xp/xp_sn2.c b/drivers/misc/sgi-xp/xp_sn2.c
new file mode 100644
index 00000000000..1440134caf3
--- /dev/null
+++ b/drivers/misc/sgi-xp/xp_sn2.c
@@ -0,0 +1,146 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
+ */
+
+/*
+ * Cross Partition (XP) sn2-based functions.
+ *
+ * Architecture specific implementation of common functions.
+ */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <asm/sn/bte.h>
+#include <asm/sn/sn_sal.h>
+#include "xp.h"
+
+/*
+ * The export of xp_nofault_PIOR needs to happen here since it is defined
+ * in drivers/misc/sgi-xp/xp_nofault.S. The target of the nofault read is
+ * defined here.
+ */
+EXPORT_SYMBOL_GPL(xp_nofault_PIOR);
+
+u64 xp_nofault_PIOR_target;
+EXPORT_SYMBOL_GPL(xp_nofault_PIOR_target);
+
+/*
+ * Register a nofault code region which performs a cross-partition PIO read.
+ * If the PIO read times out, the MCA handler will consume the error and
+ * return to a kernel-provided instruction to indicate an error. This PIO read
+ * exists because it is guaranteed to timeout if the destination is down
+ * (amo operations do not timeout on at least some CPUs on Shubs <= v1.2,
+ * which unfortunately we have to work around).
+ */
+static enum xp_retval
+xp_register_nofault_code_sn2(void)
+{
+ int ret;
+ u64 func_addr;
+ u64 err_func_addr;
+
+ func_addr = *(u64 *)xp_nofault_PIOR;
+ err_func_addr = *(u64 *)xp_error_PIOR;
+ ret = sn_register_nofault_code(func_addr, err_func_addr, err_func_addr,
+ 1, 1);
+ if (ret != 0) {
+ dev_err(xp, "can't register nofault code, error=%d\n", ret);
+ return xpSalError;
+ }
+ /*
+ * Setup the nofault PIO read target. (There is no special reason why
+ * SH_IPI_ACCESS was selected.)
+ */
+ if (is_shub1())
+ xp_nofault_PIOR_target = SH1_IPI_ACCESS;
+ else if (is_shub2())
+ xp_nofault_PIOR_target = SH2_IPI_ACCESS0;
+
+ return xpSuccess;
+}
+
+static void
+xp_unregister_nofault_code_sn2(void)
+{
+ u64 func_addr = *(u64 *)xp_nofault_PIOR;
+ u64 err_func_addr = *(u64 *)xp_error_PIOR;
+
+ /* unregister the PIO read nofault code region */
+ (void)sn_register_nofault_code(func_addr, err_func_addr,
+ err_func_addr, 1, 0);
+}
+
+/*
+ * Convert a virtual memory address to a physical memory address.
+ */
+static unsigned long
+xp_pa_sn2(void *addr)
+{
+ return __pa(addr);
+}
+
+/*
+ * Wrapper for bte_copy().
+ *
+ * dst_pa - physical address of the destination of the transfer.
+ * src_pa - physical address of the source of the transfer.
+ * len - number of bytes to transfer from source to destination.
+ *
+ * Note: xp_remote_memcpy_sn2() should never be called while holding a spinlock.
+ */
+static enum xp_retval
+xp_remote_memcpy_sn2(unsigned long dst_pa, const unsigned long src_pa,
+ size_t len)
+{
+ bte_result_t ret;
+
+ ret = bte_copy(src_pa, dst_pa, len, (BTE_NOTIFY | BTE_WACQUIRE), NULL);
+ if (ret == BTE_SUCCESS)
+ return xpSuccess;
+
+ if (is_shub2()) {
+ dev_err(xp, "bte_copy() on shub2 failed, error=0x%x dst_pa="
+ "0x%016lx src_pa=0x%016lx len=%ld\\n", ret, dst_pa,
+ src_pa, len);
+ } else {
+ dev_err(xp, "bte_copy() failed, error=%d dst_pa=0x%016lx "
+ "src_pa=0x%016lx len=%ld\\n", ret, dst_pa, src_pa, len);
+ }
+
+ return xpBteCopyError;
+}
+
+static int
+xp_cpu_to_nasid_sn2(int cpuid)
+{
+ return cpuid_to_nasid(cpuid);
+}
+
+enum xp_retval
+xp_init_sn2(void)
+{
+ BUG_ON(!is_shub());
+
+ xp_max_npartitions = XP_MAX_NPARTITIONS_SN2;
+ xp_partition_id = sn_partition_id;
+ xp_region_size = sn_region_size;
+
+ xp_pa = xp_pa_sn2;
+ xp_remote_memcpy = xp_remote_memcpy_sn2;
+ xp_cpu_to_nasid = xp_cpu_to_nasid_sn2;
+
+ return xp_register_nofault_code_sn2();
+}
+
+void
+xp_exit_sn2(void)
+{
+ BUG_ON(!is_shub());
+
+ xp_unregister_nofault_code_sn2();
+}
+
diff --git a/drivers/misc/sgi-xp/xp_uv.c b/drivers/misc/sgi-xp/xp_uv.c
new file mode 100644
index 00000000000..d9f7ce2510b
--- /dev/null
+++ b/drivers/misc/sgi-xp/xp_uv.c
@@ -0,0 +1,72 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
+ */
+
+/*
+ * Cross Partition (XP) uv-based functions.
+ *
+ * Architecture specific implementation of common functions.
+ *
+ */
+
+#include <linux/device.h>
+#include <asm/uv/uv_hub.h>
+#include "../sgi-gru/grukservices.h"
+#include "xp.h"
+
+/*
+ * Convert a virtual memory address to a physical memory address.
+ */
+static unsigned long
+xp_pa_uv(void *addr)
+{
+ return uv_gpa(addr);
+}
+
+static enum xp_retval
+xp_remote_memcpy_uv(unsigned long dst_gpa, const unsigned long src_gpa,
+ size_t len)
+{
+ int ret;
+
+ ret = gru_copy_gpa(dst_gpa, src_gpa, len);
+ if (ret == 0)
+ return xpSuccess;
+
+ dev_err(xp, "gru_copy_gpa() failed, dst_gpa=0x%016lx src_gpa=0x%016lx "
+ "len=%ld\n", dst_gpa, src_gpa, len);
+ return xpGruCopyError;
+}
+
+static int
+xp_cpu_to_nasid_uv(int cpuid)
+{
+ /* ??? Is this same as sn2 nasid in mach/part bitmaps set up by SAL? */
+ return UV_PNODE_TO_NASID(uv_cpu_to_pnode(cpuid));
+}
+
+enum xp_retval
+xp_init_uv(void)
+{
+ BUG_ON(!is_uv());
+
+ xp_max_npartitions = XP_MAX_NPARTITIONS_UV;
+ xp_partition_id = 0; /* !!! not correct value */
+ xp_region_size = 0; /* !!! not correct value */
+
+ xp_pa = xp_pa_uv;
+ xp_remote_memcpy = xp_remote_memcpy_uv;
+ xp_cpu_to_nasid = xp_cpu_to_nasid_uv;
+
+ return xpSuccess;
+}
+
+void
+xp_exit_uv(void)
+{
+ BUG_ON(!is_uv());
+}
diff --git a/drivers/misc/sgi-xp/xpc.h b/drivers/misc/sgi-xp/xpc.h
index 11ac267ed68..619208d6186 100644
--- a/drivers/misc/sgi-xp/xpc.h
+++ b/drivers/misc/sgi-xp/xpc.h
@@ -13,18 +13,10 @@
#ifndef _DRIVERS_MISC_SGIXP_XPC_H
#define _DRIVERS_MISC_SGIXP_XPC_H
-#include <linux/interrupt.h>
-#include <linux/sysctl.h>
-#include <linux/device.h>
-#include <linux/mutex.h>
+#include <linux/wait.h>
#include <linux/completion.h>
-#include <asm/pgtable.h>
-#include <asm/processor.h>
-#include <asm/sn/bte.h>
-#include <asm/sn/clksupport.h>
-#include <asm/sn/addrs.h>
-#include <asm/sn/mspec.h>
-#include <asm/sn/shub_mmr.h>
+#include <linux/timer.h>
+#include <linux/sched.h>
#include "xp.h"
/*
@@ -36,23 +28,7 @@
#define XPC_VERSION_MAJOR(_v) ((_v) >> 4)
#define XPC_VERSION_MINOR(_v) ((_v) & 0xf)
-/*
- * The next macros define word or bit representations for given
- * C-brick nasid in either the SAL provided bit array representing
- * nasids in the partition/machine or the AMO_t array used for
- * inter-partition initiation communications.
- *
- * For SN2 machines, C-Bricks are alway even numbered NASIDs. As
- * such, some space will be saved by insisting that nasid information
- * passed from SAL always be packed for C-Bricks and the
- * cross-partition interrupts use the same packing scheme.
- */
-#define XPC_NASID_W_INDEX(_n) (((_n) / 64) / 2)
-#define XPC_NASID_B_INDEX(_n) (((_n) / 2) & (64 - 1))
-#define XPC_NASID_IN_ARRAY(_n, _p) ((_p)[XPC_NASID_W_INDEX(_n)] & \
- (1UL << XPC_NASID_B_INDEX(_n)))
-#define XPC_NASID_FROM_W_B(_w, _b) (((_w) * 64 + (_b)) * 2)
-
+/* define frequency of the heartbeat and frequency how often it's checked */
#define XPC_HB_DEFAULT_INTERVAL 5 /* incr HB every x secs */
#define XPC_HB_CHECK_DEFAULT_INTERVAL 20 /* check HB every x secs */
@@ -72,11 +48,11 @@
*
* reserved page header
*
- * The first cacheline of the reserved page contains the header
- * (struct xpc_rsvd_page). Before SAL initialization has completed,
+ * The first two 64-byte cachelines of the reserved page contain the
+ * header (struct xpc_rsvd_page). Before SAL initialization has completed,
* SAL has set up the following fields of the reserved page header:
- * SAL_signature, SAL_version, partid, and nasids_size. The other
- * fields are set up by XPC. (xpc_rsvd_page points to the local
+ * SAL_signature, SAL_version, SAL_partid, and SAL_nasids_size. The
+ * other fields are set up by XPC. (xpc_rsvd_page points to the local
* partition's reserved page.)
*
* part_nasids mask
@@ -87,14 +63,16 @@
* the actual nasids in the entire machine (mach_nasids). We're only
* interested in the even numbered nasids (which contain the processors
* and/or memory), so we only need half as many bits to represent the
- * nasids. The part_nasids mask is located starting at the first cacheline
- * following the reserved page header. The mach_nasids mask follows right
- * after the part_nasids mask. The size in bytes of each mask is reflected
- * by the reserved page header field 'nasids_size'. (Local partition's
- * mask pointers are xpc_part_nasids and xpc_mach_nasids.)
+ * nasids. When mapping nasid to bit in a mask (or bit to nasid) be sure
+ * to either divide or multiply by 2. The part_nasids mask is located
+ * starting at the first cacheline following the reserved page header. The
+ * mach_nasids mask follows right after the part_nasids mask. The size in
+ * bytes of each mask is reflected by the reserved page header field
+ * 'SAL_nasids_size'. (Local partition's mask pointers are xpc_part_nasids
+ * and xpc_mach_nasids.)
*
- * vars
- * vars part
+ * vars (ia64-sn2 only)
+ * vars part (ia64-sn2 only)
*
* Immediately following the mach_nasids mask are the XPC variables
* required by other partitions. First are those that are generic to all
@@ -102,43 +80,26 @@
* which are partition specific (vars part). These are setup by XPC.
* (Local partition's vars pointers are xpc_vars and xpc_vars_part.)
*
- * Note: Until vars_pa is set, the partition XPC code has not been initialized.
+ * Note: Until 'ts_jiffies' is set non-zero, the partition XPC code has not been
+ * initialized.
*/
struct xpc_rsvd_page {
u64 SAL_signature; /* SAL: unique signature */
u64 SAL_version; /* SAL: version */
- u8 partid; /* SAL: partition ID */
+ short SAL_partid; /* SAL: partition ID */
+ short max_npartitions; /* value of XPC_MAX_PARTITIONS */
u8 version;
- u8 pad1[6]; /* align to next u64 in cacheline */
- u64 vars_pa; /* physical address of struct xpc_vars */
- struct timespec stamp; /* time when reserved page was setup by XPC */
- u64 pad2[9]; /* align to last u64 in cacheline */
- u64 nasids_size; /* SAL: size of each nasid mask in bytes */
+ u8 pad1[3]; /* align to next u64 in 1st 64-byte cacheline */
+ union {
+ unsigned long vars_pa; /* phys address of struct xpc_vars */
+ unsigned long activate_mq_gpa; /* gru phy addr of activate_mq */
+ } sn;
+ unsigned long ts_jiffies; /* timestamp when rsvd pg was setup by XPC */
+ u64 pad2[10]; /* align to last u64 in 2nd 64-byte cacheline */
+ u64 SAL_nasids_size; /* SAL: size of each nasid mask in bytes */
};
-#define XPC_RP_VERSION _XPC_VERSION(1, 1) /* version 1.1 of the reserved page */
-
-#define XPC_SUPPORTS_RP_STAMP(_version) \
- (_version >= _XPC_VERSION(1, 1))
-
-/*
- * compare stamps - the return value is:
- *
- * < 0, if stamp1 < stamp2
- * = 0, if stamp1 == stamp2
- * > 0, if stamp1 > stamp2
- */
-static inline int
-xpc_compare_stamps(struct timespec *stamp1, struct timespec *stamp2)
-{
- int ret;
-
- ret = stamp1->tv_sec - stamp2->tv_sec;
- if (ret == 0)
- ret = stamp1->tv_nsec - stamp2->tv_nsec;
-
- return ret;
-}
+#define XPC_RP_VERSION _XPC_VERSION(2, 0) /* version 2.0 of the reserved page */
/*
* Define the structures by which XPC variables can be exported to other
@@ -154,85 +115,40 @@ xpc_compare_stamps(struct timespec *stamp1, struct timespec *stamp2)
* reflected by incrementing either the major or minor version numbers
* of struct xpc_vars.
*/
-struct xpc_vars {
+struct xpc_vars_sn2 {
u8 version;
u64 heartbeat;
- u64 heartbeating_to_mask;
+ DECLARE_BITMAP(heartbeating_to_mask, XP_MAX_NPARTITIONS_SN2);
u64 heartbeat_offline; /* if 0, heartbeat should be changing */
- int act_nasid;
- int act_phys_cpuid;
- u64 vars_part_pa;
- u64 amos_page_pa; /* paddr of page of AMOs from MSPEC driver */
- AMO_t *amos_page; /* vaddr of page of AMOs from MSPEC driver */
+ int activate_IRQ_nasid;
+ int activate_IRQ_phys_cpuid;
+ unsigned long vars_part_pa;
+ unsigned long amos_page_pa;/* paddr of page of amos from MSPEC driver */
+ struct amo *amos_page; /* vaddr of page of amos from MSPEC driver */
};
#define XPC_V_VERSION _XPC_VERSION(3, 1) /* version 3.1 of the cross vars */
-#define XPC_SUPPORTS_DISENGAGE_REQUEST(_version) \
- (_version >= _XPC_VERSION(3, 1))
-
-static inline int
-xpc_hb_allowed(short partid, struct xpc_vars *vars)
-{
- return ((vars->heartbeating_to_mask & (1UL << partid)) != 0);
-}
-
-static inline void
-xpc_allow_hb(short partid, struct xpc_vars *vars)
-{
- u64 old_mask, new_mask;
-
- do {
- old_mask = vars->heartbeating_to_mask;
- new_mask = (old_mask | (1UL << partid));
- } while (cmpxchg(&vars->heartbeating_to_mask, old_mask, new_mask) !=
- old_mask);
-}
-
-static inline void
-xpc_disallow_hb(short partid, struct xpc_vars *vars)
-{
- u64 old_mask, new_mask;
-
- do {
- old_mask = vars->heartbeating_to_mask;
- new_mask = (old_mask & ~(1UL << partid));
- } while (cmpxchg(&vars->heartbeating_to_mask, old_mask, new_mask) !=
- old_mask);
-}
-
-/*
- * The AMOs page consists of a number of AMO variables which are divided into
- * four groups, The first two groups are used to identify an IRQ's sender.
- * These two groups consist of 64 and 128 AMO variables respectively. The last
- * two groups, consisting of just one AMO variable each, are used to identify
- * the remote partitions that are currently engaged (from the viewpoint of
- * the XPC running on the remote partition).
- */
-#define XPC_NOTIFY_IRQ_AMOS 0
-#define XPC_ACTIVATE_IRQ_AMOS (XPC_NOTIFY_IRQ_AMOS + XP_MAX_PARTITIONS)
-#define XPC_ENGAGED_PARTITIONS_AMO (XPC_ACTIVATE_IRQ_AMOS + XP_NASID_MASK_WORDS)
-#define XPC_DISENGAGE_REQUEST_AMO (XPC_ENGAGED_PARTITIONS_AMO + 1)
-
/*
* The following structure describes the per partition specific variables.
*
* An array of these structures, one per partition, will be defined. As a
* partition becomes active XPC will copy the array entry corresponding to
- * itself from that partition. It is desirable that the size of this
- * structure evenly divide into a cacheline, such that none of the entries
- * in this array crosses a cacheline boundary. As it is now, each entry
- * occupies half a cacheline.
+ * itself from that partition. It is desirable that the size of this structure
+ * evenly divides into a 128-byte cacheline, such that none of the entries in
+ * this array crosses a 128-byte cacheline boundary. As it is now, each entry
+ * occupies 64-bytes.
*/
-struct xpc_vars_part {
+struct xpc_vars_part_sn2 {
u64 magic;
- u64 openclose_args_pa; /* physical address of open and close args */
- u64 GPs_pa; /* physical address of Get/Put values */
+ unsigned long openclose_args_pa; /* phys addr of open and close args */
+ unsigned long GPs_pa; /* physical address of Get/Put values */
+
+ unsigned long chctl_amo_pa; /* physical address of chctl flags' amo */
- u64 IPI_amo_pa; /* physical address of IPI AMO_t structure */
- int IPI_nasid; /* nasid of where to send IPIs */
- int IPI_phys_cpuid; /* physical CPU ID of where to send IPIs */
+ int notify_IRQ_nasid; /* nasid of where to send notify IRQs */
+ int notify_IRQ_phys_cpuid; /* CPUID of where to send notify IRQs */
u8 nchannels; /* #of defined channels supported */
@@ -248,20 +164,95 @@ struct xpc_vars_part {
* MAGIC2 indicates that this partition has pulled the remote partititions
* per partition variables that pertain to this partition.
*/
-#define XPC_VP_MAGIC1 0x0053524156435058L /* 'XPCVARS\0'L (little endian) */
-#define XPC_VP_MAGIC2 0x0073726176435058L /* 'XPCvars\0'L (little endian) */
+#define XPC_VP_MAGIC1_SN2 0x0053524156435058L /* 'XPCVARS\0'L (little endian) */
+#define XPC_VP_MAGIC2_SN2 0x0073726176435058L /* 'XPCvars\0'L (little endian) */
/* the reserved page sizes and offsets */
#define XPC_RP_HEADER_SIZE L1_CACHE_ALIGN(sizeof(struct xpc_rsvd_page))
-#define XPC_RP_VARS_SIZE L1_CACHE_ALIGN(sizeof(struct xpc_vars))
+#define XPC_RP_VARS_SIZE L1_CACHE_ALIGN(sizeof(struct xpc_vars_sn2))
-#define XPC_RP_PART_NASIDS(_rp) ((u64 *)((u8 *)(_rp) + XPC_RP_HEADER_SIZE))
-#define XPC_RP_MACH_NASIDS(_rp) (XPC_RP_PART_NASIDS(_rp) + xp_nasid_mask_words)
-#define XPC_RP_VARS(_rp) ((struct xpc_vars *)(XPC_RP_MACH_NASIDS(_rp) + \
- xp_nasid_mask_words))
-#define XPC_RP_VARS_PART(_rp) ((struct xpc_vars_part *) \
- ((u8 *)XPC_RP_VARS(_rp) + XPC_RP_VARS_SIZE))
+#define XPC_RP_PART_NASIDS(_rp) ((unsigned long *)((u8 *)(_rp) + \
+ XPC_RP_HEADER_SIZE))
+#define XPC_RP_MACH_NASIDS(_rp) (XPC_RP_PART_NASIDS(_rp) + \
+ xpc_nasid_mask_nlongs)
+#define XPC_RP_VARS(_rp) ((struct xpc_vars_sn2 *) \
+ (XPC_RP_MACH_NASIDS(_rp) + \
+ xpc_nasid_mask_nlongs))
+
+/*
+ * The activate_mq is used to send/receive GRU messages that affect XPC's
+ * heartbeat, partition active state, and channel state. This is UV only.
+ */
+struct xpc_activate_mq_msghdr_uv {
+ short partid; /* sender's partid */
+ u8 act_state; /* sender's act_state at time msg sent */
+ u8 type; /* message's type */
+ unsigned long rp_ts_jiffies; /* timestamp of sender's rp setup by XPC */
+};
+
+/* activate_mq defined message types */
+#define XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV 0
+#define XPC_ACTIVATE_MQ_MSG_INC_HEARTBEAT_UV 1
+#define XPC_ACTIVATE_MQ_MSG_OFFLINE_HEARTBEAT_UV 2
+#define XPC_ACTIVATE_MQ_MSG_ONLINE_HEARTBEAT_UV 3
+
+#define XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV 4
+#define XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV 5
+
+#define XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV 6
+#define XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV 7
+#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV 8
+#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV 9
+
+#define XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV 10
+#define XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV 11
+
+struct xpc_activate_mq_msg_uv {
+ struct xpc_activate_mq_msghdr_uv hdr;
+};
+
+struct xpc_activate_mq_msg_heartbeat_req_uv {
+ struct xpc_activate_mq_msghdr_uv hdr;
+ u64 heartbeat;
+};
+
+struct xpc_activate_mq_msg_activate_req_uv {
+ struct xpc_activate_mq_msghdr_uv hdr;
+ unsigned long rp_gpa;
+ unsigned long activate_mq_gpa;
+};
+
+struct xpc_activate_mq_msg_deactivate_req_uv {
+ struct xpc_activate_mq_msghdr_uv hdr;
+ enum xp_retval reason;
+};
+
+struct xpc_activate_mq_msg_chctl_closerequest_uv {
+ struct xpc_activate_mq_msghdr_uv hdr;
+ short ch_number;
+ enum xp_retval reason;
+};
+
+struct xpc_activate_mq_msg_chctl_closereply_uv {
+ struct xpc_activate_mq_msghdr_uv hdr;
+ short ch_number;
+};
+
+struct xpc_activate_mq_msg_chctl_openrequest_uv {
+ struct xpc_activate_mq_msghdr_uv hdr;
+ short ch_number;
+ short entry_size; /* size of notify_mq's GRU messages */
+ short local_nentries; /* ??? Is this needed? What is? */
+};
+
+struct xpc_activate_mq_msg_chctl_openreply_uv {
+ struct xpc_activate_mq_msghdr_uv hdr;
+ short ch_number;
+ short remote_nentries; /* ??? Is this needed? What is? */
+ short local_nentries; /* ??? Is this needed? What is? */
+ unsigned long local_notify_mq_gpa;
+};
/*
* Functions registered by add_timer() or called by kernel_thread() only
@@ -270,22 +261,22 @@ struct xpc_vars_part {
* the passed argument.
*/
#define XPC_PACK_ARGS(_arg1, _arg2) \
- ((((u64) _arg1) & 0xffffffff) | \
- ((((u64) _arg2) & 0xffffffff) << 32))
+ ((((u64)_arg1) & 0xffffffff) | \
+ ((((u64)_arg2) & 0xffffffff) << 32))
-#define XPC_UNPACK_ARG1(_args) (((u64) _args) & 0xffffffff)
-#define XPC_UNPACK_ARG2(_args) ((((u64) _args) >> 32) & 0xffffffff)
+#define XPC_UNPACK_ARG1(_args) (((u64)_args) & 0xffffffff)
+#define XPC_UNPACK_ARG2(_args) ((((u64)_args) >> 32) & 0xffffffff)
/*
* Define a Get/Put value pair (pointers) used with a message queue.
*/
-struct xpc_gp {
+struct xpc_gp_sn2 {
s64 get; /* Get value */
s64 put; /* Put value */
};
#define XPC_GP_SIZE \
- L1_CACHE_ALIGN(sizeof(struct xpc_gp) * XPC_NCHANNELS)
+ L1_CACHE_ALIGN(sizeof(struct xpc_gp_sn2) * XPC_MAX_NCHANNELS)
/*
* Define a structure that contains arguments associated with opening and
@@ -293,31 +284,89 @@ struct xpc_gp {
*/
struct xpc_openclose_args {
u16 reason; /* reason why channel is closing */
- u16 msg_size; /* sizeof each message entry */
+ u16 entry_size; /* sizeof each message entry */
u16 remote_nentries; /* #of message entries in remote msg queue */
u16 local_nentries; /* #of message entries in local msg queue */
- u64 local_msgqueue_pa; /* physical address of local message queue */
+ unsigned long local_msgqueue_pa; /* phys addr of local message queue */
};
#define XPC_OPENCLOSE_ARGS_SIZE \
- L1_CACHE_ALIGN(sizeof(struct xpc_openclose_args) * XPC_NCHANNELS)
+ L1_CACHE_ALIGN(sizeof(struct xpc_openclose_args) * \
+ XPC_MAX_NCHANNELS)
-/* struct xpc_msg flags */
-#define XPC_M_DONE 0x01 /* msg has been received/consumed */
-#define XPC_M_READY 0x02 /* msg is ready to be sent */
-#define XPC_M_INTERRUPT 0x04 /* send interrupt when msg consumed */
+/*
+ * Structures to define a fifo singly-linked list.
+ */
-#define XPC_MSG_ADDRESS(_payload) \
- ((struct xpc_msg *)((u8 *)(_payload) - XPC_MSG_PAYLOAD_OFFSET))
+struct xpc_fifo_entry_uv {
+ struct xpc_fifo_entry_uv *next;
+};
+
+struct xpc_fifo_head_uv {
+ struct xpc_fifo_entry_uv *first;
+ struct xpc_fifo_entry_uv *last;
+ spinlock_t lock;
+ int n_entries;
+};
/*
- * Defines notify entry.
+ * Define a sn2 styled message.
+ *
+ * A user-defined message resides in the payload area. The max size of the
+ * payload is defined by the user via xpc_connect().
+ *
+ * The size of a message entry (within a message queue) must be a 128-byte
+ * cacheline sized multiple in order to facilitate the BTE transfer of messages
+ * from one message queue to another.
+ */
+struct xpc_msg_sn2 {
+ u8 flags; /* FOR XPC INTERNAL USE ONLY */
+ u8 reserved[7]; /* FOR XPC INTERNAL USE ONLY */
+ s64 number; /* FOR XPC INTERNAL USE ONLY */
+
+ u64 payload; /* user defined portion of message */
+};
+
+/* struct xpc_msg_sn2 flags */
+
+#define XPC_M_SN2_DONE 0x01 /* msg has been received/consumed */
+#define XPC_M_SN2_READY 0x02 /* msg is ready to be sent */
+#define XPC_M_SN2_INTERRUPT 0x04 /* send interrupt when msg consumed */
+
+/*
+ * The format of a uv XPC notify_mq GRU message is as follows:
+ *
+ * A user-defined message resides in the payload area. The max size of the
+ * payload is defined by the user via xpc_connect().
+ *
+ * The size of a message (payload and header) sent via the GRU must be either 1
+ * or 2 GRU_CACHE_LINE_BYTES in length.
+ */
+
+struct xpc_notify_mq_msghdr_uv {
+ union {
+ unsigned int gru_msg_hdr; /* FOR GRU INTERNAL USE ONLY */
+ struct xpc_fifo_entry_uv next; /* FOR XPC INTERNAL USE ONLY */
+ } u;
+ short partid; /* FOR XPC INTERNAL USE ONLY */
+ u8 ch_number; /* FOR XPC INTERNAL USE ONLY */
+ u8 size; /* FOR XPC INTERNAL USE ONLY */
+ unsigned int msg_slot_number; /* FOR XPC INTERNAL USE ONLY */
+};
+
+struct xpc_notify_mq_msg_uv {
+ struct xpc_notify_mq_msghdr_uv hdr;
+ unsigned long payload;
+};
+
+/*
+ * Define sn2's notify entry.
*
* This is used to notify a message's sender that their message was received
* and consumed by the intended recipient.
*/
-struct xpc_notify {
+struct xpc_notify_sn2 {
u8 type; /* type of notification */
/* the following two fields are only used if type == XPC_N_CALL */
@@ -325,9 +374,20 @@ struct xpc_notify {
void *key; /* pointer to user's key */
};
-/* struct xpc_notify type of notification */
+/* struct xpc_notify_sn2 type of notification */
+
+#define XPC_N_CALL 0x01 /* notify function provided by user */
-#define XPC_N_CALL 0x01 /* notify function provided by user */
+/*
+ * Define uv's version of the notify entry. It additionally is used to allocate
+ * a msg slot on the remote partition into which is copied a sent message.
+ */
+struct xpc_send_msg_slot_uv {
+ struct xpc_fifo_entry_uv next;
+ unsigned int msg_slot_number;
+ xpc_notify_func func; /* user's notify function */
+ void *key; /* pointer to user's key */
+};
/*
* Define the structure that manages all the stuff required by a channel. In
@@ -339,8 +399,12 @@ struct xpc_notify {
* There is an array of these structures for each remote partition. It is
* allocated at the time a partition becomes active. The array contains one
* of these structures for each potential channel connection to that partition.
+ */
+
+/*
+ * The following is sn2 only.
*
- * Each of these structures manages two message queues (circular buffers).
+ * Each channel structure manages two message queues (circular buffers).
* They are allocated at the time a channel connection is made. One of
* these message queues (local_msgqueue) holds the locally created messages
* that are destined for the remote partition. The other of these message
@@ -407,58 +471,72 @@ struct xpc_notify {
* new messages, by the clearing of the message flags of the acknowledged
* messages.
*/
+
+struct xpc_channel_sn2 {
+ struct xpc_openclose_args *local_openclose_args; /* args passed on */
+ /* opening or closing of channel */
+
+ void *local_msgqueue_base; /* base address of kmalloc'd space */
+ struct xpc_msg_sn2 *local_msgqueue; /* local message queue */
+ void *remote_msgqueue_base; /* base address of kmalloc'd space */
+ struct xpc_msg_sn2 *remote_msgqueue; /* cached copy of remote */
+ /* partition's local message queue */
+ unsigned long remote_msgqueue_pa; /* phys addr of remote partition's */
+ /* local message queue */
+
+ struct xpc_notify_sn2 *notify_queue;/* notify queue for messages sent */
+
+ /* various flavors of local and remote Get/Put values */
+
+ struct xpc_gp_sn2 *local_GP; /* local Get/Put values */
+ struct xpc_gp_sn2 remote_GP; /* remote Get/Put values */
+ struct xpc_gp_sn2 w_local_GP; /* working local Get/Put values */
+ struct xpc_gp_sn2 w_remote_GP; /* working remote Get/Put values */
+ s64 next_msg_to_pull; /* Put value of next msg to pull */
+
+ struct mutex msg_to_pull_mutex; /* next msg to pull serialization */
+};
+
+struct xpc_channel_uv {
+ unsigned long remote_notify_mq_gpa; /* gru phys address of remote */
+ /* partition's notify mq */
+
+ struct xpc_send_msg_slot_uv *send_msg_slots;
+ struct xpc_notify_mq_msg_uv *recv_msg_slots;
+
+ struct xpc_fifo_head_uv msg_slot_free_list;
+ struct xpc_fifo_head_uv recv_msg_list; /* deliverable payloads */
+};
+
struct xpc_channel {
short partid; /* ID of remote partition connected */
spinlock_t lock; /* lock for updating this structure */
- u32 flags; /* general flags */
+ unsigned int flags; /* general flags */
enum xp_retval reason; /* reason why channel is disconnect'g */
int reason_line; /* line# disconnect initiated from */
u16 number; /* channel # */
- u16 msg_size; /* sizeof each msg entry */
+ u16 entry_size; /* sizeof each msg entry */
u16 local_nentries; /* #of msg entries in local msg queue */
u16 remote_nentries; /* #of msg entries in remote msg queue */
- void *local_msgqueue_base; /* base address of kmalloc'd space */
- struct xpc_msg *local_msgqueue; /* local message queue */
- void *remote_msgqueue_base; /* base address of kmalloc'd space */
- struct xpc_msg *remote_msgqueue; /* cached copy of remote partition's */
- /* local message queue */
- u64 remote_msgqueue_pa; /* phys addr of remote partition's */
- /* local message queue */
-
atomic_t references; /* #of external references to queues */
atomic_t n_on_msg_allocate_wq; /* #on msg allocation wait queue */
wait_queue_head_t msg_allocate_wq; /* msg allocation wait queue */
- u8 delayed_IPI_flags; /* IPI flags received, but delayed */
+ u8 delayed_chctl_flags; /* chctl flags received, but delayed */
/* action until channel disconnected */
- /* queue of msg senders who want to be notified when msg received */
-
atomic_t n_to_notify; /* #of msg senders to notify */
- struct xpc_notify *notify_queue; /* notify queue for messages sent */
xpc_channel_func func; /* user's channel function */
void *key; /* pointer to user's key */
- struct mutex msg_to_pull_mutex; /* next msg to pull serialization */
struct completion wdisconnect_wait; /* wait for channel disconnect */
- struct xpc_openclose_args *local_openclose_args; /* args passed on */
- /* opening or closing of channel */
-
- /* various flavors of local and remote Get/Put values */
-
- struct xpc_gp *local_GP; /* local Get/Put values */
- struct xpc_gp remote_GP; /* remote Get/Put values */
- struct xpc_gp w_local_GP; /* working local Get/Put values */
- struct xpc_gp w_remote_GP; /* working remote Get/Put values */
- s64 next_msg_to_pull; /* Put value of next msg to pull */
-
/* kthread management related fields */
atomic_t kthreads_assigned; /* #of kthreads assigned to channel */
@@ -469,6 +547,11 @@ struct xpc_channel {
wait_queue_head_t idle_wq; /* idle kthread wait queue */
+ union {
+ struct xpc_channel_sn2 sn2;
+ struct xpc_channel_uv uv;
+ } sn;
+
} ____cacheline_aligned;
/* struct xpc_channel flags */
@@ -501,33 +584,128 @@ struct xpc_channel {
#define XPC_C_WDISCONNECT 0x00040000 /* waiting for channel disconnect */
/*
- * Manages channels on a partition basis. There is one of these structures
+ * The channel control flags (chctl) union consists of a 64-bit variable which
+ * is divided up into eight bytes, ordered from right to left. Byte zero
+ * pertains to channel 0, byte one to channel 1, and so on. Each channel's byte
+ * can have one or more of the chctl flags set in it.
+ */
+
+union xpc_channel_ctl_flags {
+ u64 all_flags;
+ u8 flags[XPC_MAX_NCHANNELS];
+};
+
+/* chctl flags */
+#define XPC_CHCTL_CLOSEREQUEST 0x01
+#define XPC_CHCTL_CLOSEREPLY 0x02
+#define XPC_CHCTL_OPENREQUEST 0x04
+#define XPC_CHCTL_OPENREPLY 0x08
+#define XPC_CHCTL_MSGREQUEST 0x10
+
+#define XPC_OPENCLOSE_CHCTL_FLAGS \
+ (XPC_CHCTL_CLOSEREQUEST | XPC_CHCTL_CLOSEREPLY | \
+ XPC_CHCTL_OPENREQUEST | XPC_CHCTL_OPENREPLY)
+#define XPC_MSG_CHCTL_FLAGS XPC_CHCTL_MSGREQUEST
+
+static inline int
+xpc_any_openclose_chctl_flags_set(union xpc_channel_ctl_flags *chctl)
+{
+ int ch_number;
+
+ for (ch_number = 0; ch_number < XPC_MAX_NCHANNELS; ch_number++) {
+ if (chctl->flags[ch_number] & XPC_OPENCLOSE_CHCTL_FLAGS)
+ return 1;
+ }
+ return 0;
+}
+
+static inline int
+xpc_any_msg_chctl_flags_set(union xpc_channel_ctl_flags *chctl)
+{
+ int ch_number;
+
+ for (ch_number = 0; ch_number < XPC_MAX_NCHANNELS; ch_number++) {
+ if (chctl->flags[ch_number] & XPC_MSG_CHCTL_FLAGS)
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * Manage channels on a partition basis. There is one of these structures
* for each partition (a partition will never utilize the structure that
* represents itself).
*/
+
+struct xpc_partition_sn2 {
+ unsigned long remote_amos_page_pa; /* paddr of partition's amos page */
+ int activate_IRQ_nasid; /* active partition's act/deact nasid */
+ int activate_IRQ_phys_cpuid; /* active part's act/deact phys cpuid */
+
+ unsigned long remote_vars_pa; /* phys addr of partition's vars */
+ unsigned long remote_vars_part_pa; /* paddr of partition's vars part */
+ u8 remote_vars_version; /* version# of partition's vars */
+
+ void *local_GPs_base; /* base address of kmalloc'd space */
+ struct xpc_gp_sn2 *local_GPs; /* local Get/Put values */
+ void *remote_GPs_base; /* base address of kmalloc'd space */
+ struct xpc_gp_sn2 *remote_GPs; /* copy of remote partition's local */
+ /* Get/Put values */
+ unsigned long remote_GPs_pa; /* phys addr of remote partition's local */
+ /* Get/Put values */
+
+ void *local_openclose_args_base; /* base address of kmalloc'd space */
+ struct xpc_openclose_args *local_openclose_args; /* local's args */
+ unsigned long remote_openclose_args_pa; /* phys addr of remote's args */
+
+ int notify_IRQ_nasid; /* nasid of where to send notify IRQs */
+ int notify_IRQ_phys_cpuid; /* CPUID of where to send notify IRQs */
+ char notify_IRQ_owner[8]; /* notify IRQ's owner's name */
+
+ struct amo *remote_chctl_amo_va; /* addr of remote chctl flags' amo */
+ struct amo *local_chctl_amo_va; /* address of chctl flags' amo */
+
+ struct timer_list dropped_notify_IRQ_timer; /* dropped IRQ timer */
+};
+
+struct xpc_partition_uv {
+ unsigned long remote_activate_mq_gpa; /* gru phys address of remote */
+ /* partition's activate mq */
+ spinlock_t flags_lock; /* protect updating of flags */
+ unsigned int flags; /* general flags */
+ u8 remote_act_state; /* remote partition's act_state */
+ u8 act_state_req; /* act_state request from remote partition */
+ enum xp_retval reason; /* reason for deactivate act_state request */
+ u64 heartbeat; /* incremented by remote partition */
+};
+
+/* struct xpc_partition_uv flags */
+
+#define XPC_P_HEARTBEAT_OFFLINE_UV 0x00000001
+#define XPC_P_ENGAGED_UV 0x00000002
+
+/* struct xpc_partition_uv act_state change requests */
+
+#define XPC_P_ASR_ACTIVATE_UV 0x01
+#define XPC_P_ASR_REACTIVATE_UV 0x02
+#define XPC_P_ASR_DEACTIVATE_UV 0x03
+
struct xpc_partition {
/* XPC HB infrastructure */
u8 remote_rp_version; /* version# of partition's rsvd pg */
- struct timespec remote_rp_stamp; /* time when rsvd pg was initialized */
- u64 remote_rp_pa; /* phys addr of partition's rsvd pg */
- u64 remote_vars_pa; /* phys addr of partition's vars */
- u64 remote_vars_part_pa; /* phys addr of partition's vars part */
+ unsigned long remote_rp_ts_jiffies; /* timestamp when rsvd pg setup */
+ unsigned long remote_rp_pa; /* phys addr of partition's rsvd pg */
u64 last_heartbeat; /* HB at last read */
- u64 remote_amos_page_pa; /* phys addr of partition's amos page */
- int remote_act_nasid; /* active part's act/deact nasid */
- int remote_act_phys_cpuid; /* active part's act/deact phys cpuid */
- u32 act_IRQ_rcvd; /* IRQs since activation */
+ u32 activate_IRQ_rcvd; /* IRQs since activation */
spinlock_t act_lock; /* protect updating of act_state */
u8 act_state; /* from XPC HB viewpoint */
- u8 remote_vars_version; /* version# of partition's vars */
enum xp_retval reason; /* reason partition is deactivating */
int reason_line; /* line# deactivation initiated from */
- int reactivate_nasid; /* nasid in partition to reactivate */
- unsigned long disengage_request_timeout; /* timeout in jiffies */
- struct timer_list disengage_request_timer;
+ unsigned long disengage_timeout; /* timeout in jiffies */
+ struct timer_list disengage_timer;
/* XPC infrastructure referencing and teardown control */
@@ -535,85 +713,63 @@ struct xpc_partition {
wait_queue_head_t teardown_wq; /* kthread waiting to teardown infra */
atomic_t references; /* #of references to infrastructure */
- /*
- * NONE OF THE PRECEDING FIELDS OF THIS STRUCTURE WILL BE CLEARED WHEN
- * XPC SETS UP THE NECESSARY INFRASTRUCTURE TO SUPPORT CROSS PARTITION
- * COMMUNICATION. ALL OF THE FOLLOWING FIELDS WILL BE CLEARED. (THE
- * 'nchannels' FIELD MUST BE THE FIRST OF THE FIELDS TO BE CLEARED.)
- */
-
u8 nchannels; /* #of defined channels supported */
atomic_t nchannels_active; /* #of channels that are not DISCONNECTED */
atomic_t nchannels_engaged; /* #of channels engaged with remote part */
struct xpc_channel *channels; /* array of channel structures */
- void *local_GPs_base; /* base address of kmalloc'd space */
- struct xpc_gp *local_GPs; /* local Get/Put values */
- void *remote_GPs_base; /* base address of kmalloc'd space */
- struct xpc_gp *remote_GPs; /* copy of remote partition's local */
- /* Get/Put values */
- u64 remote_GPs_pa; /* phys address of remote partition's local */
- /* Get/Put values */
+ /* fields used for managing channel avialability and activity */
- /* fields used to pass args when opening or closing a channel */
+ union xpc_channel_ctl_flags chctl; /* chctl flags yet to be processed */
+ spinlock_t chctl_lock; /* chctl flags lock */
- void *local_openclose_args_base; /* base address of kmalloc'd space */
- struct xpc_openclose_args *local_openclose_args; /* local's args */
void *remote_openclose_args_base; /* base address of kmalloc'd space */
struct xpc_openclose_args *remote_openclose_args; /* copy of remote's */
/* args */
- u64 remote_openclose_args_pa; /* phys addr of remote's args */
-
- /* IPI sending, receiving and handling related fields */
-
- int remote_IPI_nasid; /* nasid of where to send IPIs */
- int remote_IPI_phys_cpuid; /* phys CPU ID of where to send IPIs */
- AMO_t *remote_IPI_amo_va; /* address of remote IPI AMO_t structure */
-
- AMO_t *local_IPI_amo_va; /* address of IPI AMO_t structure */
- u64 local_IPI_amo; /* IPI amo flags yet to be handled */
- char IPI_owner[8]; /* IPI owner's name */
- struct timer_list dropped_IPI_timer; /* dropped IPI timer */
-
- spinlock_t IPI_lock; /* IPI handler lock */
/* channel manager related fields */
atomic_t channel_mgr_requests; /* #of requests to activate chan mgr */
wait_queue_head_t channel_mgr_wq; /* channel mgr's wait queue */
+ union {
+ struct xpc_partition_sn2 sn2;
+ struct xpc_partition_uv uv;
+ } sn;
+
} ____cacheline_aligned;
/* struct xpc_partition act_state values (for XPC HB) */
-#define XPC_P_INACTIVE 0x00 /* partition is not active */
-#define XPC_P_ACTIVATION_REQ 0x01 /* created thread to activate */
-#define XPC_P_ACTIVATING 0x02 /* activation thread started */
-#define XPC_P_ACTIVE 0x03 /* xpc_partition_up() was called */
-#define XPC_P_DEACTIVATING 0x04 /* partition deactivation initiated */
+#define XPC_P_AS_INACTIVE 0x00 /* partition is not active */
+#define XPC_P_AS_ACTIVATION_REQ 0x01 /* created thread to activate */
+#define XPC_P_AS_ACTIVATING 0x02 /* activation thread started */
+#define XPC_P_AS_ACTIVE 0x03 /* xpc_partition_up() was called */
+#define XPC_P_AS_DEACTIVATING 0x04 /* partition deactivation initiated */
#define XPC_DEACTIVATE_PARTITION(_p, _reason) \
xpc_deactivate_partition(__LINE__, (_p), (_reason))
/* struct xpc_partition setup_state values */
-#define XPC_P_UNSET 0x00 /* infrastructure was never setup */
-#define XPC_P_SETUP 0x01 /* infrastructure is setup */
-#define XPC_P_WTEARDOWN 0x02 /* waiting to teardown infrastructure */
-#define XPC_P_TORNDOWN 0x03 /* infrastructure is torndown */
+#define XPC_P_SS_UNSET 0x00 /* infrastructure was never setup */
+#define XPC_P_SS_SETUP 0x01 /* infrastructure is setup */
+#define XPC_P_SS_WTEARDOWN 0x02 /* waiting to teardown infrastructure */
+#define XPC_P_SS_TORNDOWN 0x03 /* infrastructure is torndown */
/*
- * struct xpc_partition IPI_timer #of seconds to wait before checking for
- * dropped IPIs. These occur whenever an IPI amo write doesn't complete until
- * after the IPI was received.
+ * struct xpc_partition_sn2's dropped notify IRQ timer is set to wait the
+ * following interval #of seconds before checking for dropped notify IRQs.
+ * These can occur whenever an IRQ's associated amo write doesn't complete
+ * until after the IRQ was received.
*/
-#define XPC_P_DROPPED_IPI_WAIT (0.25 * HZ)
+#define XPC_DROPPED_NOTIFY_IRQ_WAIT_INTERVAL (0.25 * HZ)
/* number of seconds to wait for other partitions to disengage */
-#define XPC_DISENGAGE_REQUEST_DEFAULT_TIMELIMIT 90
+#define XPC_DISENGAGE_DEFAULT_TIMELIMIT 90
-/* interval in seconds to print 'waiting disengagement' messages */
-#define XPC_DISENGAGE_PRINTMSG_INTERVAL 10
+/* interval in seconds to print 'waiting deactivation' messages */
+#define XPC_DEACTIVATE_PRINTMSG_INTERVAL 10
#define XPC_PARTID(_p) ((short)((_p) - &xpc_partitions[0]))
@@ -623,33 +779,92 @@ extern struct xpc_registration xpc_registrations[];
/* found in xpc_main.c */
extern struct device *xpc_part;
extern struct device *xpc_chan;
-extern int xpc_disengage_request_timelimit;
-extern int xpc_disengage_request_timedout;
-extern irqreturn_t xpc_notify_IRQ_handler(int, void *);
-extern void xpc_dropped_IPI_check(struct xpc_partition *);
+extern int xpc_disengage_timelimit;
+extern int xpc_disengage_timedout;
+extern int xpc_activate_IRQ_rcvd;
+extern spinlock_t xpc_activate_IRQ_rcvd_lock;
+extern wait_queue_head_t xpc_activate_IRQ_wq;
+extern void *xpc_heartbeating_to_mask;
+extern void *xpc_kzalloc_cacheline_aligned(size_t, gfp_t, void **);
extern void xpc_activate_partition(struct xpc_partition *);
extern void xpc_activate_kthreads(struct xpc_channel *, int);
extern void xpc_create_kthreads(struct xpc_channel *, int, int);
extern void xpc_disconnect_wait(int);
+extern int (*xpc_setup_partitions_sn) (void);
+extern enum xp_retval (*xpc_get_partition_rsvd_page_pa) (void *, u64 *,
+ unsigned long *,
+ size_t *);
+extern int (*xpc_setup_rsvd_page_sn) (struct xpc_rsvd_page *);
+extern void (*xpc_heartbeat_init) (void);
+extern void (*xpc_heartbeat_exit) (void);
+extern void (*xpc_increment_heartbeat) (void);
+extern void (*xpc_offline_heartbeat) (void);
+extern void (*xpc_online_heartbeat) (void);
+extern enum xp_retval (*xpc_get_remote_heartbeat) (struct xpc_partition *);
+extern enum xp_retval (*xpc_make_first_contact) (struct xpc_partition *);
+extern u64 (*xpc_get_chctl_all_flags) (struct xpc_partition *);
+extern enum xp_retval (*xpc_setup_msg_structures) (struct xpc_channel *);
+extern void (*xpc_teardown_msg_structures) (struct xpc_channel *);
+extern void (*xpc_notify_senders_of_disconnect) (struct xpc_channel *);
+extern void (*xpc_process_msg_chctl_flags) (struct xpc_partition *, int);
+extern int (*xpc_n_of_deliverable_payloads) (struct xpc_channel *);
+extern void *(*xpc_get_deliverable_payload) (struct xpc_channel *);
+extern void (*xpc_request_partition_activation) (struct xpc_rsvd_page *,
+ unsigned long, int);
+extern void (*xpc_request_partition_reactivation) (struct xpc_partition *);
+extern void (*xpc_request_partition_deactivation) (struct xpc_partition *);
+extern void (*xpc_cancel_partition_deactivation_request) (
+ struct xpc_partition *);
+extern void (*xpc_process_activate_IRQ_rcvd) (void);
+extern enum xp_retval (*xpc_setup_ch_structures_sn) (struct xpc_partition *);
+extern void (*xpc_teardown_ch_structures_sn) (struct xpc_partition *);
+
+extern void (*xpc_indicate_partition_engaged) (struct xpc_partition *);
+extern int (*xpc_partition_engaged) (short);
+extern int (*xpc_any_partition_engaged) (void);
+extern void (*xpc_indicate_partition_disengaged) (struct xpc_partition *);
+extern void (*xpc_assume_partition_disengaged) (short);
+
+extern void (*xpc_send_chctl_closerequest) (struct xpc_channel *,
+ unsigned long *);
+extern void (*xpc_send_chctl_closereply) (struct xpc_channel *,
+ unsigned long *);
+extern void (*xpc_send_chctl_openrequest) (struct xpc_channel *,
+ unsigned long *);
+extern void (*xpc_send_chctl_openreply) (struct xpc_channel *, unsigned long *);
+
+extern void (*xpc_save_remote_msgqueue_pa) (struct xpc_channel *,
+ unsigned long);
+
+extern enum xp_retval (*xpc_send_payload) (struct xpc_channel *, u32, void *,
+ u16, u8, xpc_notify_func, void *);
+extern void (*xpc_received_payload) (struct xpc_channel *, void *);
+
+/* found in xpc_sn2.c */
+extern int xpc_init_sn2(void);
+extern void xpc_exit_sn2(void);
+
+/* found in xpc_uv.c */
+extern int xpc_init_uv(void);
+extern void xpc_exit_uv(void);
/* found in xpc_partition.c */
extern int xpc_exiting;
-extern struct xpc_vars *xpc_vars;
+extern int xpc_nasid_mask_nlongs;
extern struct xpc_rsvd_page *xpc_rsvd_page;
-extern struct xpc_vars_part *xpc_vars_part;
-extern struct xpc_partition xpc_partitions[XP_MAX_PARTITIONS + 1];
-extern char *xpc_remote_copy_buffer;
-extern void *xpc_remote_copy_buffer_base;
+extern unsigned long *xpc_mach_nasids;
+extern struct xpc_partition *xpc_partitions;
extern void *xpc_kmalloc_cacheline_aligned(size_t, gfp_t, void **);
-extern struct xpc_rsvd_page *xpc_rsvd_page_init(void);
-extern void xpc_allow_IPI_ops(void);
-extern void xpc_restrict_IPI_ops(void);
-extern int xpc_identify_act_IRQ_sender(void);
+extern int xpc_setup_rsvd_page(void);
+extern void xpc_teardown_rsvd_page(void);
+extern int xpc_identify_activate_IRQ_sender(void);
extern int xpc_partition_disengaged(struct xpc_partition *);
extern enum xp_retval xpc_mark_partition_active(struct xpc_partition *);
extern void xpc_mark_partition_inactive(struct xpc_partition *);
extern void xpc_discovery(void);
-extern void xpc_check_remote_hb(void);
+extern enum xp_retval xpc_get_remote_rp(int, unsigned long *,
+ struct xpc_rsvd_page *,
+ unsigned long *);
extern void xpc_deactivate_partition(const int, struct xpc_partition *,
enum xp_retval);
extern enum xp_retval xpc_initiate_partid_to_nasids(short, void *);
@@ -657,21 +872,52 @@ extern enum xp_retval xpc_initiate_partid_to_nasids(short, void *);
/* found in xpc_channel.c */
extern void xpc_initiate_connect(int);
extern void xpc_initiate_disconnect(int);
-extern enum xp_retval xpc_initiate_allocate(short, int, u32, void **);
-extern enum xp_retval xpc_initiate_send(short, int, void *);
-extern enum xp_retval xpc_initiate_send_notify(short, int, void *,
+extern enum xp_retval xpc_allocate_msg_wait(struct xpc_channel *);
+extern enum xp_retval xpc_initiate_send(short, int, u32, void *, u16);
+extern enum xp_retval xpc_initiate_send_notify(short, int, u32, void *, u16,
xpc_notify_func, void *);
extern void xpc_initiate_received(short, int, void *);
-extern enum xp_retval xpc_setup_infrastructure(struct xpc_partition *);
-extern enum xp_retval xpc_pull_remote_vars_part(struct xpc_partition *);
-extern void xpc_process_channel_activity(struct xpc_partition *);
+extern void xpc_process_sent_chctl_flags(struct xpc_partition *);
extern void xpc_connected_callout(struct xpc_channel *);
-extern void xpc_deliver_msg(struct xpc_channel *);
+extern void xpc_deliver_payload(struct xpc_channel *);
extern void xpc_disconnect_channel(const int, struct xpc_channel *,
enum xp_retval, unsigned long *);
extern void xpc_disconnect_callout(struct xpc_channel *, enum xp_retval);
extern void xpc_partition_going_down(struct xpc_partition *, enum xp_retval);
-extern void xpc_teardown_infrastructure(struct xpc_partition *);
+
+static inline int
+xpc_hb_allowed(short partid, void *heartbeating_to_mask)
+{
+ return test_bit(partid, heartbeating_to_mask);
+}
+
+static inline int
+xpc_any_hbs_allowed(void)
+{
+ DBUG_ON(xpc_heartbeating_to_mask == NULL);
+ return !bitmap_empty(xpc_heartbeating_to_mask, xp_max_npartitions);
+}
+
+static inline void
+xpc_allow_hb(short partid)
+{
+ DBUG_ON(xpc_heartbeating_to_mask == NULL);
+ set_bit(partid, xpc_heartbeating_to_mask);
+}
+
+static inline void
+xpc_disallow_hb(short partid)
+{
+ DBUG_ON(xpc_heartbeating_to_mask == NULL);
+ clear_bit(partid, xpc_heartbeating_to_mask);
+}
+
+static inline void
+xpc_disallow_all_hbs(void)
+{
+ DBUG_ON(xpc_heartbeating_to_mask == NULL);
+ bitmap_zero(xpc_heartbeating_to_mask, xp_max_npartitions);
+}
static inline void
xpc_wakeup_channel_mgr(struct xpc_partition *part)
@@ -713,7 +959,7 @@ xpc_part_deref(struct xpc_partition *part)
s32 refs = atomic_dec_return(&part->references);
DBUG_ON(refs < 0);
- if (refs == 0 && part->setup_state == XPC_P_WTEARDOWN)
+ if (refs == 0 && part->setup_state == XPC_P_SS_WTEARDOWN)
wake_up(&part->teardown_wq);
}
@@ -723,7 +969,7 @@ xpc_part_ref(struct xpc_partition *part)
int setup;
atomic_inc(&part->references);
- setup = (part->setup_state == XPC_P_SETUP);
+ setup = (part->setup_state == XPC_P_SS_SETUP);
if (!setup)
xpc_part_deref(part);
@@ -741,416 +987,4 @@ xpc_part_ref(struct xpc_partition *part)
(_p)->reason_line = _line; \
}
-/*
- * This next set of inlines are used to keep track of when a partition is
- * potentially engaged in accessing memory belonging to another partition.
- */
-
-static inline void
-xpc_mark_partition_engaged(struct xpc_partition *part)
-{
- unsigned long irq_flags;
- AMO_t *amo = (AMO_t *)__va(part->remote_amos_page_pa +
- (XPC_ENGAGED_PARTITIONS_AMO *
- sizeof(AMO_t)));
-
- local_irq_save(irq_flags);
-
- /* set bit corresponding to our partid in remote partition's AMO */
- FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR,
- (1UL << sn_partition_id));
- /*
- * We must always use the nofault function regardless of whether we
- * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
- * didn't, we'd never know that the other partition is down and would
- * keep sending IPIs and AMOs to it until the heartbeat times out.
- */
- (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
- variable),
- xp_nofault_PIOR_target));
-
- local_irq_restore(irq_flags);
-}
-
-static inline void
-xpc_mark_partition_disengaged(struct xpc_partition *part)
-{
- unsigned long irq_flags;
- AMO_t *amo = (AMO_t *)__va(part->remote_amos_page_pa +
- (XPC_ENGAGED_PARTITIONS_AMO *
- sizeof(AMO_t)));
-
- local_irq_save(irq_flags);
-
- /* clear bit corresponding to our partid in remote partition's AMO */
- FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
- ~(1UL << sn_partition_id));
- /*
- * We must always use the nofault function regardless of whether we
- * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
- * didn't, we'd never know that the other partition is down and would
- * keep sending IPIs and AMOs to it until the heartbeat times out.
- */
- (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
- variable),
- xp_nofault_PIOR_target));
-
- local_irq_restore(irq_flags);
-}
-
-static inline void
-xpc_request_partition_disengage(struct xpc_partition *part)
-{
- unsigned long irq_flags;
- AMO_t *amo = (AMO_t *)__va(part->remote_amos_page_pa +
- (XPC_DISENGAGE_REQUEST_AMO * sizeof(AMO_t)));
-
- local_irq_save(irq_flags);
-
- /* set bit corresponding to our partid in remote partition's AMO */
- FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR,
- (1UL << sn_partition_id));
- /*
- * We must always use the nofault function regardless of whether we
- * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
- * didn't, we'd never know that the other partition is down and would
- * keep sending IPIs and AMOs to it until the heartbeat times out.
- */
- (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
- variable),
- xp_nofault_PIOR_target));
-
- local_irq_restore(irq_flags);
-}
-
-static inline void
-xpc_cancel_partition_disengage_request(struct xpc_partition *part)
-{
- unsigned long irq_flags;
- AMO_t *amo = (AMO_t *)__va(part->remote_amos_page_pa +
- (XPC_DISENGAGE_REQUEST_AMO * sizeof(AMO_t)));
-
- local_irq_save(irq_flags);
-
- /* clear bit corresponding to our partid in remote partition's AMO */
- FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
- ~(1UL << sn_partition_id));
- /*
- * We must always use the nofault function regardless of whether we
- * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
- * didn't, we'd never know that the other partition is down and would
- * keep sending IPIs and AMOs to it until the heartbeat times out.
- */
- (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
- variable),
- xp_nofault_PIOR_target));
-
- local_irq_restore(irq_flags);
-}
-
-static inline u64
-xpc_partition_engaged(u64 partid_mask)
-{
- AMO_t *amo = xpc_vars->amos_page + XPC_ENGAGED_PARTITIONS_AMO;
-
- /* return our partition's AMO variable ANDed with partid_mask */
- return (FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) &
- partid_mask);
-}
-
-static inline u64
-xpc_partition_disengage_requested(u64 partid_mask)
-{
- AMO_t *amo = xpc_vars->amos_page + XPC_DISENGAGE_REQUEST_AMO;
-
- /* return our partition's AMO variable ANDed with partid_mask */
- return (FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) &
- partid_mask);
-}
-
-static inline void
-xpc_clear_partition_engaged(u64 partid_mask)
-{
- AMO_t *amo = xpc_vars->amos_page + XPC_ENGAGED_PARTITIONS_AMO;
-
- /* clear bit(s) based on partid_mask in our partition's AMO */
- FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
- ~partid_mask);
-}
-
-static inline void
-xpc_clear_partition_disengage_request(u64 partid_mask)
-{
- AMO_t *amo = xpc_vars->amos_page + XPC_DISENGAGE_REQUEST_AMO;
-
- /* clear bit(s) based on partid_mask in our partition's AMO */
- FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
- ~partid_mask);
-}
-
-/*
- * The following set of macros and inlines are used for the sending and
- * receiving of IPIs (also known as IRQs). There are two flavors of IPIs,
- * one that is associated with partition activity (SGI_XPC_ACTIVATE) and
- * the other that is associated with channel activity (SGI_XPC_NOTIFY).
- */
-
-static inline u64
-xpc_IPI_receive(AMO_t *amo)
-{
- return FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_CLEAR);
-}
-
-static inline enum xp_retval
-xpc_IPI_send(AMO_t *amo, u64 flag, int nasid, int phys_cpuid, int vector)
-{
- int ret = 0;
- unsigned long irq_flags;
-
- local_irq_save(irq_flags);
-
- FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR, flag);
- sn_send_IPI_phys(nasid, phys_cpuid, vector, 0);
-
- /*
- * We must always use the nofault function regardless of whether we
- * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
- * didn't, we'd never know that the other partition is down and would
- * keep sending IPIs and AMOs to it until the heartbeat times out.
- */
- ret = xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->variable),
- xp_nofault_PIOR_target));
-
- local_irq_restore(irq_flags);
-
- return ((ret == 0) ? xpSuccess : xpPioReadError);
-}
-
-/*
- * IPIs associated with SGI_XPC_ACTIVATE IRQ.
- */
-
-/*
- * Flag the appropriate AMO variable and send an IPI to the specified node.
- */
-static inline void
-xpc_activate_IRQ_send(u64 amos_page_pa, int from_nasid, int to_nasid,
- int to_phys_cpuid)
-{
- int w_index = XPC_NASID_W_INDEX(from_nasid);
- int b_index = XPC_NASID_B_INDEX(from_nasid);
- AMO_t *amos = (AMO_t *)__va(amos_page_pa +
- (XPC_ACTIVATE_IRQ_AMOS * sizeof(AMO_t)));
-
- (void)xpc_IPI_send(&amos[w_index], (1UL << b_index), to_nasid,
- to_phys_cpuid, SGI_XPC_ACTIVATE);
-}
-
-static inline void
-xpc_IPI_send_activate(struct xpc_vars *vars)
-{
- xpc_activate_IRQ_send(vars->amos_page_pa, cnodeid_to_nasid(0),
- vars->act_nasid, vars->act_phys_cpuid);
-}
-
-static inline void
-xpc_IPI_send_activated(struct xpc_partition *part)
-{
- xpc_activate_IRQ_send(part->remote_amos_page_pa, cnodeid_to_nasid(0),
- part->remote_act_nasid,
- part->remote_act_phys_cpuid);
-}
-
-static inline void
-xpc_IPI_send_reactivate(struct xpc_partition *part)
-{
- xpc_activate_IRQ_send(xpc_vars->amos_page_pa, part->reactivate_nasid,
- xpc_vars->act_nasid, xpc_vars->act_phys_cpuid);
-}
-
-static inline void
-xpc_IPI_send_disengage(struct xpc_partition *part)
-{
- xpc_activate_IRQ_send(part->remote_amos_page_pa, cnodeid_to_nasid(0),
- part->remote_act_nasid,
- part->remote_act_phys_cpuid);
-}
-
-/*
- * IPIs associated with SGI_XPC_NOTIFY IRQ.
- */
-
-/*
- * Send an IPI to the remote partition that is associated with the
- * specified channel.
- */
-#define XPC_NOTIFY_IRQ_SEND(_ch, _ipi_f, _irq_f) \
- xpc_notify_IRQ_send(_ch, _ipi_f, #_ipi_f, _irq_f)
-
-static inline void
-xpc_notify_IRQ_send(struct xpc_channel *ch, u8 ipi_flag, char *ipi_flag_string,
- unsigned long *irq_flags)
-{
- struct xpc_partition *part = &xpc_partitions[ch->partid];
- enum xp_retval ret;
-
- if (likely(part->act_state != XPC_P_DEACTIVATING)) {
- ret = xpc_IPI_send(part->remote_IPI_amo_va,
- (u64)ipi_flag << (ch->number * 8),
- part->remote_IPI_nasid,
- part->remote_IPI_phys_cpuid, SGI_XPC_NOTIFY);
- dev_dbg(xpc_chan, "%s sent to partid=%d, channel=%d, ret=%d\n",
- ipi_flag_string, ch->partid, ch->number, ret);
- if (unlikely(ret != xpSuccess)) {
- if (irq_flags != NULL)
- spin_unlock_irqrestore(&ch->lock, *irq_flags);
- XPC_DEACTIVATE_PARTITION(part, ret);
- if (irq_flags != NULL)
- spin_lock_irqsave(&ch->lock, *irq_flags);
- }
- }
-}
-
-/*
- * Make it look like the remote partition, which is associated with the
- * specified channel, sent us an IPI. This faked IPI will be handled
- * by xpc_dropped_IPI_check().
- */
-#define XPC_NOTIFY_IRQ_SEND_LOCAL(_ch, _ipi_f) \
- xpc_notify_IRQ_send_local(_ch, _ipi_f, #_ipi_f)
-
-static inline void
-xpc_notify_IRQ_send_local(struct xpc_channel *ch, u8 ipi_flag,
- char *ipi_flag_string)
-{
- struct xpc_partition *part = &xpc_partitions[ch->partid];
-
- FETCHOP_STORE_OP(TO_AMO((u64)&part->local_IPI_amo_va->variable),
- FETCHOP_OR, ((u64)ipi_flag << (ch->number * 8)));
- dev_dbg(xpc_chan, "%s sent local from partid=%d, channel=%d\n",
- ipi_flag_string, ch->partid, ch->number);
-}
-
-/*
- * The sending and receiving of IPIs includes the setting of an AMO variable
- * to indicate the reason the IPI was sent. The 64-bit variable is divided
- * up into eight bytes, ordered from right to left. Byte zero pertains to
- * channel 0, byte one to channel 1, and so on. Each byte is described by
- * the following IPI flags.
- */
-
-#define XPC_IPI_CLOSEREQUEST 0x01
-#define XPC_IPI_CLOSEREPLY 0x02
-#define XPC_IPI_OPENREQUEST 0x04
-#define XPC_IPI_OPENREPLY 0x08
-#define XPC_IPI_MSGREQUEST 0x10
-
-/* given an AMO variable and a channel#, get its associated IPI flags */
-#define XPC_GET_IPI_FLAGS(_amo, _c) ((u8) (((_amo) >> ((_c) * 8)) & 0xff))
-#define XPC_SET_IPI_FLAGS(_amo, _c, _f) (_amo) |= ((u64) (_f) << ((_c) * 8))
-
-#define XPC_ANY_OPENCLOSE_IPI_FLAGS_SET(_amo) ((_amo) & 0x0f0f0f0f0f0f0f0fUL)
-#define XPC_ANY_MSG_IPI_FLAGS_SET(_amo) ((_amo) & 0x1010101010101010UL)
-
-static inline void
-xpc_IPI_send_closerequest(struct xpc_channel *ch, unsigned long *irq_flags)
-{
- struct xpc_openclose_args *args = ch->local_openclose_args;
-
- args->reason = ch->reason;
-
- XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_CLOSEREQUEST, irq_flags);
-}
-
-static inline void
-xpc_IPI_send_closereply(struct xpc_channel *ch, unsigned long *irq_flags)
-{
- XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_CLOSEREPLY, irq_flags);
-}
-
-static inline void
-xpc_IPI_send_openrequest(struct xpc_channel *ch, unsigned long *irq_flags)
-{
- struct xpc_openclose_args *args = ch->local_openclose_args;
-
- args->msg_size = ch->msg_size;
- args->local_nentries = ch->local_nentries;
-
- XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_OPENREQUEST, irq_flags);
-}
-
-static inline void
-xpc_IPI_send_openreply(struct xpc_channel *ch, unsigned long *irq_flags)
-{
- struct xpc_openclose_args *args = ch->local_openclose_args;
-
- args->remote_nentries = ch->remote_nentries;
- args->local_nentries = ch->local_nentries;
- args->local_msgqueue_pa = __pa(ch->local_msgqueue);
-
- XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_OPENREPLY, irq_flags);
-}
-
-static inline void
-xpc_IPI_send_msgrequest(struct xpc_channel *ch)
-{
- XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_MSGREQUEST, NULL);
-}
-
-static inline void
-xpc_IPI_send_local_msgrequest(struct xpc_channel *ch)
-{
- XPC_NOTIFY_IRQ_SEND_LOCAL(ch, XPC_IPI_MSGREQUEST);
-}
-
-/*
- * Memory for XPC's AMO variables is allocated by the MSPEC driver. These
- * pages are located in the lowest granule. The lowest granule uses 4k pages
- * for cached references and an alternate TLB handler to never provide a
- * cacheable mapping for the entire region. This will prevent speculative
- * reading of cached copies of our lines from being issued which will cause
- * a PI FSB Protocol error to be generated by the SHUB. For XPC, we need 64
- * AMO variables (based on XP_MAX_PARTITIONS) for message notification and an
- * additional 128 AMO variables (based on XP_NASID_MASK_WORDS) for partition
- * activation and 2 AMO variables for partition deactivation.
- */
-static inline AMO_t *
-xpc_IPI_init(int index)
-{
- AMO_t *amo = xpc_vars->amos_page + index;
-
- (void)xpc_IPI_receive(amo); /* clear AMO variable */
- return amo;
-}
-
-static inline enum xp_retval
-xpc_map_bte_errors(bte_result_t error)
-{
- return ((error == BTE_SUCCESS) ? xpSuccess : xpBteCopyError);
-}
-
-/*
- * Check to see if there is any channel activity to/from the specified
- * partition.
- */
-static inline void
-xpc_check_for_channel_activity(struct xpc_partition *part)
-{
- u64 IPI_amo;
- unsigned long irq_flags;
-
- IPI_amo = xpc_IPI_receive(part->local_IPI_amo_va);
- if (IPI_amo == 0)
- return;
-
- spin_lock_irqsave(&part->IPI_lock, irq_flags);
- part->local_IPI_amo |= IPI_amo;
- spin_unlock_irqrestore(&part->IPI_lock, irq_flags);
-
- dev_dbg(xpc_chan, "received IPI from partid=%d, IPI_amo=0x%lx\n",
- XPC_PARTID(part), IPI_amo);
-
- xpc_wakeup_channel_mgr(part);
-}
-
#endif /* _DRIVERS_MISC_SGIXP_XPC_H */
diff --git a/drivers/misc/sgi-xp/xpc_channel.c b/drivers/misc/sgi-xp/xpc_channel.c
index 9c90c2d55c0..9cd2ebe2a3b 100644
--- a/drivers/misc/sgi-xp/xpc_channel.c
+++ b/drivers/misc/sgi-xp/xpc_channel.c
@@ -14,536 +14,10 @@
*
*/
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/sched.h>
-#include <linux/cache.h>
-#include <linux/interrupt.h>
-#include <linux/mutex.h>
-#include <linux/completion.h>
-#include <asm/sn/bte.h>
-#include <asm/sn/sn_sal.h>
+#include <linux/device.h>
#include "xpc.h"
/*
- * Guarantee that the kzalloc'd memory is cacheline aligned.
- */
-static void *
-xpc_kzalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
-{
- /* see if kzalloc will give us cachline aligned memory by default */
- *base = kzalloc(size, flags);
- if (*base == NULL)
- return NULL;
-
- if ((u64)*base == L1_CACHE_ALIGN((u64)*base))
- return *base;
-
- kfree(*base);
-
- /* nope, we'll have to do it ourselves */
- *base = kzalloc(size + L1_CACHE_BYTES, flags);
- if (*base == NULL)
- return NULL;
-
- return (void *)L1_CACHE_ALIGN((u64)*base);
-}
-
-/*
- * Set up the initial values for the XPartition Communication channels.
- */
-static void
-xpc_initialize_channels(struct xpc_partition *part, short partid)
-{
- int ch_number;
- struct xpc_channel *ch;
-
- for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
- ch = &part->channels[ch_number];
-
- ch->partid = partid;
- ch->number = ch_number;
- ch->flags = XPC_C_DISCONNECTED;
-
- ch->local_GP = &part->local_GPs[ch_number];
- ch->local_openclose_args =
- &part->local_openclose_args[ch_number];
-
- atomic_set(&ch->kthreads_assigned, 0);
- atomic_set(&ch->kthreads_idle, 0);
- atomic_set(&ch->kthreads_active, 0);
-
- atomic_set(&ch->references, 0);
- atomic_set(&ch->n_to_notify, 0);
-
- spin_lock_init(&ch->lock);
- mutex_init(&ch->msg_to_pull_mutex);
- init_completion(&ch->wdisconnect_wait);
-
- atomic_set(&ch->n_on_msg_allocate_wq, 0);
- init_waitqueue_head(&ch->msg_allocate_wq);
- init_waitqueue_head(&ch->idle_wq);
- }
-}
-
-/*
- * Setup the infrastructure necessary to support XPartition Communication
- * between the specified remote partition and the local one.
- */
-enum xp_retval
-xpc_setup_infrastructure(struct xpc_partition *part)
-{
- int ret, cpuid;
- struct timer_list *timer;
- short partid = XPC_PARTID(part);
-
- /*
- * Zero out MOST of the entry for this partition. Only the fields
- * starting with `nchannels' will be zeroed. The preceding fields must
- * remain `viable' across partition ups and downs, since they may be
- * referenced during this memset() operation.
- */
- memset(&part->nchannels, 0, sizeof(struct xpc_partition) -
- offsetof(struct xpc_partition, nchannels));
-
- /*
- * Allocate all of the channel structures as a contiguous chunk of
- * memory.
- */
- part->channels = kzalloc(sizeof(struct xpc_channel) * XPC_NCHANNELS,
- GFP_KERNEL);
- if (part->channels == NULL) {
- dev_err(xpc_chan, "can't get memory for channels\n");
- return xpNoMemory;
- }
-
- part->nchannels = XPC_NCHANNELS;
-
- /* allocate all the required GET/PUT values */
-
- part->local_GPs = xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE,
- GFP_KERNEL,
- &part->local_GPs_base);
- if (part->local_GPs == NULL) {
- kfree(part->channels);
- part->channels = NULL;
- dev_err(xpc_chan, "can't get memory for local get/put "
- "values\n");
- return xpNoMemory;
- }
-
- part->remote_GPs = xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE,
- GFP_KERNEL,
- &part->
- remote_GPs_base);
- if (part->remote_GPs == NULL) {
- dev_err(xpc_chan, "can't get memory for remote get/put "
- "values\n");
- kfree(part->local_GPs_base);
- part->local_GPs = NULL;
- kfree(part->channels);
- part->channels = NULL;
- return xpNoMemory;
- }
-
- /* allocate all the required open and close args */
-
- part->local_openclose_args =
- xpc_kzalloc_cacheline_aligned(XPC_OPENCLOSE_ARGS_SIZE, GFP_KERNEL,
- &part->local_openclose_args_base);
- if (part->local_openclose_args == NULL) {
- dev_err(xpc_chan, "can't get memory for local connect args\n");
- kfree(part->remote_GPs_base);
- part->remote_GPs = NULL;
- kfree(part->local_GPs_base);
- part->local_GPs = NULL;
- kfree(part->channels);
- part->channels = NULL;
- return xpNoMemory;
- }
-
- part->remote_openclose_args =
- xpc_kzalloc_cacheline_aligned(XPC_OPENCLOSE_ARGS_SIZE, GFP_KERNEL,
- &part->remote_openclose_args_base);
- if (part->remote_openclose_args == NULL) {
- dev_err(xpc_chan, "can't get memory for remote connect args\n");
- kfree(part->local_openclose_args_base);
- part->local_openclose_args = NULL;
- kfree(part->remote_GPs_base);
- part->remote_GPs = NULL;
- kfree(part->local_GPs_base);
- part->local_GPs = NULL;
- kfree(part->channels);
- part->channels = NULL;
- return xpNoMemory;
- }
-
- xpc_initialize_channels(part, partid);
-
- atomic_set(&part->nchannels_active, 0);
- atomic_set(&part->nchannels_engaged, 0);
-
- /* local_IPI_amo were set to 0 by an earlier memset() */
-
- /* Initialize this partitions AMO_t structure */
- part->local_IPI_amo_va = xpc_IPI_init(partid);
-
- spin_lock_init(&part->IPI_lock);
-
- atomic_set(&part->channel_mgr_requests, 1);
- init_waitqueue_head(&part->channel_mgr_wq);
-
- sprintf(part->IPI_owner, "xpc%02d", partid);
- ret = request_irq(SGI_XPC_NOTIFY, xpc_notify_IRQ_handler, IRQF_SHARED,
- part->IPI_owner, (void *)(u64)partid);
- if (ret != 0) {
- dev_err(xpc_chan, "can't register NOTIFY IRQ handler, "
- "errno=%d\n", -ret);
- kfree(part->remote_openclose_args_base);
- part->remote_openclose_args = NULL;
- kfree(part->local_openclose_args_base);
- part->local_openclose_args = NULL;
- kfree(part->remote_GPs_base);
- part->remote_GPs = NULL;
- kfree(part->local_GPs_base);
- part->local_GPs = NULL;
- kfree(part->channels);
- part->channels = NULL;
- return xpLackOfResources;
- }
-
- /* Setup a timer to check for dropped IPIs */
- timer = &part->dropped_IPI_timer;
- init_timer(timer);
- timer->function = (void (*)(unsigned long))xpc_dropped_IPI_check;
- timer->data = (unsigned long)part;
- timer->expires = jiffies + XPC_P_DROPPED_IPI_WAIT;
- add_timer(timer);
-
- /*
- * With the setting of the partition setup_state to XPC_P_SETUP, we're
- * declaring that this partition is ready to go.
- */
- part->setup_state = XPC_P_SETUP;
-
- /*
- * Setup the per partition specific variables required by the
- * remote partition to establish channel connections with us.
- *
- * The setting of the magic # indicates that these per partition
- * specific variables are ready to be used.
- */
- xpc_vars_part[partid].GPs_pa = __pa(part->local_GPs);
- xpc_vars_part[partid].openclose_args_pa =
- __pa(part->local_openclose_args);
- xpc_vars_part[partid].IPI_amo_pa = __pa(part->local_IPI_amo_va);
- cpuid = raw_smp_processor_id(); /* any CPU in this partition will do */
- xpc_vars_part[partid].IPI_nasid = cpuid_to_nasid(cpuid);
- xpc_vars_part[partid].IPI_phys_cpuid = cpu_physical_id(cpuid);
- xpc_vars_part[partid].nchannels = part->nchannels;
- xpc_vars_part[partid].magic = XPC_VP_MAGIC1;
-
- return xpSuccess;
-}
-
-/*
- * Create a wrapper that hides the underlying mechanism for pulling a cacheline
- * (or multiple cachelines) from a remote partition.
- *
- * src must be a cacheline aligned physical address on the remote partition.
- * dst must be a cacheline aligned virtual address on this partition.
- * cnt must be an cacheline sized
- */
-static enum xp_retval
-xpc_pull_remote_cachelines(struct xpc_partition *part, void *dst,
- const void *src, size_t cnt)
-{
- bte_result_t bte_ret;
-
- DBUG_ON((u64)src != L1_CACHE_ALIGN((u64)src));
- DBUG_ON((u64)dst != L1_CACHE_ALIGN((u64)dst));
- DBUG_ON(cnt != L1_CACHE_ALIGN(cnt));
-
- if (part->act_state == XPC_P_DEACTIVATING)
- return part->reason;
-
- bte_ret = xp_bte_copy((u64)src, (u64)dst, (u64)cnt,
- (BTE_NORMAL | BTE_WACQUIRE), NULL);
- if (bte_ret == BTE_SUCCESS)
- return xpSuccess;
-
- dev_dbg(xpc_chan, "xp_bte_copy() from partition %d failed, ret=%d\n",
- XPC_PARTID(part), bte_ret);
-
- return xpc_map_bte_errors(bte_ret);
-}
-
-/*
- * Pull the remote per partition specific variables from the specified
- * partition.
- */
-enum xp_retval
-xpc_pull_remote_vars_part(struct xpc_partition *part)
-{
- u8 buffer[L1_CACHE_BYTES * 2];
- struct xpc_vars_part *pulled_entry_cacheline =
- (struct xpc_vars_part *)L1_CACHE_ALIGN((u64)buffer);
- struct xpc_vars_part *pulled_entry;
- u64 remote_entry_cacheline_pa, remote_entry_pa;
- short partid = XPC_PARTID(part);
- enum xp_retval ret;
-
- /* pull the cacheline that contains the variables we're interested in */
-
- DBUG_ON(part->remote_vars_part_pa !=
- L1_CACHE_ALIGN(part->remote_vars_part_pa));
- DBUG_ON(sizeof(struct xpc_vars_part) != L1_CACHE_BYTES / 2);
-
- remote_entry_pa = part->remote_vars_part_pa +
- sn_partition_id * sizeof(struct xpc_vars_part);
-
- remote_entry_cacheline_pa = (remote_entry_pa & ~(L1_CACHE_BYTES - 1));
-
- pulled_entry = (struct xpc_vars_part *)((u64)pulled_entry_cacheline +
- (remote_entry_pa &
- (L1_CACHE_BYTES - 1)));
-
- ret = xpc_pull_remote_cachelines(part, pulled_entry_cacheline,
- (void *)remote_entry_cacheline_pa,
- L1_CACHE_BYTES);
- if (ret != xpSuccess) {
- dev_dbg(xpc_chan, "failed to pull XPC vars_part from "
- "partition %d, ret=%d\n", partid, ret);
- return ret;
- }
-
- /* see if they've been set up yet */
-
- if (pulled_entry->magic != XPC_VP_MAGIC1 &&
- pulled_entry->magic != XPC_VP_MAGIC2) {
-
- if (pulled_entry->magic != 0) {
- dev_dbg(xpc_chan, "partition %d's XPC vars_part for "
- "partition %d has bad magic value (=0x%lx)\n",
- partid, sn_partition_id, pulled_entry->magic);
- return xpBadMagic;
- }
-
- /* they've not been initialized yet */
- return xpRetry;
- }
-
- if (xpc_vars_part[partid].magic == XPC_VP_MAGIC1) {
-
- /* validate the variables */
-
- if (pulled_entry->GPs_pa == 0 ||
- pulled_entry->openclose_args_pa == 0 ||
- pulled_entry->IPI_amo_pa == 0) {
-
- dev_err(xpc_chan, "partition %d's XPC vars_part for "
- "partition %d are not valid\n", partid,
- sn_partition_id);
- return xpInvalidAddress;
- }
-
- /* the variables we imported look to be valid */
-
- part->remote_GPs_pa = pulled_entry->GPs_pa;
- part->remote_openclose_args_pa =
- pulled_entry->openclose_args_pa;
- part->remote_IPI_amo_va =
- (AMO_t *)__va(pulled_entry->IPI_amo_pa);
- part->remote_IPI_nasid = pulled_entry->IPI_nasid;
- part->remote_IPI_phys_cpuid = pulled_entry->IPI_phys_cpuid;
-
- if (part->nchannels > pulled_entry->nchannels)
- part->nchannels = pulled_entry->nchannels;
-
- /* let the other side know that we've pulled their variables */
-
- xpc_vars_part[partid].magic = XPC_VP_MAGIC2;
- }
-
- if (pulled_entry->magic == XPC_VP_MAGIC1)
- return xpRetry;
-
- return xpSuccess;
-}
-
-/*
- * Get the IPI flags and pull the openclose args and/or remote GPs as needed.
- */
-static u64
-xpc_get_IPI_flags(struct xpc_partition *part)
-{
- unsigned long irq_flags;
- u64 IPI_amo;
- enum xp_retval ret;
-
- /*
- * See if there are any IPI flags to be handled.
- */
-
- spin_lock_irqsave(&part->IPI_lock, irq_flags);
- IPI_amo = part->local_IPI_amo;
- if (IPI_amo != 0)
- part->local_IPI_amo = 0;
-
- spin_unlock_irqrestore(&part->IPI_lock, irq_flags);
-
- if (XPC_ANY_OPENCLOSE_IPI_FLAGS_SET(IPI_amo)) {
- ret = xpc_pull_remote_cachelines(part,
- part->remote_openclose_args,
- (void *)part->
- remote_openclose_args_pa,
- XPC_OPENCLOSE_ARGS_SIZE);
- if (ret != xpSuccess) {
- XPC_DEACTIVATE_PARTITION(part, ret);
-
- dev_dbg(xpc_chan, "failed to pull openclose args from "
- "partition %d, ret=%d\n", XPC_PARTID(part),
- ret);
-
- /* don't bother processing IPIs anymore */
- IPI_amo = 0;
- }
- }
-
- if (XPC_ANY_MSG_IPI_FLAGS_SET(IPI_amo)) {
- ret = xpc_pull_remote_cachelines(part, part->remote_GPs,
- (void *)part->remote_GPs_pa,
- XPC_GP_SIZE);
- if (ret != xpSuccess) {
- XPC_DEACTIVATE_PARTITION(part, ret);
-
- dev_dbg(xpc_chan, "failed to pull GPs from partition "
- "%d, ret=%d\n", XPC_PARTID(part), ret);
-
- /* don't bother processing IPIs anymore */
- IPI_amo = 0;
- }
- }
-
- return IPI_amo;
-}
-
-/*
- * Allocate the local message queue and the notify queue.
- */
-static enum xp_retval
-xpc_allocate_local_msgqueue(struct xpc_channel *ch)
-{
- unsigned long irq_flags;
- int nentries;
- size_t nbytes;
-
- for (nentries = ch->local_nentries; nentries > 0; nentries--) {
-
- nbytes = nentries * ch->msg_size;
- ch->local_msgqueue = xpc_kzalloc_cacheline_aligned(nbytes,
- GFP_KERNEL,
- &ch->local_msgqueue_base);
- if (ch->local_msgqueue == NULL)
- continue;
-
- nbytes = nentries * sizeof(struct xpc_notify);
- ch->notify_queue = kzalloc(nbytes, GFP_KERNEL);
- if (ch->notify_queue == NULL) {
- kfree(ch->local_msgqueue_base);
- ch->local_msgqueue = NULL;
- continue;
- }
-
- spin_lock_irqsave(&ch->lock, irq_flags);
- if (nentries < ch->local_nentries) {
- dev_dbg(xpc_chan, "nentries=%d local_nentries=%d, "
- "partid=%d, channel=%d\n", nentries,
- ch->local_nentries, ch->partid, ch->number);
-
- ch->local_nentries = nentries;
- }
- spin_unlock_irqrestore(&ch->lock, irq_flags);
- return xpSuccess;
- }
-
- dev_dbg(xpc_chan, "can't get memory for local message queue and notify "
- "queue, partid=%d, channel=%d\n", ch->partid, ch->number);
- return xpNoMemory;
-}
-
-/*
- * Allocate the cached remote message queue.
- */
-static enum xp_retval
-xpc_allocate_remote_msgqueue(struct xpc_channel *ch)
-{
- unsigned long irq_flags;
- int nentries;
- size_t nbytes;
-
- DBUG_ON(ch->remote_nentries <= 0);
-
- for (nentries = ch->remote_nentries; nentries > 0; nentries--) {
-
- nbytes = nentries * ch->msg_size;
- ch->remote_msgqueue = xpc_kzalloc_cacheline_aligned(nbytes,
- GFP_KERNEL,
- &ch->remote_msgqueue_base);
- if (ch->remote_msgqueue == NULL)
- continue;
-
- spin_lock_irqsave(&ch->lock, irq_flags);
- if (nentries < ch->remote_nentries) {
- dev_dbg(xpc_chan, "nentries=%d remote_nentries=%d, "
- "partid=%d, channel=%d\n", nentries,
- ch->remote_nentries, ch->partid, ch->number);
-
- ch->remote_nentries = nentries;
- }
- spin_unlock_irqrestore(&ch->lock, irq_flags);
- return xpSuccess;
- }
-
- dev_dbg(xpc_chan, "can't get memory for cached remote message queue, "
- "partid=%d, channel=%d\n", ch->partid, ch->number);
- return xpNoMemory;
-}
-
-/*
- * Allocate message queues and other stuff associated with a channel.
- *
- * Note: Assumes all of the channel sizes are filled in.
- */
-static enum xp_retval
-xpc_allocate_msgqueues(struct xpc_channel *ch)
-{
- unsigned long irq_flags;
- enum xp_retval ret;
-
- DBUG_ON(ch->flags & XPC_C_SETUP);
-
- ret = xpc_allocate_local_msgqueue(ch);
- if (ret != xpSuccess)
- return ret;
-
- ret = xpc_allocate_remote_msgqueue(ch);
- if (ret != xpSuccess) {
- kfree(ch->local_msgqueue_base);
- ch->local_msgqueue = NULL;
- kfree(ch->notify_queue);
- ch->notify_queue = NULL;
- return ret;
- }
-
- spin_lock_irqsave(&ch->lock, irq_flags);
- ch->flags |= XPC_C_SETUP;
- spin_unlock_irqrestore(&ch->lock, irq_flags);
-
- return xpSuccess;
-}
-
-/*
* Process a connect message from a remote partition.
*
* Note: xpc_process_connect() is expecting to be called with the
@@ -565,30 +39,29 @@ xpc_process_connect(struct xpc_channel *ch, unsigned long *irq_flags)
if (!(ch->flags & XPC_C_SETUP)) {
spin_unlock_irqrestore(&ch->lock, *irq_flags);
- ret = xpc_allocate_msgqueues(ch);
+ ret = xpc_setup_msg_structures(ch);
spin_lock_irqsave(&ch->lock, *irq_flags);
if (ret != xpSuccess)
XPC_DISCONNECT_CHANNEL(ch, ret, irq_flags);
+ ch->flags |= XPC_C_SETUP;
+
if (ch->flags & (XPC_C_CONNECTED | XPC_C_DISCONNECTING))
return;
- DBUG_ON(!(ch->flags & XPC_C_SETUP));
DBUG_ON(ch->local_msgqueue == NULL);
DBUG_ON(ch->remote_msgqueue == NULL);
}
if (!(ch->flags & XPC_C_OPENREPLY)) {
ch->flags |= XPC_C_OPENREPLY;
- xpc_IPI_send_openreply(ch, irq_flags);
+ xpc_send_chctl_openreply(ch, irq_flags);
}
if (!(ch->flags & XPC_C_ROPENREPLY))
return;
- DBUG_ON(ch->remote_msgqueue_pa == 0);
-
ch->flags = (XPC_C_CONNECTED | XPC_C_SETUP); /* clear all else */
dev_info(xpc_chan, "channel %d to partition %d connected\n",
@@ -600,99 +73,6 @@ xpc_process_connect(struct xpc_channel *ch, unsigned long *irq_flags)
}
/*
- * Notify those who wanted to be notified upon delivery of their message.
- */
-static void
-xpc_notify_senders(struct xpc_channel *ch, enum xp_retval reason, s64 put)
-{
- struct xpc_notify *notify;
- u8 notify_type;
- s64 get = ch->w_remote_GP.get - 1;
-
- while (++get < put && atomic_read(&ch->n_to_notify) > 0) {
-
- notify = &ch->notify_queue[get % ch->local_nentries];
-
- /*
- * See if the notify entry indicates it was associated with
- * a message who's sender wants to be notified. It is possible
- * that it is, but someone else is doing or has done the
- * notification.
- */
- notify_type = notify->type;
- if (notify_type == 0 ||
- cmpxchg(&notify->type, notify_type, 0) != notify_type) {
- continue;
- }
-
- DBUG_ON(notify_type != XPC_N_CALL);
-
- atomic_dec(&ch->n_to_notify);
-
- if (notify->func != NULL) {
- dev_dbg(xpc_chan, "notify->func() called, notify=0x%p, "
- "msg_number=%ld, partid=%d, channel=%d\n",
- (void *)notify, get, ch->partid, ch->number);
-
- notify->func(reason, ch->partid, ch->number,
- notify->key);
-
- dev_dbg(xpc_chan, "notify->func() returned, "
- "notify=0x%p, msg_number=%ld, partid=%d, "
- "channel=%d\n", (void *)notify, get,
- ch->partid, ch->number);
- }
- }
-}
-
-/*
- * Free up message queues and other stuff that were allocated for the specified
- * channel.
- *
- * Note: ch->reason and ch->reason_line are left set for debugging purposes,
- * they're cleared when XPC_C_DISCONNECTED is cleared.
- */
-static void
-xpc_free_msgqueues(struct xpc_channel *ch)
-{
- DBUG_ON(!spin_is_locked(&ch->lock));
- DBUG_ON(atomic_read(&ch->n_to_notify) != 0);
-
- ch->remote_msgqueue_pa = 0;
- ch->func = NULL;
- ch->key = NULL;
- ch->msg_size = 0;
- ch->local_nentries = 0;
- ch->remote_nentries = 0;
- ch->kthreads_assigned_limit = 0;
- ch->kthreads_idle_limit = 0;
-
- ch->local_GP->get = 0;
- ch->local_GP->put = 0;
- ch->remote_GP.get = 0;
- ch->remote_GP.put = 0;
- ch->w_local_GP.get = 0;
- ch->w_local_GP.put = 0;
- ch->w_remote_GP.get = 0;
- ch->w_remote_GP.put = 0;
- ch->next_msg_to_pull = 0;
-
- if (ch->flags & XPC_C_SETUP) {
- ch->flags &= ~XPC_C_SETUP;
-
- dev_dbg(xpc_chan, "ch->flags=0x%x, partid=%d, channel=%d\n",
- ch->flags, ch->partid, ch->number);
-
- kfree(ch->local_msgqueue_base);
- ch->local_msgqueue = NULL;
- kfree(ch->remote_msgqueue_base);
- ch->remote_msgqueue = NULL;
- kfree(ch->notify_queue);
- ch->notify_queue = NULL;
- }
-}
-
-/*
* spin_lock_irqsave() is expected to be held on entry.
*/
static void
@@ -717,9 +97,9 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
DBUG_ON((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) &&
!(ch->flags & XPC_C_DISCONNECTINGCALLOUT_MADE));
- if (part->act_state == XPC_P_DEACTIVATING) {
+ if (part->act_state == XPC_P_AS_DEACTIVATING) {
/* can't proceed until the other side disengages from us */
- if (xpc_partition_engaged(1UL << ch->partid))
+ if (xpc_partition_engaged(ch->partid))
return;
} else {
@@ -731,7 +111,7 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
if (!(ch->flags & XPC_C_CLOSEREPLY)) {
ch->flags |= XPC_C_CLOSEREPLY;
- xpc_IPI_send_closereply(ch, irq_flags);
+ xpc_send_chctl_closereply(ch, irq_flags);
}
if (!(ch->flags & XPC_C_RCLOSEREPLY))
@@ -740,8 +120,8 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
/* wake those waiting for notify completion */
if (atomic_read(&ch->n_to_notify) > 0) {
- /* >>> we do callout while holding ch->lock */
- xpc_notify_senders(ch, ch->reason, ch->w_local_GP.put);
+ /* we do callout while holding ch->lock, callout can't block */
+ xpc_notify_senders_of_disconnect(ch);
}
/* both sides are disconnected now */
@@ -752,10 +132,24 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
spin_lock_irqsave(&ch->lock, *irq_flags);
}
+ DBUG_ON(atomic_read(&ch->n_to_notify) != 0);
+
/* it's now safe to free the channel's message queues */
- xpc_free_msgqueues(ch);
+ xpc_teardown_msg_structures(ch);
- /* mark disconnected, clear all other flags except XPC_C_WDISCONNECT */
+ ch->func = NULL;
+ ch->key = NULL;
+ ch->entry_size = 0;
+ ch->local_nentries = 0;
+ ch->remote_nentries = 0;
+ ch->kthreads_assigned_limit = 0;
+ ch->kthreads_idle_limit = 0;
+
+ /*
+ * Mark the channel disconnected and clear all other flags, including
+ * XPC_C_SETUP (because of call to xpc_teardown_msg_structures()) but
+ * not including XPC_C_WDISCONNECT (if it was set).
+ */
ch->flags = (XPC_C_DISCONNECTED | (ch->flags & XPC_C_WDISCONNECT));
atomic_dec(&part->nchannels_active);
@@ -768,15 +162,15 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
if (ch->flags & XPC_C_WDISCONNECT) {
/* we won't lose the CPU since we're holding ch->lock */
complete(&ch->wdisconnect_wait);
- } else if (ch->delayed_IPI_flags) {
- if (part->act_state != XPC_P_DEACTIVATING) {
- /* time to take action on any delayed IPI flags */
- spin_lock(&part->IPI_lock);
- XPC_SET_IPI_FLAGS(part->local_IPI_amo, ch->number,
- ch->delayed_IPI_flags);
- spin_unlock(&part->IPI_lock);
+ } else if (ch->delayed_chctl_flags) {
+ if (part->act_state != XPC_P_AS_DEACTIVATING) {
+ /* time to take action on any delayed chctl flags */
+ spin_lock(&part->chctl_lock);
+ part->chctl.flags[ch->number] |=
+ ch->delayed_chctl_flags;
+ spin_unlock(&part->chctl_lock);
}
- ch->delayed_IPI_flags = 0;
+ ch->delayed_chctl_flags = 0;
}
}
@@ -784,8 +178,8 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
* Process a change in the channel's remote connection state.
*/
static void
-xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number,
- u8 IPI_flags)
+xpc_process_openclose_chctl_flags(struct xpc_partition *part, int ch_number,
+ u8 chctl_flags)
{
unsigned long irq_flags;
struct xpc_openclose_args *args =
@@ -800,24 +194,24 @@ again:
if ((ch->flags & XPC_C_DISCONNECTED) &&
(ch->flags & XPC_C_WDISCONNECT)) {
/*
- * Delay processing IPI flags until thread waiting disconnect
+ * Delay processing chctl flags until thread waiting disconnect
* has had a chance to see that the channel is disconnected.
*/
- ch->delayed_IPI_flags |= IPI_flags;
+ ch->delayed_chctl_flags |= chctl_flags;
spin_unlock_irqrestore(&ch->lock, irq_flags);
return;
}
- if (IPI_flags & XPC_IPI_CLOSEREQUEST) {
+ if (chctl_flags & XPC_CHCTL_CLOSEREQUEST) {
- dev_dbg(xpc_chan, "XPC_IPI_CLOSEREQUEST (reason=%d) received "
+ dev_dbg(xpc_chan, "XPC_CHCTL_CLOSEREQUEST (reason=%d) received "
"from partid=%d, channel=%d\n", args->reason,
ch->partid, ch->number);
/*
* If RCLOSEREQUEST is set, we're probably waiting for
* RCLOSEREPLY. We should find it and a ROPENREQUEST packed
- * with this RCLOSEREQUEST in the IPI_flags.
+ * with this RCLOSEREQUEST in the chctl_flags.
*/
if (ch->flags & XPC_C_RCLOSEREQUEST) {
@@ -826,8 +220,8 @@ again:
DBUG_ON(!(ch->flags & XPC_C_CLOSEREPLY));
DBUG_ON(ch->flags & XPC_C_RCLOSEREPLY);
- DBUG_ON(!(IPI_flags & XPC_IPI_CLOSEREPLY));
- IPI_flags &= ~XPC_IPI_CLOSEREPLY;
+ DBUG_ON(!(chctl_flags & XPC_CHCTL_CLOSEREPLY));
+ chctl_flags &= ~XPC_CHCTL_CLOSEREPLY;
ch->flags |= XPC_C_RCLOSEREPLY;
/* both sides have finished disconnecting */
@@ -837,17 +231,15 @@ again:
}
if (ch->flags & XPC_C_DISCONNECTED) {
- if (!(IPI_flags & XPC_IPI_OPENREQUEST)) {
- if ((XPC_GET_IPI_FLAGS(part->local_IPI_amo,
- ch_number) &
- XPC_IPI_OPENREQUEST)) {
-
- DBUG_ON(ch->delayed_IPI_flags != 0);
- spin_lock(&part->IPI_lock);
- XPC_SET_IPI_FLAGS(part->local_IPI_amo,
- ch_number,
- XPC_IPI_CLOSEREQUEST);
- spin_unlock(&part->IPI_lock);
+ if (!(chctl_flags & XPC_CHCTL_OPENREQUEST)) {
+ if (part->chctl.flags[ch_number] &
+ XPC_CHCTL_OPENREQUEST) {
+
+ DBUG_ON(ch->delayed_chctl_flags != 0);
+ spin_lock(&part->chctl_lock);
+ part->chctl.flags[ch_number] |=
+ XPC_CHCTL_CLOSEREQUEST;
+ spin_unlock(&part->chctl_lock);
}
spin_unlock_irqrestore(&ch->lock, irq_flags);
return;
@@ -860,7 +252,7 @@ again:
ch->flags |= (XPC_C_CONNECTING | XPC_C_ROPENREQUEST);
}
- IPI_flags &= ~(XPC_IPI_OPENREQUEST | XPC_IPI_OPENREPLY);
+ chctl_flags &= ~(XPC_CHCTL_OPENREQUEST | XPC_CHCTL_OPENREPLY);
/*
* The meaningful CLOSEREQUEST connection state fields are:
@@ -878,7 +270,7 @@ again:
XPC_DISCONNECT_CHANNEL(ch, reason, &irq_flags);
- DBUG_ON(IPI_flags & XPC_IPI_CLOSEREPLY);
+ DBUG_ON(chctl_flags & XPC_CHCTL_CLOSEREPLY);
spin_unlock_irqrestore(&ch->lock, irq_flags);
return;
}
@@ -886,13 +278,13 @@ again:
xpc_process_disconnect(ch, &irq_flags);
}
- if (IPI_flags & XPC_IPI_CLOSEREPLY) {
+ if (chctl_flags & XPC_CHCTL_CLOSEREPLY) {
- dev_dbg(xpc_chan, "XPC_IPI_CLOSEREPLY received from partid=%d,"
- " channel=%d\n", ch->partid, ch->number);
+ dev_dbg(xpc_chan, "XPC_CHCTL_CLOSEREPLY received from partid="
+ "%d, channel=%d\n", ch->partid, ch->number);
if (ch->flags & XPC_C_DISCONNECTED) {
- DBUG_ON(part->act_state != XPC_P_DEACTIVATING);
+ DBUG_ON(part->act_state != XPC_P_AS_DEACTIVATING);
spin_unlock_irqrestore(&ch->lock, irq_flags);
return;
}
@@ -900,15 +292,14 @@ again:
DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST));
if (!(ch->flags & XPC_C_RCLOSEREQUEST)) {
- if ((XPC_GET_IPI_FLAGS(part->local_IPI_amo, ch_number)
- & XPC_IPI_CLOSEREQUEST)) {
-
- DBUG_ON(ch->delayed_IPI_flags != 0);
- spin_lock(&part->IPI_lock);
- XPC_SET_IPI_FLAGS(part->local_IPI_amo,
- ch_number,
- XPC_IPI_CLOSEREPLY);
- spin_unlock(&part->IPI_lock);
+ if (part->chctl.flags[ch_number] &
+ XPC_CHCTL_CLOSEREQUEST) {
+
+ DBUG_ON(ch->delayed_chctl_flags != 0);
+ spin_lock(&part->chctl_lock);
+ part->chctl.flags[ch_number] |=
+ XPC_CHCTL_CLOSEREPLY;
+ spin_unlock(&part->chctl_lock);
}
spin_unlock_irqrestore(&ch->lock, irq_flags);
return;
@@ -922,21 +313,21 @@ again:
}
}
- if (IPI_flags & XPC_IPI_OPENREQUEST) {
+ if (chctl_flags & XPC_CHCTL_OPENREQUEST) {
- dev_dbg(xpc_chan, "XPC_IPI_OPENREQUEST (msg_size=%d, "
+ dev_dbg(xpc_chan, "XPC_CHCTL_OPENREQUEST (entry_size=%d, "
"local_nentries=%d) received from partid=%d, "
- "channel=%d\n", args->msg_size, args->local_nentries,
+ "channel=%d\n", args->entry_size, args->local_nentries,
ch->partid, ch->number);
- if (part->act_state == XPC_P_DEACTIVATING ||
+ if (part->act_state == XPC_P_AS_DEACTIVATING ||
(ch->flags & XPC_C_ROPENREQUEST)) {
spin_unlock_irqrestore(&ch->lock, irq_flags);
return;
}
if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_WDISCONNECT)) {
- ch->delayed_IPI_flags |= XPC_IPI_OPENREQUEST;
+ ch->delayed_chctl_flags |= XPC_CHCTL_OPENREQUEST;
spin_unlock_irqrestore(&ch->lock, irq_flags);
return;
}
@@ -947,10 +338,10 @@ again:
/*
* The meaningful OPENREQUEST connection state fields are:
- * msg_size = size of channel's messages in bytes
+ * entry_size = size of channel's messages in bytes
* local_nentries = remote partition's local_nentries
*/
- if (args->msg_size == 0 || args->local_nentries == 0) {
+ if (args->entry_size == 0 || args->local_nentries == 0) {
/* assume OPENREQUEST was delayed by mistake */
spin_unlock_irqrestore(&ch->lock, irq_flags);
return;
@@ -960,14 +351,14 @@ again:
ch->remote_nentries = args->local_nentries;
if (ch->flags & XPC_C_OPENREQUEST) {
- if (args->msg_size != ch->msg_size) {
+ if (args->entry_size != ch->entry_size) {
XPC_DISCONNECT_CHANNEL(ch, xpUnequalMsgSizes,
&irq_flags);
spin_unlock_irqrestore(&ch->lock, irq_flags);
return;
}
} else {
- ch->msg_size = args->msg_size;
+ ch->entry_size = args->entry_size;
XPC_SET_REASON(ch, 0, 0);
ch->flags &= ~XPC_C_DISCONNECTED;
@@ -978,13 +369,13 @@ again:
xpc_process_connect(ch, &irq_flags);
}
- if (IPI_flags & XPC_IPI_OPENREPLY) {
+ if (chctl_flags & XPC_CHCTL_OPENREPLY) {
- dev_dbg(xpc_chan, "XPC_IPI_OPENREPLY (local_msgqueue_pa=0x%lx, "
- "local_nentries=%d, remote_nentries=%d) received from "
- "partid=%d, channel=%d\n", args->local_msgqueue_pa,
- args->local_nentries, args->remote_nentries,
- ch->partid, ch->number);
+ dev_dbg(xpc_chan, "XPC_CHCTL_OPENREPLY (local_msgqueue_pa="
+ "0x%lx, local_nentries=%d, remote_nentries=%d) "
+ "received from partid=%d, channel=%d\n",
+ args->local_msgqueue_pa, args->local_nentries,
+ args->remote_nentries, ch->partid, ch->number);
if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED)) {
spin_unlock_irqrestore(&ch->lock, irq_flags);
@@ -1012,10 +403,10 @@ again:
DBUG_ON(args->remote_nentries == 0);
ch->flags |= XPC_C_ROPENREPLY;
- ch->remote_msgqueue_pa = args->local_msgqueue_pa;
+ xpc_save_remote_msgqueue_pa(ch, args->local_msgqueue_pa);
if (args->local_nentries < ch->remote_nentries) {
- dev_dbg(xpc_chan, "XPC_IPI_OPENREPLY: new "
+ dev_dbg(xpc_chan, "XPC_CHCTL_OPENREPLY: new "
"remote_nentries=%d, old remote_nentries=%d, "
"partid=%d, channel=%d\n",
args->local_nentries, ch->remote_nentries,
@@ -1024,7 +415,7 @@ again:
ch->remote_nentries = args->local_nentries;
}
if (args->remote_nentries < ch->local_nentries) {
- dev_dbg(xpc_chan, "XPC_IPI_OPENREPLY: new "
+ dev_dbg(xpc_chan, "XPC_CHCTL_OPENREPLY: new "
"local_nentries=%d, old local_nentries=%d, "
"partid=%d, channel=%d\n",
args->remote_nentries, ch->local_nentries,
@@ -1082,7 +473,7 @@ xpc_connect_channel(struct xpc_channel *ch)
ch->local_nentries = registration->nentries;
if (ch->flags & XPC_C_ROPENREQUEST) {
- if (registration->msg_size != ch->msg_size) {
+ if (registration->entry_size != ch->entry_size) {
/* the local and remote sides aren't the same */
/*
@@ -1101,7 +492,7 @@ xpc_connect_channel(struct xpc_channel *ch)
return xpUnequalMsgSizes;
}
} else {
- ch->msg_size = registration->msg_size;
+ ch->entry_size = registration->entry_size;
XPC_SET_REASON(ch, 0, 0);
ch->flags &= ~XPC_C_DISCONNECTED;
@@ -1114,7 +505,7 @@ xpc_connect_channel(struct xpc_channel *ch)
/* initiate the connection */
ch->flags |= (XPC_C_OPENREQUEST | XPC_C_CONNECTING);
- xpc_IPI_send_openrequest(ch, &irq_flags);
+ xpc_send_chctl_openrequest(ch, &irq_flags);
xpc_process_connect(ch, &irq_flags);
@@ -1123,152 +514,16 @@ xpc_connect_channel(struct xpc_channel *ch)
return xpSuccess;
}
-/*
- * Clear some of the msg flags in the local message queue.
- */
-static inline void
-xpc_clear_local_msgqueue_flags(struct xpc_channel *ch)
-{
- struct xpc_msg *msg;
- s64 get;
-
- get = ch->w_remote_GP.get;
- do {
- msg = (struct xpc_msg *)((u64)ch->local_msgqueue +
- (get % ch->local_nentries) *
- ch->msg_size);
- msg->flags = 0;
- } while (++get < ch->remote_GP.get);
-}
-
-/*
- * Clear some of the msg flags in the remote message queue.
- */
-static inline void
-xpc_clear_remote_msgqueue_flags(struct xpc_channel *ch)
-{
- struct xpc_msg *msg;
- s64 put;
-
- put = ch->w_remote_GP.put;
- do {
- msg = (struct xpc_msg *)((u64)ch->remote_msgqueue +
- (put % ch->remote_nentries) *
- ch->msg_size);
- msg->flags = 0;
- } while (++put < ch->remote_GP.put);
-}
-
-static void
-xpc_process_msg_IPI(struct xpc_partition *part, int ch_number)
-{
- struct xpc_channel *ch = &part->channels[ch_number];
- int nmsgs_sent;
-
- ch->remote_GP = part->remote_GPs[ch_number];
-
- /* See what, if anything, has changed for each connected channel */
-
- xpc_msgqueue_ref(ch);
-
- if (ch->w_remote_GP.get == ch->remote_GP.get &&
- ch->w_remote_GP.put == ch->remote_GP.put) {
- /* nothing changed since GPs were last pulled */
- xpc_msgqueue_deref(ch);
- return;
- }
-
- if (!(ch->flags & XPC_C_CONNECTED)) {
- xpc_msgqueue_deref(ch);
- return;
- }
-
- /*
- * First check to see if messages recently sent by us have been
- * received by the other side. (The remote GET value will have
- * changed since we last looked at it.)
- */
-
- if (ch->w_remote_GP.get != ch->remote_GP.get) {
-
- /*
- * We need to notify any senders that want to be notified
- * that their sent messages have been received by their
- * intended recipients. We need to do this before updating
- * w_remote_GP.get so that we don't allocate the same message
- * queue entries prematurely (see xpc_allocate_msg()).
- */
- if (atomic_read(&ch->n_to_notify) > 0) {
- /*
- * Notify senders that messages sent have been
- * received and delivered by the other side.
- */
- xpc_notify_senders(ch, xpMsgDelivered,
- ch->remote_GP.get);
- }
-
- /*
- * Clear msg->flags in previously sent messages, so that
- * they're ready for xpc_allocate_msg().
- */
- xpc_clear_local_msgqueue_flags(ch);
-
- ch->w_remote_GP.get = ch->remote_GP.get;
-
- dev_dbg(xpc_chan, "w_remote_GP.get changed to %ld, partid=%d, "
- "channel=%d\n", ch->w_remote_GP.get, ch->partid,
- ch->number);
-
- /*
- * If anyone was waiting for message queue entries to become
- * available, wake them up.
- */
- if (atomic_read(&ch->n_on_msg_allocate_wq) > 0)
- wake_up(&ch->msg_allocate_wq);
- }
-
- /*
- * Now check for newly sent messages by the other side. (The remote
- * PUT value will have changed since we last looked at it.)
- */
-
- if (ch->w_remote_GP.put != ch->remote_GP.put) {
- /*
- * Clear msg->flags in previously received messages, so that
- * they're ready for xpc_get_deliverable_msg().
- */
- xpc_clear_remote_msgqueue_flags(ch);
-
- ch->w_remote_GP.put = ch->remote_GP.put;
-
- dev_dbg(xpc_chan, "w_remote_GP.put changed to %ld, partid=%d, "
- "channel=%d\n", ch->w_remote_GP.put, ch->partid,
- ch->number);
-
- nmsgs_sent = ch->w_remote_GP.put - ch->w_local_GP.get;
- if (nmsgs_sent > 0) {
- dev_dbg(xpc_chan, "msgs waiting to be copied and "
- "delivered=%d, partid=%d, channel=%d\n",
- nmsgs_sent, ch->partid, ch->number);
-
- if (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE)
- xpc_activate_kthreads(ch, nmsgs_sent);
- }
- }
-
- xpc_msgqueue_deref(ch);
-}
-
void
-xpc_process_channel_activity(struct xpc_partition *part)
+xpc_process_sent_chctl_flags(struct xpc_partition *part)
{
unsigned long irq_flags;
- u64 IPI_amo, IPI_flags;
+ union xpc_channel_ctl_flags chctl;
struct xpc_channel *ch;
int ch_number;
u32 ch_flags;
- IPI_amo = xpc_get_IPI_flags(part);
+ chctl.all_flags = xpc_get_chctl_all_flags(part);
/*
* Initiate channel connections for registered channels.
@@ -1281,14 +536,14 @@ xpc_process_channel_activity(struct xpc_partition *part)
ch = &part->channels[ch_number];
/*
- * Process any open or close related IPI flags, and then deal
+ * Process any open or close related chctl flags, and then deal
* with connecting or disconnecting the channel as required.
*/
- IPI_flags = XPC_GET_IPI_FLAGS(IPI_amo, ch_number);
-
- if (XPC_ANY_OPENCLOSE_IPI_FLAGS_SET(IPI_flags))
- xpc_process_openclose_IPI(part, ch_number, IPI_flags);
+ if (chctl.flags[ch_number] & XPC_OPENCLOSE_CHCTL_FLAGS) {
+ xpc_process_openclose_chctl_flags(part, ch_number,
+ chctl.flags[ch_number]);
+ }
ch_flags = ch->flags; /* need an atomic snapshot of flags */
@@ -1299,7 +554,7 @@ xpc_process_channel_activity(struct xpc_partition *part)
continue;
}
- if (part->act_state == XPC_P_DEACTIVATING)
+ if (part->act_state == XPC_P_AS_DEACTIVATING)
continue;
if (!(ch_flags & XPC_C_CONNECTED)) {
@@ -1315,13 +570,13 @@ xpc_process_channel_activity(struct xpc_partition *part)
}
/*
- * Process any message related IPI flags, this may involve the
- * activation of kthreads to deliver any pending messages sent
- * from the other partition.
+ * Process any message related chctl flags, this may involve
+ * the activation of kthreads to deliver any pending messages
+ * sent from the other partition.
*/
- if (XPC_ANY_MSG_IPI_FLAGS_SET(IPI_flags))
- xpc_process_msg_IPI(part, ch_number);
+ if (chctl.flags[ch_number] & XPC_MSG_CHCTL_FLAGS)
+ xpc_process_msg_chctl_flags(part, ch_number);
}
}
@@ -1369,59 +624,6 @@ xpc_partition_going_down(struct xpc_partition *part, enum xp_retval reason)
}
/*
- * Teardown the infrastructure necessary to support XPartition Communication
- * between the specified remote partition and the local one.
- */
-void
-xpc_teardown_infrastructure(struct xpc_partition *part)
-{
- short partid = XPC_PARTID(part);
-
- /*
- * We start off by making this partition inaccessible to local
- * processes by marking it as no longer setup. Then we make it
- * inaccessible to remote processes by clearing the XPC per partition
- * specific variable's magic # (which indicates that these variables
- * are no longer valid) and by ignoring all XPC notify IPIs sent to
- * this partition.
- */
-
- DBUG_ON(atomic_read(&part->nchannels_engaged) != 0);
- DBUG_ON(atomic_read(&part->nchannels_active) != 0);
- DBUG_ON(part->setup_state != XPC_P_SETUP);
- part->setup_state = XPC_P_WTEARDOWN;
-
- xpc_vars_part[partid].magic = 0;
-
- free_irq(SGI_XPC_NOTIFY, (void *)(u64)partid);
-
- /*
- * Before proceeding with the teardown we have to wait until all
- * existing references cease.
- */
- wait_event(part->teardown_wq, (atomic_read(&part->references) == 0));
-
- /* now we can begin tearing down the infrastructure */
-
- part->setup_state = XPC_P_TORNDOWN;
-
- /* in case we've still got outstanding timers registered... */
- del_timer_sync(&part->dropped_IPI_timer);
-
- kfree(part->remote_openclose_args_base);
- part->remote_openclose_args = NULL;
- kfree(part->local_openclose_args_base);
- part->local_openclose_args = NULL;
- kfree(part->remote_GPs_base);
- part->remote_GPs = NULL;
- kfree(part->local_GPs_base);
- part->local_GPs = NULL;
- kfree(part->channels);
- part->channels = NULL;
- part->local_IPI_amo_va = NULL;
-}
-
-/*
* Called by XP at the time of channel connection registration to cause
* XPC to establish connections to all currently active partitions.
*/
@@ -1432,9 +634,9 @@ xpc_initiate_connect(int ch_number)
struct xpc_partition *part;
struct xpc_channel *ch;
- DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS);
+ DBUG_ON(ch_number < 0 || ch_number >= XPC_MAX_NCHANNELS);
- for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
+ for (partid = 0; partid < xp_max_npartitions; partid++) {
part = &xpc_partitions[partid];
if (xpc_part_ref(part)) {
@@ -1488,10 +690,10 @@ xpc_initiate_disconnect(int ch_number)
struct xpc_partition *part;
struct xpc_channel *ch;
- DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS);
+ DBUG_ON(ch_number < 0 || ch_number >= XPC_MAX_NCHANNELS);
/* initiate the channel disconnect for every active partition */
- for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
+ for (partid = 0; partid < xp_max_npartitions; partid++) {
part = &xpc_partitions[partid];
if (xpc_part_ref(part)) {
@@ -1550,7 +752,7 @@ xpc_disconnect_channel(const int line, struct xpc_channel *ch,
XPC_C_ROPENREQUEST | XPC_C_ROPENREPLY |
XPC_C_CONNECTING | XPC_C_CONNECTED);
- xpc_IPI_send_closerequest(ch, irq_flags);
+ xpc_send_chctl_closerequest(ch, irq_flags);
if (channel_was_connected)
ch->flags |= XPC_C_WASCONNECTED;
@@ -1598,7 +800,7 @@ xpc_disconnect_callout(struct xpc_channel *ch, enum xp_retval reason)
* Wait for a message entry to become available for the specified channel,
* but don't wait any longer than 1 jiffy.
*/
-static enum xp_retval
+enum xp_retval
xpc_allocate_msg_wait(struct xpc_channel *ch)
{
enum xp_retval ret;
@@ -1625,315 +827,54 @@ xpc_allocate_msg_wait(struct xpc_channel *ch)
}
/*
- * Allocate an entry for a message from the message queue associated with the
- * specified channel.
- */
-static enum xp_retval
-xpc_allocate_msg(struct xpc_channel *ch, u32 flags,
- struct xpc_msg **address_of_msg)
-{
- struct xpc_msg *msg;
- enum xp_retval ret;
- s64 put;
-
- /* this reference will be dropped in xpc_send_msg() */
- xpc_msgqueue_ref(ch);
-
- if (ch->flags & XPC_C_DISCONNECTING) {
- xpc_msgqueue_deref(ch);
- return ch->reason;
- }
- if (!(ch->flags & XPC_C_CONNECTED)) {
- xpc_msgqueue_deref(ch);
- return xpNotConnected;
- }
-
- /*
- * Get the next available message entry from the local message queue.
- * If none are available, we'll make sure that we grab the latest
- * GP values.
- */
- ret = xpTimeout;
-
- while (1) {
-
- put = ch->w_local_GP.put;
- rmb(); /* guarantee that .put loads before .get */
- if (put - ch->w_remote_GP.get < ch->local_nentries) {
-
- /* There are available message entries. We need to try
- * to secure one for ourselves. We'll do this by trying
- * to increment w_local_GP.put as long as someone else
- * doesn't beat us to it. If they do, we'll have to
- * try again.
- */
- if (cmpxchg(&ch->w_local_GP.put, put, put + 1) == put) {
- /* we got the entry referenced by put */
- break;
- }
- continue; /* try again */
- }
-
- /*
- * There aren't any available msg entries at this time.
- *
- * In waiting for a message entry to become available,
- * we set a timeout in case the other side is not
- * sending completion IPIs. This lets us fake an IPI
- * that will cause the IPI handler to fetch the latest
- * GP values as if an IPI was sent by the other side.
- */
- if (ret == xpTimeout)
- xpc_IPI_send_local_msgrequest(ch);
-
- if (flags & XPC_NOWAIT) {
- xpc_msgqueue_deref(ch);
- return xpNoWait;
- }
-
- ret = xpc_allocate_msg_wait(ch);
- if (ret != xpInterrupted && ret != xpTimeout) {
- xpc_msgqueue_deref(ch);
- return ret;
- }
- }
-
- /* get the message's address and initialize it */
- msg = (struct xpc_msg *)((u64)ch->local_msgqueue +
- (put % ch->local_nentries) * ch->msg_size);
-
- DBUG_ON(msg->flags != 0);
- msg->number = put;
-
- dev_dbg(xpc_chan, "w_local_GP.put changed to %ld; msg=0x%p, "
- "msg_number=%ld, partid=%d, channel=%d\n", put + 1,
- (void *)msg, msg->number, ch->partid, ch->number);
-
- *address_of_msg = msg;
-
- return xpSuccess;
-}
-
-/*
- * Allocate an entry for a message from the message queue associated with the
- * specified channel. NOTE that this routine can sleep waiting for a message
- * entry to become available. To not sleep, pass in the XPC_NOWAIT flag.
+ * Send a message that contains the user's payload on the specified channel
+ * connected to the specified partition.
*
- * Arguments:
+ * NOTE that this routine can sleep waiting for a message entry to become
+ * available. To not sleep, pass in the XPC_NOWAIT flag.
*
- * partid - ID of partition to which the channel is connected.
- * ch_number - channel #.
- * flags - see xpc.h for valid flags.
- * payload - address of the allocated payload area pointer (filled in on
- * return) in which the user-defined message is constructed.
- */
-enum xp_retval
-xpc_initiate_allocate(short partid, int ch_number, u32 flags, void **payload)
-{
- struct xpc_partition *part = &xpc_partitions[partid];
- enum xp_retval ret = xpUnknownReason;
- struct xpc_msg *msg = NULL;
-
- DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
- DBUG_ON(ch_number < 0 || ch_number >= part->nchannels);
-
- *payload = NULL;
-
- if (xpc_part_ref(part)) {
- ret = xpc_allocate_msg(&part->channels[ch_number], flags, &msg);
- xpc_part_deref(part);
-
- if (msg != NULL)
- *payload = &msg->payload;
- }
-
- return ret;
-}
-
-/*
- * Now we actually send the messages that are ready to be sent by advancing
- * the local message queue's Put value and then send an IPI to the recipient
- * partition.
- */
-static void
-xpc_send_msgs(struct xpc_channel *ch, s64 initial_put)
-{
- struct xpc_msg *msg;
- s64 put = initial_put + 1;
- int send_IPI = 0;
-
- while (1) {
-
- while (1) {
- if (put == ch->w_local_GP.put)
- break;
-
- msg = (struct xpc_msg *)((u64)ch->local_msgqueue +
- (put % ch->local_nentries) *
- ch->msg_size);
-
- if (!(msg->flags & XPC_M_READY))
- break;
-
- put++;
- }
-
- if (put == initial_put) {
- /* nothing's changed */
- break;
- }
-
- if (cmpxchg_rel(&ch->local_GP->put, initial_put, put) !=
- initial_put) {
- /* someone else beat us to it */
- DBUG_ON(ch->local_GP->put < initial_put);
- break;
- }
-
- /* we just set the new value of local_GP->put */
-
- dev_dbg(xpc_chan, "local_GP->put changed to %ld, partid=%d, "
- "channel=%d\n", put, ch->partid, ch->number);
-
- send_IPI = 1;
-
- /*
- * We need to ensure that the message referenced by
- * local_GP->put is not XPC_M_READY or that local_GP->put
- * equals w_local_GP.put, so we'll go have a look.
- */
- initial_put = put;
- }
-
- if (send_IPI)
- xpc_IPI_send_msgrequest(ch);
-}
-
-/*
- * Common code that does the actual sending of the message by advancing the
- * local message queue's Put value and sends an IPI to the partition the
- * message is being sent to.
- */
-static enum xp_retval
-xpc_send_msg(struct xpc_channel *ch, struct xpc_msg *msg, u8 notify_type,
- xpc_notify_func func, void *key)
-{
- enum xp_retval ret = xpSuccess;
- struct xpc_notify *notify = notify;
- s64 put, msg_number = msg->number;
-
- DBUG_ON(notify_type == XPC_N_CALL && func == NULL);
- DBUG_ON((((u64)msg - (u64)ch->local_msgqueue) / ch->msg_size) !=
- msg_number % ch->local_nentries);
- DBUG_ON(msg->flags & XPC_M_READY);
-
- if (ch->flags & XPC_C_DISCONNECTING) {
- /* drop the reference grabbed in xpc_allocate_msg() */
- xpc_msgqueue_deref(ch);
- return ch->reason;
- }
-
- if (notify_type != 0) {
- /*
- * Tell the remote side to send an ACK interrupt when the
- * message has been delivered.
- */
- msg->flags |= XPC_M_INTERRUPT;
-
- atomic_inc(&ch->n_to_notify);
-
- notify = &ch->notify_queue[msg_number % ch->local_nentries];
- notify->func = func;
- notify->key = key;
- notify->type = notify_type;
-
- /* >>> is a mb() needed here? */
-
- if (ch->flags & XPC_C_DISCONNECTING) {
- /*
- * An error occurred between our last error check and
- * this one. We will try to clear the type field from
- * the notify entry. If we succeed then
- * xpc_disconnect_channel() didn't already process
- * the notify entry.
- */
- if (cmpxchg(&notify->type, notify_type, 0) ==
- notify_type) {
- atomic_dec(&ch->n_to_notify);
- ret = ch->reason;
- }
-
- /* drop the reference grabbed in xpc_allocate_msg() */
- xpc_msgqueue_deref(ch);
- return ret;
- }
- }
-
- msg->flags |= XPC_M_READY;
-
- /*
- * The preceding store of msg->flags must occur before the following
- * load of ch->local_GP->put.
- */
- mb();
-
- /* see if the message is next in line to be sent, if so send it */
-
- put = ch->local_GP->put;
- if (put == msg_number)
- xpc_send_msgs(ch, put);
-
- /* drop the reference grabbed in xpc_allocate_msg() */
- xpc_msgqueue_deref(ch);
- return ret;
-}
-
-/*
- * Send a message previously allocated using xpc_initiate_allocate() on the
- * specified channel connected to the specified partition.
- *
- * This routine will not wait for the message to be received, nor will
- * notification be given when it does happen. Once this routine has returned
- * the message entry allocated via xpc_initiate_allocate() is no longer
- * accessable to the caller.
- *
- * This routine, although called by users, does not call xpc_part_ref() to
- * ensure that the partition infrastructure is in place. It relies on the
- * fact that we called xpc_msgqueue_ref() in xpc_allocate_msg().
+ * Once sent, this routine will not wait for the message to be received, nor
+ * will notification be given when it does happen.
*
* Arguments:
*
* partid - ID of partition to which the channel is connected.
* ch_number - channel # to send message on.
- * payload - pointer to the payload area allocated via
- * xpc_initiate_allocate().
+ * flags - see xp.h for valid flags.
+ * payload - pointer to the payload which is to be sent.
+ * payload_size - size of the payload in bytes.
*/
enum xp_retval
-xpc_initiate_send(short partid, int ch_number, void *payload)
+xpc_initiate_send(short partid, int ch_number, u32 flags, void *payload,
+ u16 payload_size)
{
struct xpc_partition *part = &xpc_partitions[partid];
- struct xpc_msg *msg = XPC_MSG_ADDRESS(payload);
- enum xp_retval ret;
+ enum xp_retval ret = xpUnknownReason;
- dev_dbg(xpc_chan, "msg=0x%p, partid=%d, channel=%d\n", (void *)msg,
+ dev_dbg(xpc_chan, "payload=0x%p, partid=%d, channel=%d\n", payload,
partid, ch_number);
- DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
+ DBUG_ON(partid < 0 || partid >= xp_max_npartitions);
DBUG_ON(ch_number < 0 || ch_number >= part->nchannels);
- DBUG_ON(msg == NULL);
+ DBUG_ON(payload == NULL);
- ret = xpc_send_msg(&part->channels[ch_number], msg, 0, NULL, NULL);
+ if (xpc_part_ref(part)) {
+ ret = xpc_send_payload(&part->channels[ch_number], flags,
+ payload, payload_size, 0, NULL, NULL);
+ xpc_part_deref(part);
+ }
return ret;
}
/*
- * Send a message previously allocated using xpc_initiate_allocate on the
- * specified channel connected to the specified partition.
+ * Send a message that contains the user's payload on the specified channel
+ * connected to the specified partition.
*
- * This routine will not wait for the message to be sent. Once this routine
- * has returned the message entry allocated via xpc_initiate_allocate() is no
- * longer accessable to the caller.
+ * NOTE that this routine can sleep waiting for a message entry to become
+ * available. To not sleep, pass in the XPC_NOWAIT flag.
+ *
+ * This routine will not wait for the message to be sent or received.
*
* Once the remote end of the channel has received the message, the function
* passed as an argument to xpc_initiate_send_notify() will be called. This
@@ -1943,158 +884,51 @@ xpc_initiate_send(short partid, int ch_number, void *payload)
*
* If this routine returns an error, the caller's function will NOT be called.
*
- * This routine, although called by users, does not call xpc_part_ref() to
- * ensure that the partition infrastructure is in place. It relies on the
- * fact that we called xpc_msgqueue_ref() in xpc_allocate_msg().
- *
* Arguments:
*
* partid - ID of partition to which the channel is connected.
* ch_number - channel # to send message on.
- * payload - pointer to the payload area allocated via
- * xpc_initiate_allocate().
+ * flags - see xp.h for valid flags.
+ * payload - pointer to the payload which is to be sent.
+ * payload_size - size of the payload in bytes.
* func - function to call with asynchronous notification of message
* receipt. THIS FUNCTION MUST BE NON-BLOCKING.
* key - user-defined key to be passed to the function when it's called.
*/
enum xp_retval
-xpc_initiate_send_notify(short partid, int ch_number, void *payload,
- xpc_notify_func func, void *key)
+xpc_initiate_send_notify(short partid, int ch_number, u32 flags, void *payload,
+ u16 payload_size, xpc_notify_func func, void *key)
{
struct xpc_partition *part = &xpc_partitions[partid];
- struct xpc_msg *msg = XPC_MSG_ADDRESS(payload);
- enum xp_retval ret;
+ enum xp_retval ret = xpUnknownReason;
- dev_dbg(xpc_chan, "msg=0x%p, partid=%d, channel=%d\n", (void *)msg,
+ dev_dbg(xpc_chan, "payload=0x%p, partid=%d, channel=%d\n", payload,
partid, ch_number);
- DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
+ DBUG_ON(partid < 0 || partid >= xp_max_npartitions);
DBUG_ON(ch_number < 0 || ch_number >= part->nchannels);
- DBUG_ON(msg == NULL);
+ DBUG_ON(payload == NULL);
DBUG_ON(func == NULL);
- ret = xpc_send_msg(&part->channels[ch_number], msg, XPC_N_CALL,
- func, key);
- return ret;
-}
-
-static struct xpc_msg *
-xpc_pull_remote_msg(struct xpc_channel *ch, s64 get)
-{
- struct xpc_partition *part = &xpc_partitions[ch->partid];
- struct xpc_msg *remote_msg, *msg;
- u32 msg_index, nmsgs;
- u64 msg_offset;
- enum xp_retval ret;
-
- if (mutex_lock_interruptible(&ch->msg_to_pull_mutex) != 0) {
- /* we were interrupted by a signal */
- return NULL;
- }
-
- while (get >= ch->next_msg_to_pull) {
-
- /* pull as many messages as are ready and able to be pulled */
-
- msg_index = ch->next_msg_to_pull % ch->remote_nentries;
-
- DBUG_ON(ch->next_msg_to_pull >= ch->w_remote_GP.put);
- nmsgs = ch->w_remote_GP.put - ch->next_msg_to_pull;
- if (msg_index + nmsgs > ch->remote_nentries) {
- /* ignore the ones that wrap the msg queue for now */
- nmsgs = ch->remote_nentries - msg_index;
- }
-
- msg_offset = msg_index * ch->msg_size;
- msg = (struct xpc_msg *)((u64)ch->remote_msgqueue + msg_offset);
- remote_msg = (struct xpc_msg *)(ch->remote_msgqueue_pa +
- msg_offset);
-
- ret = xpc_pull_remote_cachelines(part, msg, remote_msg,
- nmsgs * ch->msg_size);
- if (ret != xpSuccess) {
-
- dev_dbg(xpc_chan, "failed to pull %d msgs starting with"
- " msg %ld from partition %d, channel=%d, "
- "ret=%d\n", nmsgs, ch->next_msg_to_pull,
- ch->partid, ch->number, ret);
-
- XPC_DEACTIVATE_PARTITION(part, ret);
-
- mutex_unlock(&ch->msg_to_pull_mutex);
- return NULL;
- }
-
- ch->next_msg_to_pull += nmsgs;
+ if (xpc_part_ref(part)) {
+ ret = xpc_send_payload(&part->channels[ch_number], flags,
+ payload, payload_size, XPC_N_CALL, func,
+ key);
+ xpc_part_deref(part);
}
-
- mutex_unlock(&ch->msg_to_pull_mutex);
-
- /* return the message we were looking for */
- msg_offset = (get % ch->remote_nentries) * ch->msg_size;
- msg = (struct xpc_msg *)((u64)ch->remote_msgqueue + msg_offset);
-
- return msg;
-}
-
-/*
- * Get a message to be delivered.
- */
-static struct xpc_msg *
-xpc_get_deliverable_msg(struct xpc_channel *ch)
-{
- struct xpc_msg *msg = NULL;
- s64 get;
-
- do {
- if (ch->flags & XPC_C_DISCONNECTING)
- break;
-
- get = ch->w_local_GP.get;
- rmb(); /* guarantee that .get loads before .put */
- if (get == ch->w_remote_GP.put)
- break;
-
- /* There are messages waiting to be pulled and delivered.
- * We need to try to secure one for ourselves. We'll do this
- * by trying to increment w_local_GP.get and hope that no one
- * else beats us to it. If they do, we'll we'll simply have
- * to try again for the next one.
- */
-
- if (cmpxchg(&ch->w_local_GP.get, get, get + 1) == get) {
- /* we got the entry referenced by get */
-
- dev_dbg(xpc_chan, "w_local_GP.get changed to %ld, "
- "partid=%d, channel=%d\n", get + 1,
- ch->partid, ch->number);
-
- /* pull the message from the remote partition */
-
- msg = xpc_pull_remote_msg(ch, get);
-
- DBUG_ON(msg != NULL && msg->number != get);
- DBUG_ON(msg != NULL && (msg->flags & XPC_M_DONE));
- DBUG_ON(msg != NULL && !(msg->flags & XPC_M_READY));
-
- break;
- }
-
- } while (1);
-
- return msg;
+ return ret;
}
/*
- * Deliver a message to its intended recipient.
+ * Deliver a message's payload to its intended recipient.
*/
void
-xpc_deliver_msg(struct xpc_channel *ch)
+xpc_deliver_payload(struct xpc_channel *ch)
{
- struct xpc_msg *msg;
+ void *payload;
- msg = xpc_get_deliverable_msg(ch);
- if (msg != NULL) {
+ payload = xpc_get_deliverable_payload(ch);
+ if (payload != NULL) {
/*
* This ref is taken to protect the payload itself from being
@@ -2106,18 +940,16 @@ xpc_deliver_msg(struct xpc_channel *ch)
atomic_inc(&ch->kthreads_active);
if (ch->func != NULL) {
- dev_dbg(xpc_chan, "ch->func() called, msg=0x%p, "
- "msg_number=%ld, partid=%d, channel=%d\n",
- (void *)msg, msg->number, ch->partid,
+ dev_dbg(xpc_chan, "ch->func() called, payload=0x%p "
+ "partid=%d channel=%d\n", payload, ch->partid,
ch->number);
/* deliver the message to its intended recipient */
- ch->func(xpMsgReceived, ch->partid, ch->number,
- &msg->payload, ch->key);
+ ch->func(xpMsgReceived, ch->partid, ch->number, payload,
+ ch->key);
- dev_dbg(xpc_chan, "ch->func() returned, msg=0x%p, "
- "msg_number=%ld, partid=%d, channel=%d\n",
- (void *)msg, msg->number, ch->partid,
+ dev_dbg(xpc_chan, "ch->func() returned, payload=0x%p "
+ "partid=%d channel=%d\n", payload, ch->partid,
ch->number);
}
@@ -2126,118 +958,31 @@ xpc_deliver_msg(struct xpc_channel *ch)
}
/*
- * Now we actually acknowledge the messages that have been delivered and ack'd
- * by advancing the cached remote message queue's Get value and if requested
- * send an IPI to the message sender's partition.
- */
-static void
-xpc_acknowledge_msgs(struct xpc_channel *ch, s64 initial_get, u8 msg_flags)
-{
- struct xpc_msg *msg;
- s64 get = initial_get + 1;
- int send_IPI = 0;
-
- while (1) {
-
- while (1) {
- if (get == ch->w_local_GP.get)
- break;
-
- msg = (struct xpc_msg *)((u64)ch->remote_msgqueue +
- (get % ch->remote_nentries) *
- ch->msg_size);
-
- if (!(msg->flags & XPC_M_DONE))
- break;
-
- msg_flags |= msg->flags;
- get++;
- }
-
- if (get == initial_get) {
- /* nothing's changed */
- break;
- }
-
- if (cmpxchg_rel(&ch->local_GP->get, initial_get, get) !=
- initial_get) {
- /* someone else beat us to it */
- DBUG_ON(ch->local_GP->get <= initial_get);
- break;
- }
-
- /* we just set the new value of local_GP->get */
-
- dev_dbg(xpc_chan, "local_GP->get changed to %ld, partid=%d, "
- "channel=%d\n", get, ch->partid, ch->number);
-
- send_IPI = (msg_flags & XPC_M_INTERRUPT);
-
- /*
- * We need to ensure that the message referenced by
- * local_GP->get is not XPC_M_DONE or that local_GP->get
- * equals w_local_GP.get, so we'll go have a look.
- */
- initial_get = get;
- }
-
- if (send_IPI)
- xpc_IPI_send_msgrequest(ch);
-}
-
-/*
- * Acknowledge receipt of a delivered message.
- *
- * If a message has XPC_M_INTERRUPT set, send an interrupt to the partition
- * that sent the message.
+ * Acknowledge receipt of a delivered message's payload.
*
* This function, although called by users, does not call xpc_part_ref() to
* ensure that the partition infrastructure is in place. It relies on the
- * fact that we called xpc_msgqueue_ref() in xpc_deliver_msg().
+ * fact that we called xpc_msgqueue_ref() in xpc_deliver_payload().
*
* Arguments:
*
* partid - ID of partition to which the channel is connected.
* ch_number - channel # message received on.
* payload - pointer to the payload area allocated via
- * xpc_initiate_allocate().
+ * xpc_initiate_send() or xpc_initiate_send_notify().
*/
void
xpc_initiate_received(short partid, int ch_number, void *payload)
{
struct xpc_partition *part = &xpc_partitions[partid];
struct xpc_channel *ch;
- struct xpc_msg *msg = XPC_MSG_ADDRESS(payload);
- s64 get, msg_number = msg->number;
- DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
+ DBUG_ON(partid < 0 || partid >= xp_max_npartitions);
DBUG_ON(ch_number < 0 || ch_number >= part->nchannels);
ch = &part->channels[ch_number];
+ xpc_received_payload(ch, payload);
- dev_dbg(xpc_chan, "msg=0x%p, msg_number=%ld, partid=%d, channel=%d\n",
- (void *)msg, msg_number, ch->partid, ch->number);
-
- DBUG_ON((((u64)msg - (u64)ch->remote_msgqueue) / ch->msg_size) !=
- msg_number % ch->remote_nentries);
- DBUG_ON(msg->flags & XPC_M_DONE);
-
- msg->flags |= XPC_M_DONE;
-
- /*
- * The preceding store of msg->flags must occur before the following
- * load of ch->local_GP->get.
- */
- mb();
-
- /*
- * See if this message is next in line to be acknowledged as having
- * been delivered.
- */
- get = ch->local_GP->get;
- if (get == msg_number)
- xpc_acknowledge_msgs(ch, get, msg->flags);
-
- /* the call to xpc_msgqueue_ref() was done by xpc_deliver_msg() */
+ /* the call to xpc_msgqueue_ref() was done by xpc_deliver_payload() */
xpc_msgqueue_deref(ch);
}
diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c
index c3b4227f48a..46325fc8481 100644
--- a/drivers/misc/sgi-xp/xpc_main.c
+++ b/drivers/misc/sgi-xp/xpc_main.c
@@ -25,37 +25,31 @@
*
* Caveats:
*
- * . We currently have no way to determine which nasid an IPI came
- * from. Thus, xpc_IPI_send() does a remote AMO write followed by
- * an IPI. The AMO indicates where data is to be pulled from, so
- * after the IPI arrives, the remote partition checks the AMO word.
- * The IPI can actually arrive before the AMO however, so other code
- * must periodically check for this case. Also, remote AMO operations
- * do not reliably time out. Thus we do a remote PIO read solely to
- * know whether the remote partition is down and whether we should
- * stop sending IPIs to it. This remote PIO read operation is set up
- * in a special nofault region so SAL knows to ignore (and cleanup)
- * any errors due to the remote AMO write, PIO read, and/or PIO
- * write operations.
+ * . Currently on sn2, we have no way to determine which nasid an IRQ
+ * came from. Thus, xpc_send_IRQ_sn2() does a remote amo write
+ * followed by an IPI. The amo indicates where data is to be pulled
+ * from, so after the IPI arrives, the remote partition checks the amo
+ * word. The IPI can actually arrive before the amo however, so other
+ * code must periodically check for this case. Also, remote amo
+ * operations do not reliably time out. Thus we do a remote PIO read
+ * solely to know whether the remote partition is down and whether we
+ * should stop sending IPIs to it. This remote PIO read operation is
+ * set up in a special nofault region so SAL knows to ignore (and
+ * cleanup) any errors due to the remote amo write, PIO read, and/or
+ * PIO write operations.
*
* If/when new hardware solves this IPI problem, we should abandon
* the current approach.
*
*/
-#include <linux/kernel.h>
#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/cache.h>
-#include <linux/interrupt.h>
+#include <linux/sysctl.h>
+#include <linux/device.h>
#include <linux/delay.h>
#include <linux/reboot.h>
-#include <linux/completion.h>
#include <linux/kdebug.h>
#include <linux/kthread.h>
-#include <linux/uaccess.h>
-#include <asm/sn/intr.h>
-#include <asm/sn/sn_sal.h>
#include "xpc.h"
/* define two XPC debug device structures to be used with dev_dbg() et al */
@@ -89,9 +83,9 @@ static int xpc_hb_check_interval = XPC_HB_CHECK_DEFAULT_INTERVAL;
static int xpc_hb_check_min_interval = 10;
static int xpc_hb_check_max_interval = 120;
-int xpc_disengage_request_timelimit = XPC_DISENGAGE_REQUEST_DEFAULT_TIMELIMIT;
-static int xpc_disengage_request_min_timelimit; /* = 0 */
-static int xpc_disengage_request_max_timelimit = 120;
+int xpc_disengage_timelimit = XPC_DISENGAGE_DEFAULT_TIMELIMIT;
+static int xpc_disengage_min_timelimit; /* = 0 */
+static int xpc_disengage_max_timelimit = 120;
static ctl_table xpc_sys_xpc_hb_dir[] = {
{
@@ -124,14 +118,14 @@ static ctl_table xpc_sys_xpc_dir[] = {
.child = xpc_sys_xpc_hb_dir},
{
.ctl_name = CTL_UNNUMBERED,
- .procname = "disengage_request_timelimit",
- .data = &xpc_disengage_request_timelimit,
+ .procname = "disengage_timelimit",
+ .data = &xpc_disengage_timelimit,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec_minmax,
.strategy = &sysctl_intvec,
- .extra1 = &xpc_disengage_request_min_timelimit,
- .extra2 = &xpc_disengage_request_max_timelimit},
+ .extra1 = &xpc_disengage_min_timelimit,
+ .extra2 = &xpc_disengage_max_timelimit},
{}
};
static ctl_table xpc_sys_dir[] = {
@@ -144,16 +138,19 @@ static ctl_table xpc_sys_dir[] = {
};
static struct ctl_table_header *xpc_sysctl;
-/* non-zero if any remote partition disengage request was timed out */
-int xpc_disengage_request_timedout;
+/* non-zero if any remote partition disengage was timed out */
+int xpc_disengage_timedout;
-/* #of IRQs received */
-static atomic_t xpc_act_IRQ_rcvd;
+/* #of activate IRQs received and not yet processed */
+int xpc_activate_IRQ_rcvd;
+DEFINE_SPINLOCK(xpc_activate_IRQ_rcvd_lock);
/* IRQ handler notifies this wait queue on receipt of an IRQ */
-static DECLARE_WAIT_QUEUE_HEAD(xpc_act_IRQ_wq);
+DECLARE_WAIT_QUEUE_HEAD(xpc_activate_IRQ_wq);
static unsigned long xpc_hb_check_timeout;
+static struct timer_list xpc_hb_timer;
+void *xpc_heartbeating_to_mask;
/* notification that the xpc_hb_checker thread has exited */
static DECLARE_COMPLETION(xpc_hb_checker_exited);
@@ -161,8 +158,6 @@ static DECLARE_COMPLETION(xpc_hb_checker_exited);
/* notification that the xpc_discovery thread has exited */
static DECLARE_COMPLETION(xpc_discovery_exited);
-static struct timer_list xpc_hb_timer;
-
static void xpc_kthread_waitmsgs(struct xpc_partition *, struct xpc_channel *);
static int xpc_system_reboot(struct notifier_block *, unsigned long, void *);
@@ -175,31 +170,76 @@ static struct notifier_block xpc_die_notifier = {
.notifier_call = xpc_system_die,
};
+int (*xpc_setup_partitions_sn) (void);
+enum xp_retval (*xpc_get_partition_rsvd_page_pa) (void *buf, u64 *cookie,
+ unsigned long *rp_pa,
+ size_t *len);
+int (*xpc_setup_rsvd_page_sn) (struct xpc_rsvd_page *rp);
+void (*xpc_heartbeat_init) (void);
+void (*xpc_heartbeat_exit) (void);
+void (*xpc_increment_heartbeat) (void);
+void (*xpc_offline_heartbeat) (void);
+void (*xpc_online_heartbeat) (void);
+enum xp_retval (*xpc_get_remote_heartbeat) (struct xpc_partition *part);
+
+enum xp_retval (*xpc_make_first_contact) (struct xpc_partition *part);
+void (*xpc_notify_senders_of_disconnect) (struct xpc_channel *ch);
+u64 (*xpc_get_chctl_all_flags) (struct xpc_partition *part);
+enum xp_retval (*xpc_setup_msg_structures) (struct xpc_channel *ch);
+void (*xpc_teardown_msg_structures) (struct xpc_channel *ch);
+void (*xpc_process_msg_chctl_flags) (struct xpc_partition *part, int ch_number);
+int (*xpc_n_of_deliverable_payloads) (struct xpc_channel *ch);
+void *(*xpc_get_deliverable_payload) (struct xpc_channel *ch);
+
+void (*xpc_request_partition_activation) (struct xpc_rsvd_page *remote_rp,
+ unsigned long remote_rp_pa,
+ int nasid);
+void (*xpc_request_partition_reactivation) (struct xpc_partition *part);
+void (*xpc_request_partition_deactivation) (struct xpc_partition *part);
+void (*xpc_cancel_partition_deactivation_request) (struct xpc_partition *part);
+
+void (*xpc_process_activate_IRQ_rcvd) (void);
+enum xp_retval (*xpc_setup_ch_structures_sn) (struct xpc_partition *part);
+void (*xpc_teardown_ch_structures_sn) (struct xpc_partition *part);
+
+void (*xpc_indicate_partition_engaged) (struct xpc_partition *part);
+int (*xpc_partition_engaged) (short partid);
+int (*xpc_any_partition_engaged) (void);
+void (*xpc_indicate_partition_disengaged) (struct xpc_partition *part);
+void (*xpc_assume_partition_disengaged) (short partid);
+
+void (*xpc_send_chctl_closerequest) (struct xpc_channel *ch,
+ unsigned long *irq_flags);
+void (*xpc_send_chctl_closereply) (struct xpc_channel *ch,
+ unsigned long *irq_flags);
+void (*xpc_send_chctl_openrequest) (struct xpc_channel *ch,
+ unsigned long *irq_flags);
+void (*xpc_send_chctl_openreply) (struct xpc_channel *ch,
+ unsigned long *irq_flags);
+
+void (*xpc_save_remote_msgqueue_pa) (struct xpc_channel *ch,
+ unsigned long msgqueue_pa);
+
+enum xp_retval (*xpc_send_payload) (struct xpc_channel *ch, u32 flags,
+ void *payload, u16 payload_size,
+ u8 notify_type, xpc_notify_func func,
+ void *key);
+void (*xpc_received_payload) (struct xpc_channel *ch, void *payload);
+
/*
- * Timer function to enforce the timelimit on the partition disengage request.
+ * Timer function to enforce the timelimit on the partition disengage.
*/
static void
-xpc_timeout_partition_disengage_request(unsigned long data)
+xpc_timeout_partition_disengage(unsigned long data)
{
struct xpc_partition *part = (struct xpc_partition *)data;
- DBUG_ON(time_before(jiffies, part->disengage_request_timeout));
+ DBUG_ON(time_is_after_jiffies(part->disengage_timeout));
(void)xpc_partition_disengaged(part);
- DBUG_ON(part->disengage_request_timeout != 0);
- DBUG_ON(xpc_partition_engaged(1UL << XPC_PARTID(part)) != 0);
-}
-
-/*
- * Notify the heartbeat check thread that an IRQ has been received.
- */
-static irqreturn_t
-xpc_act_IRQ_handler(int irq, void *dev_id)
-{
- atomic_inc(&xpc_act_IRQ_rcvd);
- wake_up_interruptible(&xpc_act_IRQ_wq);
- return IRQ_HANDLED;
+ DBUG_ON(part->disengage_timeout != 0);
+ DBUG_ON(xpc_partition_engaged(XPC_PARTID(part)));
}
/*
@@ -210,15 +250,63 @@ xpc_act_IRQ_handler(int irq, void *dev_id)
static void
xpc_hb_beater(unsigned long dummy)
{
- xpc_vars->heartbeat++;
+ xpc_increment_heartbeat();
- if (time_after_eq(jiffies, xpc_hb_check_timeout))
- wake_up_interruptible(&xpc_act_IRQ_wq);
+ if (time_is_before_eq_jiffies(xpc_hb_check_timeout))
+ wake_up_interruptible(&xpc_activate_IRQ_wq);
xpc_hb_timer.expires = jiffies + (xpc_hb_interval * HZ);
add_timer(&xpc_hb_timer);
}
+static void
+xpc_start_hb_beater(void)
+{
+ xpc_heartbeat_init();
+ init_timer(&xpc_hb_timer);
+ xpc_hb_timer.function = xpc_hb_beater;
+ xpc_hb_beater(0);
+}
+
+static void
+xpc_stop_hb_beater(void)
+{
+ del_timer_sync(&xpc_hb_timer);
+ xpc_heartbeat_exit();
+}
+
+/*
+ * At periodic intervals, scan through all active partitions and ensure
+ * their heartbeat is still active. If not, the partition is deactivated.
+ */
+static void
+xpc_check_remote_hb(void)
+{
+ struct xpc_partition *part;
+ short partid;
+ enum xp_retval ret;
+
+ for (partid = 0; partid < xp_max_npartitions; partid++) {
+
+ if (xpc_exiting)
+ break;
+
+ if (partid == xp_partition_id)
+ continue;
+
+ part = &xpc_partitions[partid];
+
+ if (part->act_state == XPC_P_AS_INACTIVE ||
+ part->act_state == XPC_P_AS_DEACTIVATING) {
+ continue;
+ }
+
+ ret = xpc_get_remote_heartbeat(part);
+ if (ret != xpSuccess)
+ XPC_DEACTIVATE_PARTITION(part, ret);
+ }
+}
+
/*
* This thread is responsible for nearly all of the partition
* activation/deactivation.
@@ -226,8 +314,6 @@ xpc_hb_beater(unsigned long dummy)
static int
xpc_hb_checker(void *ignore)
{
- int last_IRQ_count = 0;
- int new_IRQ_count;
int force_IRQ = 0;
/* this thread was marked active by xpc_hb_init() */
@@ -236,56 +322,49 @@ xpc_hb_checker(void *ignore)
/* set our heartbeating to other partitions into motion */
xpc_hb_check_timeout = jiffies + (xpc_hb_check_interval * HZ);
- xpc_hb_beater(0);
+ xpc_start_hb_beater();
while (!xpc_exiting) {
dev_dbg(xpc_part, "woke up with %d ticks rem; %d IRQs have "
"been received\n",
(int)(xpc_hb_check_timeout - jiffies),
- atomic_read(&xpc_act_IRQ_rcvd) - last_IRQ_count);
+ xpc_activate_IRQ_rcvd);
/* checking of remote heartbeats is skewed by IRQ handling */
- if (time_after_eq(jiffies, xpc_hb_check_timeout)) {
+ if (time_is_before_eq_jiffies(xpc_hb_check_timeout)) {
+ xpc_hb_check_timeout = jiffies +
+ (xpc_hb_check_interval * HZ);
+
dev_dbg(xpc_part, "checking remote heartbeats\n");
xpc_check_remote_hb();
/*
- * We need to periodically recheck to ensure no
- * IPI/AMO pairs have been missed. That check
- * must always reset xpc_hb_check_timeout.
+ * On sn2 we need to periodically recheck to ensure no
+ * IRQ/amo pairs have been missed.
*/
- force_IRQ = 1;
+ if (is_shub())
+ force_IRQ = 1;
}
/* check for outstanding IRQs */
- new_IRQ_count = atomic_read(&xpc_act_IRQ_rcvd);
- if (last_IRQ_count < new_IRQ_count || force_IRQ != 0) {
+ if (xpc_activate_IRQ_rcvd > 0 || force_IRQ != 0) {
force_IRQ = 0;
-
- dev_dbg(xpc_part, "found an IRQ to process; will be "
- "resetting xpc_hb_check_timeout\n");
-
- last_IRQ_count += xpc_identify_act_IRQ_sender();
- if (last_IRQ_count < new_IRQ_count) {
- /* retry once to help avoid missing AMO */
- (void)xpc_identify_act_IRQ_sender();
- }
- last_IRQ_count = new_IRQ_count;
-
- xpc_hb_check_timeout = jiffies +
- (xpc_hb_check_interval * HZ);
+ dev_dbg(xpc_part, "processing activate IRQs "
+ "received\n");
+ xpc_process_activate_IRQ_rcvd();
}
/* wait for IRQ or timeout */
- (void)wait_event_interruptible(xpc_act_IRQ_wq,
- (last_IRQ_count <
- atomic_read(&xpc_act_IRQ_rcvd)
- || time_after_eq(jiffies,
- xpc_hb_check_timeout) ||
+ (void)wait_event_interruptible(xpc_activate_IRQ_wq,
+ (time_is_before_eq_jiffies(
+ xpc_hb_check_timeout) ||
+ xpc_activate_IRQ_rcvd > 0 ||
xpc_exiting));
}
+ xpc_stop_hb_beater();
+
dev_dbg(xpc_part, "heartbeat checker is exiting\n");
/* mark this thread as having exited */
@@ -311,37 +390,8 @@ xpc_initiate_discovery(void *ignore)
}
/*
- * Establish first contact with the remote partititon. This involves pulling
- * the XPC per partition variables from the remote partition and waiting for
- * the remote partition to pull ours.
- */
-static enum xp_retval
-xpc_make_first_contact(struct xpc_partition *part)
-{
- enum xp_retval ret;
-
- while ((ret = xpc_pull_remote_vars_part(part)) != xpSuccess) {
- if (ret != xpRetry) {
- XPC_DEACTIVATE_PARTITION(part, ret);
- return ret;
- }
-
- dev_dbg(xpc_chan, "waiting to make first contact with "
- "partition %d\n", XPC_PARTID(part));
-
- /* wait a 1/4 of a second or so */
- (void)msleep_interruptible(250);
-
- if (part->act_state == XPC_P_DEACTIVATING)
- return part->reason;
- }
-
- return xpc_mark_partition_active(part);
-}
-
-/*
* The first kthread assigned to a newly activated partition is the one
- * created by XPC HB with which it calls xpc_partition_up(). XPC hangs on to
+ * created by XPC HB with which it calls xpc_activating(). XPC hangs on to
* that kthread until the partition is brought down, at which time that kthread
* returns back to XPC HB. (The return of that kthread will signify to XPC HB
* that XPC has dismantled all communication infrastructure for the associated
@@ -354,11 +404,11 @@ xpc_make_first_contact(struct xpc_partition *part)
static void
xpc_channel_mgr(struct xpc_partition *part)
{
- while (part->act_state != XPC_P_DEACTIVATING ||
+ while (part->act_state != XPC_P_AS_DEACTIVATING ||
atomic_read(&part->nchannels_active) > 0 ||
!xpc_partition_disengaged(part)) {
- xpc_process_channel_activity(part);
+ xpc_process_sent_chctl_flags(part);
/*
* Wait until we've been requested to activate kthreads or
@@ -376,8 +426,8 @@ xpc_channel_mgr(struct xpc_partition *part)
atomic_dec(&part->channel_mgr_requests);
(void)wait_event_interruptible(part->channel_mgr_wq,
(atomic_read(&part->channel_mgr_requests) > 0 ||
- part->local_IPI_amo != 0 ||
- (part->act_state == XPC_P_DEACTIVATING &&
+ part->chctl.all_flags != 0 ||
+ (part->act_state == XPC_P_AS_DEACTIVATING &&
atomic_read(&part->nchannels_active) == 0 &&
xpc_partition_disengaged(part))));
atomic_set(&part->channel_mgr_requests, 1);
@@ -385,47 +435,163 @@ xpc_channel_mgr(struct xpc_partition *part)
}
/*
- * When XPC HB determines that a partition has come up, it will create a new
- * kthread and that kthread will call this function to attempt to set up the
- * basic infrastructure used for Cross Partition Communication with the newly
- * upped partition.
- *
- * The kthread that was created by XPC HB and which setup the XPC
- * infrastructure will remain assigned to the partition until the partition
- * goes down. At which time the kthread will teardown the XPC infrastructure
- * and then exit.
- *
- * XPC HB will put the remote partition's XPC per partition specific variables
- * physical address into xpc_partitions[partid].remote_vars_part_pa prior to
- * calling xpc_partition_up().
+ * Guarantee that the kzalloc'd memory is cacheline aligned.
*/
-static void
-xpc_partition_up(struct xpc_partition *part)
+void *
+xpc_kzalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
+{
+ /* see if kzalloc will give us cachline aligned memory by default */
+ *base = kzalloc(size, flags);
+ if (*base == NULL)
+ return NULL;
+
+ if ((u64)*base == L1_CACHE_ALIGN((u64)*base))
+ return *base;
+
+ kfree(*base);
+
+ /* nope, we'll have to do it ourselves */
+ *base = kzalloc(size + L1_CACHE_BYTES, flags);
+ if (*base == NULL)
+ return NULL;
+
+ return (void *)L1_CACHE_ALIGN((u64)*base);
+}
+
+/*
+ * Setup the channel structures necessary to support XPartition Communication
+ * between the specified remote partition and the local one.
+ */
+static enum xp_retval
+xpc_setup_ch_structures(struct xpc_partition *part)
{
+ enum xp_retval ret;
+ int ch_number;
+ struct xpc_channel *ch;
+ short partid = XPC_PARTID(part);
+
+ /*
+ * Allocate all of the channel structures as a contiguous chunk of
+ * memory.
+ */
DBUG_ON(part->channels != NULL);
+ part->channels = kzalloc(sizeof(struct xpc_channel) * XPC_MAX_NCHANNELS,
+ GFP_KERNEL);
+ if (part->channels == NULL) {
+ dev_err(xpc_chan, "can't get memory for channels\n");
+ return xpNoMemory;
+ }
- dev_dbg(xpc_chan, "activating partition %d\n", XPC_PARTID(part));
+ /* allocate the remote open and close args */
- if (xpc_setup_infrastructure(part) != xpSuccess)
- return;
+ part->remote_openclose_args =
+ xpc_kzalloc_cacheline_aligned(XPC_OPENCLOSE_ARGS_SIZE,
+ GFP_KERNEL, &part->
+ remote_openclose_args_base);
+ if (part->remote_openclose_args == NULL) {
+ dev_err(xpc_chan, "can't get memory for remote connect args\n");
+ ret = xpNoMemory;
+ goto out_1;
+ }
+
+ part->chctl.all_flags = 0;
+ spin_lock_init(&part->chctl_lock);
+
+ atomic_set(&part->channel_mgr_requests, 1);
+ init_waitqueue_head(&part->channel_mgr_wq);
+
+ part->nchannels = XPC_MAX_NCHANNELS;
+
+ atomic_set(&part->nchannels_active, 0);
+ atomic_set(&part->nchannels_engaged, 0);
+
+ for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
+ ch = &part->channels[ch_number];
+
+ ch->partid = partid;
+ ch->number = ch_number;
+ ch->flags = XPC_C_DISCONNECTED;
+
+ atomic_set(&ch->kthreads_assigned, 0);
+ atomic_set(&ch->kthreads_idle, 0);
+ atomic_set(&ch->kthreads_active, 0);
+
+ atomic_set(&ch->references, 0);
+ atomic_set(&ch->n_to_notify, 0);
+
+ spin_lock_init(&ch->lock);
+ init_completion(&ch->wdisconnect_wait);
+
+ atomic_set(&ch->n_on_msg_allocate_wq, 0);
+ init_waitqueue_head(&ch->msg_allocate_wq);
+ init_waitqueue_head(&ch->idle_wq);
+ }
+
+ ret = xpc_setup_ch_structures_sn(part);
+ if (ret != xpSuccess)
+ goto out_2;
+
+ /*
+ * With the setting of the partition setup_state to XPC_P_SS_SETUP,
+ * we're declaring that this partition is ready to go.
+ */
+ part->setup_state = XPC_P_SS_SETUP;
+
+ return xpSuccess;
+
+ /* setup of ch structures failed */
+out_2:
+ kfree(part->remote_openclose_args_base);
+ part->remote_openclose_args = NULL;
+out_1:
+ kfree(part->channels);
+ part->channels = NULL;
+ return ret;
+}
+
+/*
+ * Teardown the channel structures necessary to support XPartition Communication
+ * between the specified remote partition and the local one.
+ */
+static void
+xpc_teardown_ch_structures(struct xpc_partition *part)
+{
+ DBUG_ON(atomic_read(&part->nchannels_engaged) != 0);
+ DBUG_ON(atomic_read(&part->nchannels_active) != 0);
/*
- * The kthread that XPC HB called us with will become the
- * channel manager for this partition. It will not return
- * back to XPC HB until the partition's XPC infrastructure
- * has been dismantled.
+ * Make this partition inaccessible to local processes by marking it
+ * as no longer setup. Then wait before proceeding with the teardown
+ * until all existing references cease.
*/
+ DBUG_ON(part->setup_state != XPC_P_SS_SETUP);
+ part->setup_state = XPC_P_SS_WTEARDOWN;
- (void)xpc_part_ref(part); /* this will always succeed */
+ wait_event(part->teardown_wq, (atomic_read(&part->references) == 0));
- if (xpc_make_first_contact(part) == xpSuccess)
- xpc_channel_mgr(part);
+ /* now we can begin tearing down the infrastructure */
- xpc_part_deref(part);
+ xpc_teardown_ch_structures_sn(part);
- xpc_teardown_infrastructure(part);
+ kfree(part->remote_openclose_args_base);
+ part->remote_openclose_args = NULL;
+ kfree(part->channels);
+ part->channels = NULL;
+
+ part->setup_state = XPC_P_SS_TORNDOWN;
}
+/*
+ * When XPC HB determines that a partition has come up, it will create a new
+ * kthread and that kthread will call this function to attempt to set up the
+ * basic infrastructure used for Cross Partition Communication with the newly
+ * upped partition.
+ *
+ * The kthread that was created by XPC HB and which setup the XPC
+ * infrastructure will remain assigned to the partition becoming the channel
+ * manager for that partition until the partition is deactivating, at which
+ * time the kthread will teardown the XPC infrastructure and then exit.
+ */
static int
xpc_activating(void *__partid)
{
@@ -433,64 +599,47 @@ xpc_activating(void *__partid)
struct xpc_partition *part = &xpc_partitions[partid];
unsigned long irq_flags;
- DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
+ DBUG_ON(partid < 0 || partid >= xp_max_npartitions);
spin_lock_irqsave(&part->act_lock, irq_flags);
- if (part->act_state == XPC_P_DEACTIVATING) {
- part->act_state = XPC_P_INACTIVE;
+ if (part->act_state == XPC_P_AS_DEACTIVATING) {
+ part->act_state = XPC_P_AS_INACTIVE;
spin_unlock_irqrestore(&part->act_lock, irq_flags);
part->remote_rp_pa = 0;
return 0;
}
/* indicate the thread is activating */
- DBUG_ON(part->act_state != XPC_P_ACTIVATION_REQ);
- part->act_state = XPC_P_ACTIVATING;
+ DBUG_ON(part->act_state != XPC_P_AS_ACTIVATION_REQ);
+ part->act_state = XPC_P_AS_ACTIVATING;
XPC_SET_REASON(part, 0, 0);
spin_unlock_irqrestore(&part->act_lock, irq_flags);
- dev_dbg(xpc_part, "bringing partition %d up\n", partid);
+ dev_dbg(xpc_part, "activating partition %d\n", partid);
- /*
- * Register the remote partition's AMOs with SAL so it can handle
- * and cleanup errors within that address range should the remote
- * partition go down. We don't unregister this range because it is
- * difficult to tell when outstanding writes to the remote partition
- * are finished and thus when it is safe to unregister. This should
- * not result in wasted space in the SAL xp_addr_region table because
- * we should get the same page for remote_amos_page_pa after module
- * reloads and system reboots.
- */
- if (sn_register_xp_addr_region(part->remote_amos_page_pa,
- PAGE_SIZE, 1) < 0) {
- dev_warn(xpc_part, "xpc_partition_up(%d) failed to register "
- "xp_addr region\n", partid);
+ xpc_allow_hb(partid);
- spin_lock_irqsave(&part->act_lock, irq_flags);
- part->act_state = XPC_P_INACTIVE;
- XPC_SET_REASON(part, xpPhysAddrRegFailed, __LINE__);
- spin_unlock_irqrestore(&part->act_lock, irq_flags);
- part->remote_rp_pa = 0;
- return 0;
- }
+ if (xpc_setup_ch_structures(part) == xpSuccess) {
+ (void)xpc_part_ref(part); /* this will always succeed */
- xpc_allow_hb(partid, xpc_vars);
- xpc_IPI_send_activated(part);
+ if (xpc_make_first_contact(part) == xpSuccess) {
+ xpc_mark_partition_active(part);
+ xpc_channel_mgr(part);
+ /* won't return until partition is deactivating */
+ }
- /*
- * xpc_partition_up() holds this thread and marks this partition as
- * XPC_P_ACTIVE by calling xpc_hb_mark_active().
- */
- (void)xpc_partition_up(part);
+ xpc_part_deref(part);
+ xpc_teardown_ch_structures(part);
+ }
- xpc_disallow_hb(partid, xpc_vars);
+ xpc_disallow_hb(partid);
xpc_mark_partition_inactive(part);
if (part->reason == xpReactivating) {
/* interrupting ourselves results in activating partition */
- xpc_IPI_send_reactivate(part);
+ xpc_request_partition_reactivation(part);
}
return 0;
@@ -505,9 +654,9 @@ xpc_activate_partition(struct xpc_partition *part)
spin_lock_irqsave(&part->act_lock, irq_flags);
- DBUG_ON(part->act_state != XPC_P_INACTIVE);
+ DBUG_ON(part->act_state != XPC_P_AS_INACTIVE);
- part->act_state = XPC_P_ACTIVATION_REQ;
+ part->act_state = XPC_P_AS_ACTIVATION_REQ;
XPC_SET_REASON(part, xpCloneKThread, __LINE__);
spin_unlock_irqrestore(&part->act_lock, irq_flags);
@@ -516,62 +665,12 @@ xpc_activate_partition(struct xpc_partition *part)
partid);
if (IS_ERR(kthread)) {
spin_lock_irqsave(&part->act_lock, irq_flags);
- part->act_state = XPC_P_INACTIVE;
+ part->act_state = XPC_P_AS_INACTIVE;
XPC_SET_REASON(part, xpCloneKThreadFailed, __LINE__);
spin_unlock_irqrestore(&part->act_lock, irq_flags);
}
}
-/*
- * Handle the receipt of a SGI_XPC_NOTIFY IRQ by seeing whether the specified
- * partition actually sent it. Since SGI_XPC_NOTIFY IRQs may be shared by more
- * than one partition, we use an AMO_t structure per partition to indicate
- * whether a partition has sent an IPI or not. If it has, then wake up the
- * associated kthread to handle it.
- *
- * All SGI_XPC_NOTIFY IRQs received by XPC are the result of IPIs sent by XPC
- * running on other partitions.
- *
- * Noteworthy Arguments:
- *
- * irq - Interrupt ReQuest number. NOT USED.
- *
- * dev_id - partid of IPI's potential sender.
- */
-irqreturn_t
-xpc_notify_IRQ_handler(int irq, void *dev_id)
-{
- short partid = (short)(u64)dev_id;
- struct xpc_partition *part = &xpc_partitions[partid];
-
- DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
-
- if (xpc_part_ref(part)) {
- xpc_check_for_channel_activity(part);
-
- xpc_part_deref(part);
- }
- return IRQ_HANDLED;
-}
-
-/*
- * Check to see if xpc_notify_IRQ_handler() dropped any IPIs on the floor
- * because the write to their associated IPI amo completed after the IRQ/IPI
- * was received.
- */
-void
-xpc_dropped_IPI_check(struct xpc_partition *part)
-{
- if (xpc_part_ref(part)) {
- xpc_check_for_channel_activity(part);
-
- part->dropped_IPI_timer.expires = jiffies +
- XPC_P_DROPPED_IPI_WAIT;
- add_timer(&part->dropped_IPI_timer);
- xpc_part_deref(part);
- }
-}
-
void
xpc_activate_kthreads(struct xpc_channel *ch, int needed)
{
@@ -616,9 +715,9 @@ xpc_kthread_waitmsgs(struct xpc_partition *part, struct xpc_channel *ch)
do {
/* deliver messages to their intended recipients */
- while (ch->w_local_GP.get < ch->w_remote_GP.put &&
+ while (xpc_n_of_deliverable_payloads(ch) > 0 &&
!(ch->flags & XPC_C_DISCONNECTING)) {
- xpc_deliver_msg(ch);
+ xpc_deliver_payload(ch);
}
if (atomic_inc_return(&ch->kthreads_idle) >
@@ -632,7 +731,7 @@ xpc_kthread_waitmsgs(struct xpc_partition *part, struct xpc_channel *ch)
"wait_event_interruptible_exclusive()\n");
(void)wait_event_interruptible_exclusive(ch->idle_wq,
- (ch->w_local_GP.get < ch->w_remote_GP.put ||
+ (xpc_n_of_deliverable_payloads(ch) > 0 ||
(ch->flags & XPC_C_DISCONNECTING)));
atomic_dec(&ch->kthreads_idle);
@@ -677,7 +776,7 @@ xpc_kthread_start(void *args)
* additional kthreads to help deliver them. We only
* need one less than total #of messages to deliver.
*/
- n_needed = ch->w_remote_GP.put - ch->w_local_GP.get - 1;
+ n_needed = xpc_n_of_deliverable_payloads(ch) - 1;
if (n_needed > 0 && !(ch->flags & XPC_C_DISCONNECTING))
xpc_activate_kthreads(ch, n_needed);
@@ -703,11 +802,9 @@ xpc_kthread_start(void *args)
}
spin_unlock_irqrestore(&ch->lock, irq_flags);
- if (atomic_dec_return(&ch->kthreads_assigned) == 0) {
- if (atomic_dec_return(&part->nchannels_engaged) == 0) {
- xpc_mark_partition_disengaged(part);
- xpc_IPI_send_disengage(part);
- }
+ if (atomic_dec_return(&ch->kthreads_assigned) == 0 &&
+ atomic_dec_return(&part->nchannels_engaged) == 0) {
+ xpc_indicate_partition_disengaged(part);
}
xpc_msgqueue_deref(ch);
@@ -758,9 +855,9 @@ xpc_create_kthreads(struct xpc_channel *ch, int needed,
} else if (ch->flags & XPC_C_DISCONNECTING) {
break;
- } else if (atomic_inc_return(&ch->kthreads_assigned) == 1) {
- if (atomic_inc_return(&part->nchannels_engaged) == 1)
- xpc_mark_partition_engaged(part);
+ } else if (atomic_inc_return(&ch->kthreads_assigned) == 1 &&
+ atomic_inc_return(&part->nchannels_engaged) == 1) {
+ xpc_indicate_partition_engaged(part);
}
(void)xpc_part_ref(part);
xpc_msgqueue_ref(ch);
@@ -782,8 +879,7 @@ xpc_create_kthreads(struct xpc_channel *ch, int needed,
if (atomic_dec_return(&ch->kthreads_assigned) == 0 &&
atomic_dec_return(&part->nchannels_engaged) == 0) {
- xpc_mark_partition_disengaged(part);
- xpc_IPI_send_disengage(part);
+ xpc_indicate_partition_disengaged(part);
}
xpc_msgqueue_deref(ch);
xpc_part_deref(part);
@@ -815,7 +911,7 @@ xpc_disconnect_wait(int ch_number)
int wakeup_channel_mgr;
/* now wait for all callouts to the caller's function to cease */
- for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
+ for (partid = 0; partid < xp_max_npartitions; partid++) {
part = &xpc_partitions[partid];
if (!xpc_part_ref(part))
@@ -834,16 +930,15 @@ xpc_disconnect_wait(int ch_number)
DBUG_ON(!(ch->flags & XPC_C_DISCONNECTED));
wakeup_channel_mgr = 0;
- if (ch->delayed_IPI_flags) {
- if (part->act_state != XPC_P_DEACTIVATING) {
- spin_lock(&part->IPI_lock);
- XPC_SET_IPI_FLAGS(part->local_IPI_amo,
- ch->number,
- ch->delayed_IPI_flags);
- spin_unlock(&part->IPI_lock);
+ if (ch->delayed_chctl_flags) {
+ if (part->act_state != XPC_P_AS_DEACTIVATING) {
+ spin_lock(&part->chctl_lock);
+ part->chctl.flags[ch->number] |=
+ ch->delayed_chctl_flags;
+ spin_unlock(&part->chctl_lock);
wakeup_channel_mgr = 1;
}
- ch->delayed_IPI_flags = 0;
+ ch->delayed_chctl_flags = 0;
}
ch->flags &= ~XPC_C_WDISCONNECT;
@@ -856,13 +951,63 @@ xpc_disconnect_wait(int ch_number)
}
}
+static int
+xpc_setup_partitions(void)
+{
+ short partid;
+ struct xpc_partition *part;
+
+ xpc_partitions = kzalloc(sizeof(struct xpc_partition) *
+ xp_max_npartitions, GFP_KERNEL);
+ if (xpc_partitions == NULL) {
+ dev_err(xpc_part, "can't get memory for partition structure\n");
+ return -ENOMEM;
+ }
+
+ /*
+ * The first few fields of each entry of xpc_partitions[] need to
+ * be initialized now so that calls to xpc_connect() and
+ * xpc_disconnect() can be made prior to the activation of any remote
+ * partition. NOTE THAT NONE OF THE OTHER FIELDS BELONGING TO THESE
+ * ENTRIES ARE MEANINGFUL UNTIL AFTER AN ENTRY'S CORRESPONDING
+ * PARTITION HAS BEEN ACTIVATED.
+ */
+ for (partid = 0; partid < xp_max_npartitions; partid++) {
+ part = &xpc_partitions[partid];
+
+ DBUG_ON((u64)part != L1_CACHE_ALIGN((u64)part));
+
+ part->activate_IRQ_rcvd = 0;
+ spin_lock_init(&part->act_lock);
+ part->act_state = XPC_P_AS_INACTIVE;
+ XPC_SET_REASON(part, 0, 0);
+
+ init_timer(&part->disengage_timer);
+ part->disengage_timer.function =
+ xpc_timeout_partition_disengage;
+ part->disengage_timer.data = (unsigned long)part;
+
+ part->setup_state = XPC_P_SS_UNSET;
+ init_waitqueue_head(&part->teardown_wq);
+ atomic_set(&part->references, 0);
+ }
+
+ return xpc_setup_partitions_sn();
+}
+
+static void
+xpc_teardown_partitions(void)
+{
+ kfree(xpc_partitions);
+}
+
static void
xpc_do_exit(enum xp_retval reason)
{
short partid;
int active_part_count, printed_waiting_msg = 0;
struct xpc_partition *part;
- unsigned long printmsg_time, disengage_request_timeout = 0;
+ unsigned long printmsg_time, disengage_timeout = 0;
/* a 'rmmod XPC' and a 'reboot' cannot both end up here together */
DBUG_ON(xpc_exiting == 1);
@@ -873,10 +1018,7 @@ xpc_do_exit(enum xp_retval reason)
* the heartbeat checker thread in case it's sleeping.
*/
xpc_exiting = 1;
- wake_up_interruptible(&xpc_act_IRQ_wq);
-
- /* ignore all incoming interrupts */
- free_irq(SGI_XPC_ACTIVATE, NULL);
+ wake_up_interruptible(&xpc_activate_IRQ_wq);
/* wait for the discovery thread to exit */
wait_for_completion(&xpc_discovery_exited);
@@ -889,17 +1031,17 @@ xpc_do_exit(enum xp_retval reason)
/* wait for all partitions to become inactive */
- printmsg_time = jiffies + (XPC_DISENGAGE_PRINTMSG_INTERVAL * HZ);
- xpc_disengage_request_timedout = 0;
+ printmsg_time = jiffies + (XPC_DEACTIVATE_PRINTMSG_INTERVAL * HZ);
+ xpc_disengage_timedout = 0;
do {
active_part_count = 0;
- for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
+ for (partid = 0; partid < xp_max_npartitions; partid++) {
part = &xpc_partitions[partid];
if (xpc_partition_disengaged(part) &&
- part->act_state == XPC_P_INACTIVE) {
+ part->act_state == XPC_P_AS_INACTIVE) {
continue;
}
@@ -907,36 +1049,32 @@ xpc_do_exit(enum xp_retval reason)
XPC_DEACTIVATE_PARTITION(part, reason);
- if (part->disengage_request_timeout >
- disengage_request_timeout) {
- disengage_request_timeout =
- part->disengage_request_timeout;
- }
+ if (part->disengage_timeout > disengage_timeout)
+ disengage_timeout = part->disengage_timeout;
}
- if (xpc_partition_engaged(-1UL)) {
- if (time_after(jiffies, printmsg_time)) {
+ if (xpc_any_partition_engaged()) {
+ if (time_is_before_jiffies(printmsg_time)) {
dev_info(xpc_part, "waiting for remote "
- "partitions to disengage, timeout in "
- "%ld seconds\n",
- (disengage_request_timeout - jiffies)
- / HZ);
+ "partitions to deactivate, timeout in "
+ "%ld seconds\n", (disengage_timeout -
+ jiffies) / HZ);
printmsg_time = jiffies +
- (XPC_DISENGAGE_PRINTMSG_INTERVAL * HZ);
+ (XPC_DEACTIVATE_PRINTMSG_INTERVAL * HZ);
printed_waiting_msg = 1;
}
} else if (active_part_count > 0) {
if (printed_waiting_msg) {
dev_info(xpc_part, "waiting for local partition"
- " to disengage\n");
+ " to deactivate\n");
printed_waiting_msg = 0;
}
} else {
- if (!xpc_disengage_request_timedout) {
+ if (!xpc_disengage_timedout) {
dev_info(xpc_part, "all partitions have "
- "disengaged\n");
+ "deactivated\n");
}
break;
}
@@ -946,33 +1084,28 @@ xpc_do_exit(enum xp_retval reason)
} while (1);
- DBUG_ON(xpc_partition_engaged(-1UL));
+ DBUG_ON(xpc_any_partition_engaged());
+ DBUG_ON(xpc_any_hbs_allowed() != 0);
- /* indicate to others that our reserved page is uninitialized */
- xpc_rsvd_page->vars_pa = 0;
-
- /* now it's time to eliminate our heartbeat */
- del_timer_sync(&xpc_hb_timer);
- DBUG_ON(xpc_vars->heartbeating_to_mask != 0);
+ xpc_teardown_rsvd_page();
if (reason == xpUnloading) {
- /* take ourselves off of the reboot_notifier_list */
- (void)unregister_reboot_notifier(&xpc_reboot_notifier);
-
- /* take ourselves off of the die_notifier list */
(void)unregister_die_notifier(&xpc_die_notifier);
+ (void)unregister_reboot_notifier(&xpc_reboot_notifier);
}
- /* close down protections for IPI operations */
- xpc_restrict_IPI_ops();
-
/* clear the interface to XPC's functions */
xpc_clear_interface();
if (xpc_sysctl)
unregister_sysctl_table(xpc_sysctl);
- kfree(xpc_remote_copy_buffer_base);
+ xpc_teardown_partitions();
+
+ if (is_shub())
+ xpc_exit_sn2();
+ else
+ xpc_exit_uv();
}
/*
@@ -1002,60 +1135,57 @@ xpc_system_reboot(struct notifier_block *nb, unsigned long event, void *unused)
}
/*
- * Notify other partitions to disengage from all references to our memory.
+ * Notify other partitions to deactivate from us by first disengaging from all
+ * references to our memory.
*/
static void
-xpc_die_disengage(void)
+xpc_die_deactivate(void)
{
struct xpc_partition *part;
short partid;
- unsigned long engaged;
- long time, printmsg_time, disengage_request_timeout;
+ int any_engaged;
+ long keep_waiting;
+ long wait_to_print;
/* keep xpc_hb_checker thread from doing anything (just in case) */
xpc_exiting = 1;
- xpc_vars->heartbeating_to_mask = 0; /* indicate we're deactivated */
+ xpc_disallow_all_hbs(); /*indicate we're deactivated */
- for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
+ for (partid = 0; partid < xp_max_npartitions; partid++) {
part = &xpc_partitions[partid];
- if (!XPC_SUPPORTS_DISENGAGE_REQUEST(part->
- remote_vars_version)) {
-
- /* just in case it was left set by an earlier XPC */
- xpc_clear_partition_engaged(1UL << partid);
- continue;
- }
-
- if (xpc_partition_engaged(1UL << partid) ||
- part->act_state != XPC_P_INACTIVE) {
- xpc_request_partition_disengage(part);
- xpc_mark_partition_disengaged(part);
- xpc_IPI_send_disengage(part);
+ if (xpc_partition_engaged(partid) ||
+ part->act_state != XPC_P_AS_INACTIVE) {
+ xpc_request_partition_deactivation(part);
+ xpc_indicate_partition_disengaged(part);
}
}
- time = rtc_time();
- printmsg_time = time +
- (XPC_DISENGAGE_PRINTMSG_INTERVAL * sn_rtc_cycles_per_second);
- disengage_request_timeout = time +
- (xpc_disengage_request_timelimit * sn_rtc_cycles_per_second);
-
- /* wait for all other partitions to disengage from us */
+ /*
+ * Though we requested that all other partitions deactivate from us,
+ * we only wait until they've all disengaged or we've reached the
+ * defined timelimit.
+ *
+ * Given that one iteration through the following while-loop takes
+ * approximately 200 microseconds, calculate the #of loops to take
+ * before bailing and the #of loops before printing a waiting message.
+ */
+ keep_waiting = xpc_disengage_timelimit * 1000 * 5;
+ wait_to_print = XPC_DEACTIVATE_PRINTMSG_INTERVAL * 1000 * 5;
while (1) {
- engaged = xpc_partition_engaged(-1UL);
- if (!engaged) {
- dev_info(xpc_part, "all partitions have disengaged\n");
+ any_engaged = xpc_any_partition_engaged();
+ if (!any_engaged) {
+ dev_info(xpc_part, "all partitions have deactivated\n");
break;
}
- time = rtc_time();
- if (time >= disengage_request_timeout) {
- for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
- if (engaged & (1UL << partid)) {
- dev_info(xpc_part, "disengage from "
+ if (!keep_waiting--) {
+ for (partid = 0; partid < xp_max_npartitions;
+ partid++) {
+ if (xpc_partition_engaged(partid)) {
+ dev_info(xpc_part, "deactivate from "
"remote partition %d timed "
"out\n", partid);
}
@@ -1063,15 +1193,15 @@ xpc_die_disengage(void)
break;
}
- if (time >= printmsg_time) {
+ if (!wait_to_print--) {
dev_info(xpc_part, "waiting for remote partitions to "
- "disengage, timeout in %ld seconds\n",
- (disengage_request_timeout - time) /
- sn_rtc_cycles_per_second);
- printmsg_time = time +
- (XPC_DISENGAGE_PRINTMSG_INTERVAL *
- sn_rtc_cycles_per_second);
+ "deactivate, timeout in %ld seconds\n",
+ keep_waiting / (1000 * 5));
+ wait_to_print = XPC_DEACTIVATE_PRINTMSG_INTERVAL *
+ 1000 * 5;
}
+
+ udelay(200);
}
}
@@ -1086,10 +1216,11 @@ xpc_die_disengage(void)
static int
xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused)
{
+#ifdef CONFIG_IA64 /* !!! temporary kludge */
switch (event) {
case DIE_MACHINE_RESTART:
case DIE_MACHINE_HALT:
- xpc_die_disengage();
+ xpc_die_deactivate();
break;
case DIE_KDEBUG_ENTER:
@@ -1100,8 +1231,7 @@ xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused)
/* fall through */
case DIE_MCA_MONARCH_ENTER:
case DIE_INIT_MONARCH_ENTER:
- xpc_vars->heartbeat++;
- xpc_vars->heartbeat_offline = 1;
+ xpc_offline_heartbeat();
break;
case DIE_KDEBUG_LEAVE:
@@ -1112,10 +1242,12 @@ xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused)
/* fall through */
case DIE_MCA_MONARCH_LEAVE:
case DIE_INIT_MONARCH_LEAVE:
- xpc_vars->heartbeat++;
- xpc_vars->heartbeat_offline = 0;
+ xpc_online_heartbeat();
break;
}
+#else
+ xpc_die_deactivate();
+#endif
return NOTIFY_DONE;
}
@@ -1124,105 +1256,52 @@ int __init
xpc_init(void)
{
int ret;
- short partid;
- struct xpc_partition *part;
struct task_struct *kthread;
- size_t buf_size;
-
- if (!ia64_platform_is("sn2"))
- return -ENODEV;
-
- buf_size = max(XPC_RP_VARS_SIZE,
- XPC_RP_HEADER_SIZE + XP_NASID_MASK_BYTES);
- xpc_remote_copy_buffer = xpc_kmalloc_cacheline_aligned(buf_size,
- GFP_KERNEL,
- &xpc_remote_copy_buffer_base);
- if (xpc_remote_copy_buffer == NULL)
- return -ENOMEM;
snprintf(xpc_part->bus_id, BUS_ID_SIZE, "part");
snprintf(xpc_chan->bus_id, BUS_ID_SIZE, "chan");
- xpc_sysctl = register_sysctl_table(xpc_sys_dir);
-
- /*
- * The first few fields of each entry of xpc_partitions[] need to
- * be initialized now so that calls to xpc_connect() and
- * xpc_disconnect() can be made prior to the activation of any remote
- * partition. NOTE THAT NONE OF THE OTHER FIELDS BELONGING TO THESE
- * ENTRIES ARE MEANINGFUL UNTIL AFTER AN ENTRY'S CORRESPONDING
- * PARTITION HAS BEEN ACTIVATED.
- */
- for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
- part = &xpc_partitions[partid];
-
- DBUG_ON((u64)part != L1_CACHE_ALIGN((u64)part));
-
- part->act_IRQ_rcvd = 0;
- spin_lock_init(&part->act_lock);
- part->act_state = XPC_P_INACTIVE;
- XPC_SET_REASON(part, 0, 0);
+ if (is_shub()) {
+ /*
+ * The ia64-sn2 architecture supports at most 64 partitions.
+ * And the inability to unregister remote amos restricts us
+ * further to only support exactly 64 partitions on this
+ * architecture, no less.
+ */
+ if (xp_max_npartitions != 64) {
+ dev_err(xpc_part, "max #of partitions not set to 64\n");
+ ret = -EINVAL;
+ } else {
+ ret = xpc_init_sn2();
+ }
- init_timer(&part->disengage_request_timer);
- part->disengage_request_timer.function =
- xpc_timeout_partition_disengage_request;
- part->disengage_request_timer.data = (unsigned long)part;
+ } else if (is_uv()) {
+ ret = xpc_init_uv();
- part->setup_state = XPC_P_UNSET;
- init_waitqueue_head(&part->teardown_wq);
- atomic_set(&part->references, 0);
+ } else {
+ ret = -ENODEV;
}
- /*
- * Open up protections for IPI operations (and AMO operations on
- * Shub 1.1 systems).
- */
- xpc_allow_IPI_ops();
-
- /*
- * Interrupts being processed will increment this atomic variable and
- * awaken the heartbeat thread which will process the interrupts.
- */
- atomic_set(&xpc_act_IRQ_rcvd, 0);
+ if (ret != 0)
+ return ret;
- /*
- * This is safe to do before the xpc_hb_checker thread has started
- * because the handler releases a wait queue. If an interrupt is
- * received before the thread is waiting, it will not go to sleep,
- * but rather immediately process the interrupt.
- */
- ret = request_irq(SGI_XPC_ACTIVATE, xpc_act_IRQ_handler, 0,
- "xpc hb", NULL);
+ ret = xpc_setup_partitions();
if (ret != 0) {
- dev_err(xpc_part, "can't register ACTIVATE IRQ handler, "
- "errno=%d\n", -ret);
-
- xpc_restrict_IPI_ops();
-
- if (xpc_sysctl)
- unregister_sysctl_table(xpc_sysctl);
-
- kfree(xpc_remote_copy_buffer_base);
- return -EBUSY;
+ dev_err(xpc_part, "can't get memory for partition structure\n");
+ goto out_1;
}
+ xpc_sysctl = register_sysctl_table(xpc_sys_dir);
+
/*
* Fill the partition reserved page with the information needed by
* other partitions to discover we are alive and establish initial
* communications.
*/
- xpc_rsvd_page = xpc_rsvd_page_init();
- if (xpc_rsvd_page == NULL) {
- dev_err(xpc_part, "could not setup our reserved page\n");
-
- free_irq(SGI_XPC_ACTIVATE, NULL);
- xpc_restrict_IPI_ops();
-
- if (xpc_sysctl)
- unregister_sysctl_table(xpc_sysctl);
-
- kfree(xpc_remote_copy_buffer_base);
- return -EBUSY;
+ ret = xpc_setup_rsvd_page();
+ if (ret != 0) {
+ dev_err(xpc_part, "can't setup our reserved page\n");
+ goto out_2;
}
/* add ourselves to the reboot_notifier_list */
@@ -1235,9 +1314,6 @@ xpc_init(void)
if (ret != 0)
dev_warn(xpc_part, "can't register die notifier\n");
- init_timer(&xpc_hb_timer);
- xpc_hb_timer.function = xpc_hb_beater;
-
/*
* The real work-horse behind xpc. This processes incoming
* interrupts and monitors remote heartbeats.
@@ -1245,25 +1321,8 @@ xpc_init(void)
kthread = kthread_run(xpc_hb_checker, NULL, XPC_HB_CHECK_THREAD_NAME);
if (IS_ERR(kthread)) {
dev_err(xpc_part, "failed while forking hb check thread\n");
-
- /* indicate to others that our reserved page is uninitialized */
- xpc_rsvd_page->vars_pa = 0;
-
- /* take ourselves off of the reboot_notifier_list */
- (void)unregister_reboot_notifier(&xpc_reboot_notifier);
-
- /* take ourselves off of the die_notifier list */
- (void)unregister_die_notifier(&xpc_die_notifier);
-
- del_timer_sync(&xpc_hb_timer);
- free_irq(SGI_XPC_ACTIVATE, NULL);
- xpc_restrict_IPI_ops();
-
- if (xpc_sysctl)
- unregister_sysctl_table(xpc_sysctl);
-
- kfree(xpc_remote_copy_buffer_base);
- return -EBUSY;
+ ret = -EBUSY;
+ goto out_3;
}
/*
@@ -1285,11 +1344,28 @@ xpc_init(void)
/* set the interface to point at XPC's functions */
xpc_set_interface(xpc_initiate_connect, xpc_initiate_disconnect,
- xpc_initiate_allocate, xpc_initiate_send,
- xpc_initiate_send_notify, xpc_initiate_received,
- xpc_initiate_partid_to_nasids);
+ xpc_initiate_send, xpc_initiate_send_notify,
+ xpc_initiate_received, xpc_initiate_partid_to_nasids);
return 0;
+
+ /* initialization was not successful */
+out_3:
+ xpc_teardown_rsvd_page();
+
+ (void)unregister_die_notifier(&xpc_die_notifier);
+ (void)unregister_reboot_notifier(&xpc_reboot_notifier);
+out_2:
+ if (xpc_sysctl)
+ unregister_sysctl_table(xpc_sysctl);
+
+ xpc_teardown_partitions();
+out_1:
+ if (is_shub())
+ xpc_exit_sn2();
+ else
+ xpc_exit_uv();
+ return ret;
}
module_init(xpc_init);
@@ -1314,9 +1390,9 @@ module_param(xpc_hb_check_interval, int, 0);
MODULE_PARM_DESC(xpc_hb_check_interval, "Number of seconds between "
"heartbeat checks.");
-module_param(xpc_disengage_request_timelimit, int, 0);
-MODULE_PARM_DESC(xpc_disengage_request_timelimit, "Number of seconds to wait "
- "for disengage request to complete.");
+module_param(xpc_disengage_timelimit, int, 0);
+MODULE_PARM_DESC(xpc_disengage_timelimit, "Number of seconds to wait "
+ "for disengage to complete.");
module_param(xpc_kdebug_ignore, int, 0);
MODULE_PARM_DESC(xpc_kdebug_ignore, "Should lack of heartbeat be ignored by "
diff --git a/drivers/misc/sgi-xp/xpc_partition.c b/drivers/misc/sgi-xp/xpc_partition.c
index 7dd4b5812c4..6722f6fe4dc 100644
--- a/drivers/misc/sgi-xp/xpc_partition.c
+++ b/drivers/misc/sgi-xp/xpc_partition.c
@@ -15,57 +15,22 @@
*
*/
-#include <linux/kernel.h>
-#include <linux/sysctl.h>
-#include <linux/cache.h>
-#include <linux/mmzone.h>
-#include <linux/nodemask.h>
-#include <asm/uncached.h>
-#include <asm/sn/bte.h>
-#include <asm/sn/intr.h>
-#include <asm/sn/sn_sal.h>
-#include <asm/sn/nodepda.h>
-#include <asm/sn/addrs.h>
+#include <linux/device.h>
+#include <linux/hardirq.h>
#include "xpc.h"
/* XPC is exiting flag */
int xpc_exiting;
-/* SH_IPI_ACCESS shub register value on startup */
-static u64 xpc_sh1_IPI_access;
-static u64 xpc_sh2_IPI_access0;
-static u64 xpc_sh2_IPI_access1;
-static u64 xpc_sh2_IPI_access2;
-static u64 xpc_sh2_IPI_access3;
-
-/* original protection values for each node */
-u64 xpc_prot_vec[MAX_NUMNODES];
-
/* this partition's reserved page pointers */
struct xpc_rsvd_page *xpc_rsvd_page;
-static u64 *xpc_part_nasids;
-static u64 *xpc_mach_nasids;
-struct xpc_vars *xpc_vars;
-struct xpc_vars_part *xpc_vars_part;
+static unsigned long *xpc_part_nasids;
+unsigned long *xpc_mach_nasids;
-static int xp_nasid_mask_bytes; /* actual size in bytes of nasid mask */
-static int xp_nasid_mask_words; /* actual size in words of nasid mask */
-
-/*
- * For performance reasons, each entry of xpc_partitions[] is cacheline
- * aligned. And xpc_partitions[] is padded with an additional entry at the
- * end so that the last legitimate entry doesn't share its cacheline with
- * another variable.
- */
-struct xpc_partition xpc_partitions[XP_MAX_PARTITIONS + 1];
+static int xpc_nasid_mask_nbytes; /* #of bytes in nasid mask */
+int xpc_nasid_mask_nlongs; /* #of longs in nasid mask */
-/*
- * Generic buffer used to store a local copy of portions of a remote
- * partition's reserved page (either its header and part_nasids mask,
- * or its vars).
- */
-char *xpc_remote_copy_buffer;
-void *xpc_remote_copy_buffer_base;
+struct xpc_partition *xpc_partitions;
/*
* Guarantee that the kmalloc'd memory is cacheline aligned.
@@ -95,56 +60,59 @@ xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
* Given a nasid, get the physical address of the partition's reserved page
* for that nasid. This function returns 0 on any error.
*/
-static u64
+static unsigned long
xpc_get_rsvd_page_pa(int nasid)
{
- bte_result_t bte_res;
- s64 status;
+ enum xp_retval ret;
u64 cookie = 0;
- u64 rp_pa = nasid; /* seed with nasid */
- u64 len = 0;
- u64 buf = buf;
- u64 buf_len = 0;
+ unsigned long rp_pa = nasid; /* seed with nasid */
+ size_t len = 0;
+ size_t buf_len = 0;
+ void *buf = buf;
void *buf_base = NULL;
while (1) {
- status = sn_partition_reserved_page_pa(buf, &cookie, &rp_pa,
- &len);
+ /* !!! rp_pa will need to be _gpa on UV.
+ * ??? So do we save it into the architecture specific parts
+ * ??? of the xpc_partition structure? Do we rename this
+ * ??? function or have two versions? Rename rp_pa for UV to
+ * ??? rp_gpa?
+ */
+ ret = xpc_get_partition_rsvd_page_pa(buf, &cookie, &rp_pa,
+ &len);
- dev_dbg(xpc_part, "SAL returned with status=%li, cookie="
- "0x%016lx, address=0x%016lx, len=0x%016lx\n",
- status, cookie, rp_pa, len);
+ dev_dbg(xpc_part, "SAL returned with ret=%d, cookie=0x%016lx, "
+ "address=0x%016lx, len=0x%016lx\n", ret,
+ (unsigned long)cookie, rp_pa, len);
- if (status != SALRET_MORE_PASSES)
+ if (ret != xpNeedMoreInfo)
break;
+ /* !!! L1_CACHE_ALIGN() is only a sn2-bte_copy requirement */
if (L1_CACHE_ALIGN(len) > buf_len) {
kfree(buf_base);
buf_len = L1_CACHE_ALIGN(len);
- buf = (u64)xpc_kmalloc_cacheline_aligned(buf_len,
- GFP_KERNEL,
- &buf_base);
+ buf = xpc_kmalloc_cacheline_aligned(buf_len, GFP_KERNEL,
+ &buf_base);
if (buf_base == NULL) {
dev_err(xpc_part, "unable to kmalloc "
"len=0x%016lx\n", buf_len);
- status = SALRET_ERROR;
+ ret = xpNoMemory;
break;
}
}
- bte_res = xp_bte_copy(rp_pa, buf, buf_len,
- (BTE_NOTIFY | BTE_WACQUIRE), NULL);
- if (bte_res != BTE_SUCCESS) {
- dev_dbg(xpc_part, "xp_bte_copy failed %i\n", bte_res);
- status = SALRET_ERROR;
+ ret = xp_remote_memcpy(xp_pa(buf), rp_pa, buf_len);
+ if (ret != xpSuccess) {
+ dev_dbg(xpc_part, "xp_remote_memcpy failed %d\n", ret);
break;
}
}
kfree(buf_base);
- if (status != SALRET_OK)
+ if (ret != xpSuccess)
rp_pa = 0;
dev_dbg(xpc_part, "reserved page at phys address 0x%016lx\n", rp_pa);
@@ -156,300 +124,77 @@ xpc_get_rsvd_page_pa(int nasid)
* other partitions to discover we are alive and establish initial
* communications.
*/
-struct xpc_rsvd_page *
-xpc_rsvd_page_init(void)
+int
+xpc_setup_rsvd_page(void)
{
+ int ret;
struct xpc_rsvd_page *rp;
- AMO_t *amos_page;
- u64 rp_pa, nasid_array = 0;
- int i, ret;
+ unsigned long rp_pa;
+ unsigned long new_ts_jiffies;
/* get the local reserved page's address */
preempt_disable();
- rp_pa = xpc_get_rsvd_page_pa(cpuid_to_nasid(smp_processor_id()));
+ rp_pa = xpc_get_rsvd_page_pa(xp_cpu_to_nasid(smp_processor_id()));
preempt_enable();
if (rp_pa == 0) {
dev_err(xpc_part, "SAL failed to locate the reserved page\n");
- return NULL;
+ return -ESRCH;
}
rp = (struct xpc_rsvd_page *)__va(rp_pa);
- if (rp->partid != sn_partition_id) {
- dev_err(xpc_part, "the reserved page's partid of %d should be "
- "%d\n", rp->partid, sn_partition_id);
- return NULL;
+ if (rp->SAL_version < 3) {
+ /* SAL_versions < 3 had a SAL_partid defined as a u8 */
+ rp->SAL_partid &= 0xff;
+ }
+ BUG_ON(rp->SAL_partid != xp_partition_id);
+
+ if (rp->SAL_partid < 0 || rp->SAL_partid >= xp_max_npartitions) {
+ dev_err(xpc_part, "the reserved page's partid of %d is outside "
+ "supported range (< 0 || >= %d)\n", rp->SAL_partid,
+ xp_max_npartitions);
+ return -EINVAL;
}
rp->version = XPC_RP_VERSION;
+ rp->max_npartitions = xp_max_npartitions;
/* establish the actual sizes of the nasid masks */
if (rp->SAL_version == 1) {
/* SAL_version 1 didn't set the nasids_size field */
- rp->nasids_size = 128;
+ rp->SAL_nasids_size = 128;
}
- xp_nasid_mask_bytes = rp->nasids_size;
- xp_nasid_mask_words = xp_nasid_mask_bytes / 8;
+ xpc_nasid_mask_nbytes = rp->SAL_nasids_size;
+ xpc_nasid_mask_nlongs = BITS_TO_LONGS(rp->SAL_nasids_size *
+ BITS_PER_BYTE);
/* setup the pointers to the various items in the reserved page */
xpc_part_nasids = XPC_RP_PART_NASIDS(rp);
xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp);
- xpc_vars = XPC_RP_VARS(rp);
- xpc_vars_part = XPC_RP_VARS_PART(rp);
-
- /*
- * Before clearing xpc_vars, see if a page of AMOs had been previously
- * allocated. If not we'll need to allocate one and set permissions
- * so that cross-partition AMOs are allowed.
- *
- * The allocated AMO page needs MCA reporting to remain disabled after
- * XPC has unloaded. To make this work, we keep a copy of the pointer
- * to this page (i.e., amos_page) in the struct xpc_vars structure,
- * which is pointed to by the reserved page, and re-use that saved copy
- * on subsequent loads of XPC. This AMO page is never freed, and its
- * memory protections are never restricted.
- */
- amos_page = xpc_vars->amos_page;
- if (amos_page == NULL) {
- amos_page = (AMO_t *)TO_AMO(uncached_alloc_page(0, 1));
- if (amos_page == NULL) {
- dev_err(xpc_part, "can't allocate page of AMOs\n");
- return NULL;
- }
-
- /*
- * Open up AMO-R/W to cpu. This is done for Shub 1.1 systems
- * when xpc_allow_IPI_ops() is called via xpc_hb_init().
- */
- if (!enable_shub_wars_1_1()) {
- ret = sn_change_memprotect(ia64_tpa((u64)amos_page),
- PAGE_SIZE,
- SN_MEMPROT_ACCESS_CLASS_1,
- &nasid_array);
- if (ret != 0) {
- dev_err(xpc_part, "can't change memory "
- "protections\n");
- uncached_free_page(__IA64_UNCACHED_OFFSET |
- TO_PHYS((u64)amos_page), 1);
- return NULL;
- }
- }
- } else if (!IS_AMO_ADDRESS((u64)amos_page)) {
- /*
- * EFI's XPBOOT can also set amos_page in the reserved page,
- * but it happens to leave it as an uncached physical address
- * and we need it to be an uncached virtual, so we'll have to
- * convert it.
- */
- if (!IS_AMO_PHYS_ADDRESS((u64)amos_page)) {
- dev_err(xpc_part, "previously used amos_page address "
- "is bad = 0x%p\n", (void *)amos_page);
- return NULL;
- }
- amos_page = (AMO_t *)TO_AMO((u64)amos_page);
- }
-
- /* clear xpc_vars */
- memset(xpc_vars, 0, sizeof(struct xpc_vars));
-
- xpc_vars->version = XPC_V_VERSION;
- xpc_vars->act_nasid = cpuid_to_nasid(0);
- xpc_vars->act_phys_cpuid = cpu_physical_id(0);
- xpc_vars->vars_part_pa = __pa(xpc_vars_part);
- xpc_vars->amos_page_pa = ia64_tpa((u64)amos_page);
- xpc_vars->amos_page = amos_page; /* save for next load of XPC */
-
- /* clear xpc_vars_part */
- memset((u64 *)xpc_vars_part, 0, sizeof(struct xpc_vars_part) *
- XP_MAX_PARTITIONS);
-
- /* initialize the activate IRQ related AMO variables */
- for (i = 0; i < xp_nasid_mask_words; i++)
- (void)xpc_IPI_init(XPC_ACTIVATE_IRQ_AMOS + i);
-
- /* initialize the engaged remote partitions related AMO variables */
- (void)xpc_IPI_init(XPC_ENGAGED_PARTITIONS_AMO);
- (void)xpc_IPI_init(XPC_DISENGAGE_REQUEST_AMO);
- /* timestamp of when reserved page was setup by XPC */
- rp->stamp = CURRENT_TIME;
+ ret = xpc_setup_rsvd_page_sn(rp);
+ if (ret != 0)
+ return ret;
/*
+ * Set timestamp of when reserved page was setup by XPC.
* This signifies to the remote partition that our reserved
* page is initialized.
*/
- rp->vars_pa = __pa(xpc_vars);
+ new_ts_jiffies = jiffies;
+ if (new_ts_jiffies == 0 || new_ts_jiffies == rp->ts_jiffies)
+ new_ts_jiffies++;
+ rp->ts_jiffies = new_ts_jiffies;
- return rp;
+ xpc_rsvd_page = rp;
+ return 0;
}
-/*
- * Change protections to allow IPI operations (and AMO operations on
- * Shub 1.1 systems).
- */
void
-xpc_allow_IPI_ops(void)
+xpc_teardown_rsvd_page(void)
{
- int node;
- int nasid;
-
- /* >>> Change SH_IPI_ACCESS code to use SAL call once it is available */
-
- if (is_shub2()) {
- xpc_sh2_IPI_access0 =
- (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS0));
- xpc_sh2_IPI_access1 =
- (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS1));
- xpc_sh2_IPI_access2 =
- (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS2));
- xpc_sh2_IPI_access3 =
- (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS3));
-
- for_each_online_node(node) {
- nasid = cnodeid_to_nasid(node);
- HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
- -1UL);
- HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
- -1UL);
- HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
- -1UL);
- HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
- -1UL);
- }
-
- } else {
- xpc_sh1_IPI_access =
- (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH1_IPI_ACCESS));
-
- for_each_online_node(node) {
- nasid = cnodeid_to_nasid(node);
- HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
- -1UL);
-
- /*
- * Since the BIST collides with memory operations on
- * SHUB 1.1 sn_change_memprotect() cannot be used.
- */
- if (enable_shub_wars_1_1()) {
- /* open up everything */
- xpc_prot_vec[node] = (u64)HUB_L((u64 *)
- GLOBAL_MMR_ADDR
- (nasid,
- SH1_MD_DQLP_MMR_DIR_PRIVEC0));
- HUB_S((u64 *)
- GLOBAL_MMR_ADDR(nasid,
- SH1_MD_DQLP_MMR_DIR_PRIVEC0),
- -1UL);
- HUB_S((u64 *)
- GLOBAL_MMR_ADDR(nasid,
- SH1_MD_DQRP_MMR_DIR_PRIVEC0),
- -1UL);
- }
- }
- }
-}
-
-/*
- * Restrict protections to disallow IPI operations (and AMO operations on
- * Shub 1.1 systems).
- */
-void
-xpc_restrict_IPI_ops(void)
-{
- int node;
- int nasid;
-
- /* >>> Change SH_IPI_ACCESS code to use SAL call once it is available */
-
- if (is_shub2()) {
-
- for_each_online_node(node) {
- nasid = cnodeid_to_nasid(node);
- HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
- xpc_sh2_IPI_access0);
- HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
- xpc_sh2_IPI_access1);
- HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
- xpc_sh2_IPI_access2);
- HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
- xpc_sh2_IPI_access3);
- }
-
- } else {
-
- for_each_online_node(node) {
- nasid = cnodeid_to_nasid(node);
- HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
- xpc_sh1_IPI_access);
-
- if (enable_shub_wars_1_1()) {
- HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid,
- SH1_MD_DQLP_MMR_DIR_PRIVEC0),
- xpc_prot_vec[node]);
- HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid,
- SH1_MD_DQRP_MMR_DIR_PRIVEC0),
- xpc_prot_vec[node]);
- }
- }
- }
-}
-
-/*
- * At periodic intervals, scan through all active partitions and ensure
- * their heartbeat is still active. If not, the partition is deactivated.
- */
-void
-xpc_check_remote_hb(void)
-{
- struct xpc_vars *remote_vars;
- struct xpc_partition *part;
- short partid;
- bte_result_t bres;
-
- remote_vars = (struct xpc_vars *)xpc_remote_copy_buffer;
-
- for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
-
- if (xpc_exiting)
- break;
-
- if (partid == sn_partition_id)
- continue;
-
- part = &xpc_partitions[partid];
-
- if (part->act_state == XPC_P_INACTIVE ||
- part->act_state == XPC_P_DEACTIVATING) {
- continue;
- }
-
- /* pull the remote_hb cache line */
- bres = xp_bte_copy(part->remote_vars_pa,
- (u64)remote_vars,
- XPC_RP_VARS_SIZE,
- (BTE_NOTIFY | BTE_WACQUIRE), NULL);
- if (bres != BTE_SUCCESS) {
- XPC_DEACTIVATE_PARTITION(part,
- xpc_map_bte_errors(bres));
- continue;
- }
-
- dev_dbg(xpc_part, "partid = %d, heartbeat = %ld, last_heartbeat"
- " = %ld, heartbeat_offline = %ld, HB_mask = 0x%lx\n",
- partid, remote_vars->heartbeat, part->last_heartbeat,
- remote_vars->heartbeat_offline,
- remote_vars->heartbeating_to_mask);
-
- if (((remote_vars->heartbeat == part->last_heartbeat) &&
- (remote_vars->heartbeat_offline == 0)) ||
- !xpc_hb_allowed(sn_partition_id, remote_vars)) {
-
- XPC_DEACTIVATE_PARTITION(part, xpNoHeartbeat);
- continue;
- }
-
- part->last_heartbeat = remote_vars->heartbeat;
- }
+ /* a zero timestamp indicates our rsvd page is not initialized */
+ xpc_rsvd_page->ts_jiffies = 0;
}
/*
@@ -459,11 +204,12 @@ xpc_check_remote_hb(void)
* is large enough to contain a copy of their reserved page header and
* part_nasids mask.
*/
-static enum xp_retval
-xpc_get_remote_rp(int nasid, u64 *discovered_nasids,
- struct xpc_rsvd_page *remote_rp, u64 *remote_rp_pa)
+enum xp_retval
+xpc_get_remote_rp(int nasid, unsigned long *discovered_nasids,
+ struct xpc_rsvd_page *remote_rp, unsigned long *remote_rp_pa)
{
- int bres, i;
+ int l;
+ enum xp_retval ret;
/* get the reserved page's physical address */
@@ -472,355 +218,45 @@ xpc_get_remote_rp(int nasid, u64 *discovered_nasids,
return xpNoRsvdPageAddr;
/* pull over the reserved page header and part_nasids mask */
- bres = xp_bte_copy(*remote_rp_pa, (u64)remote_rp,
- XPC_RP_HEADER_SIZE + xp_nasid_mask_bytes,
- (BTE_NOTIFY | BTE_WACQUIRE), NULL);
- if (bres != BTE_SUCCESS)
- return xpc_map_bte_errors(bres);
+ ret = xp_remote_memcpy(xp_pa(remote_rp), *remote_rp_pa,
+ XPC_RP_HEADER_SIZE + xpc_nasid_mask_nbytes);
+ if (ret != xpSuccess)
+ return ret;
if (discovered_nasids != NULL) {
- u64 *remote_part_nasids = XPC_RP_PART_NASIDS(remote_rp);
-
- for (i = 0; i < xp_nasid_mask_words; i++)
- discovered_nasids[i] |= remote_part_nasids[i];
- }
-
- /* check that the partid is for another partition */
+ unsigned long *remote_part_nasids =
+ XPC_RP_PART_NASIDS(remote_rp);
- if (remote_rp->partid < 1 ||
- remote_rp->partid > (XP_MAX_PARTITIONS - 1)) {
- return xpInvalidPartid;
+ for (l = 0; l < xpc_nasid_mask_nlongs; l++)
+ discovered_nasids[l] |= remote_part_nasids[l];
}
- if (remote_rp->partid == sn_partition_id)
- return xpLocalPartid;
+ /* zero timestamp indicates the reserved page has not been setup */
+ if (remote_rp->ts_jiffies == 0)
+ return xpRsvdPageNotSet;
if (XPC_VERSION_MAJOR(remote_rp->version) !=
XPC_VERSION_MAJOR(XPC_RP_VERSION)) {
return xpBadVersion;
}
- return xpSuccess;
-}
-
-/*
- * Get a copy of the remote partition's XPC variables from the reserved page.
- *
- * remote_vars points to a buffer that is cacheline aligned for BTE copies and
- * assumed to be of size XPC_RP_VARS_SIZE.
- */
-static enum xp_retval
-xpc_get_remote_vars(u64 remote_vars_pa, struct xpc_vars *remote_vars)
-{
- int bres;
-
- if (remote_vars_pa == 0)
- return xpVarsNotSet;
-
- /* pull over the cross partition variables */
- bres = xp_bte_copy(remote_vars_pa, (u64)remote_vars, XPC_RP_VARS_SIZE,
- (BTE_NOTIFY | BTE_WACQUIRE), NULL);
- if (bres != BTE_SUCCESS)
- return xpc_map_bte_errors(bres);
-
- if (XPC_VERSION_MAJOR(remote_vars->version) !=
- XPC_VERSION_MAJOR(XPC_V_VERSION)) {
- return xpBadVersion;
- }
-
- return xpSuccess;
-}
-
-/*
- * Update the remote partition's info.
- */
-static void
-xpc_update_partition_info(struct xpc_partition *part, u8 remote_rp_version,
- struct timespec *remote_rp_stamp, u64 remote_rp_pa,
- u64 remote_vars_pa, struct xpc_vars *remote_vars)
-{
- part->remote_rp_version = remote_rp_version;
- dev_dbg(xpc_part, " remote_rp_version = 0x%016x\n",
- part->remote_rp_version);
-
- part->remote_rp_stamp = *remote_rp_stamp;
- dev_dbg(xpc_part, " remote_rp_stamp (tv_sec = 0x%lx tv_nsec = 0x%lx\n",
- part->remote_rp_stamp.tv_sec, part->remote_rp_stamp.tv_nsec);
-
- part->remote_rp_pa = remote_rp_pa;
- dev_dbg(xpc_part, " remote_rp_pa = 0x%016lx\n", part->remote_rp_pa);
-
- part->remote_vars_pa = remote_vars_pa;
- dev_dbg(xpc_part, " remote_vars_pa = 0x%016lx\n",
- part->remote_vars_pa);
-
- part->last_heartbeat = remote_vars->heartbeat;
- dev_dbg(xpc_part, " last_heartbeat = 0x%016lx\n",
- part->last_heartbeat);
-
- part->remote_vars_part_pa = remote_vars->vars_part_pa;
- dev_dbg(xpc_part, " remote_vars_part_pa = 0x%016lx\n",
- part->remote_vars_part_pa);
-
- part->remote_act_nasid = remote_vars->act_nasid;
- dev_dbg(xpc_part, " remote_act_nasid = 0x%x\n",
- part->remote_act_nasid);
-
- part->remote_act_phys_cpuid = remote_vars->act_phys_cpuid;
- dev_dbg(xpc_part, " remote_act_phys_cpuid = 0x%x\n",
- part->remote_act_phys_cpuid);
-
- part->remote_amos_page_pa = remote_vars->amos_page_pa;
- dev_dbg(xpc_part, " remote_amos_page_pa = 0x%lx\n",
- part->remote_amos_page_pa);
-
- part->remote_vars_version = remote_vars->version;
- dev_dbg(xpc_part, " remote_vars_version = 0x%x\n",
- part->remote_vars_version);
-}
-
-/*
- * Prior code has determined the nasid which generated an IPI. Inspect
- * that nasid to determine if its partition needs to be activated or
- * deactivated.
- *
- * A partition is consider "awaiting activation" if our partition
- * flags indicate it is not active and it has a heartbeat. A
- * partition is considered "awaiting deactivation" if our partition
- * flags indicate it is active but it has no heartbeat or it is not
- * sending its heartbeat to us.
- *
- * To determine the heartbeat, the remote nasid must have a properly
- * initialized reserved page.
- */
-static void
-xpc_identify_act_IRQ_req(int nasid)
-{
- struct xpc_rsvd_page *remote_rp;
- struct xpc_vars *remote_vars;
- u64 remote_rp_pa;
- u64 remote_vars_pa;
- int remote_rp_version;
- int reactivate = 0;
- int stamp_diff;
- struct timespec remote_rp_stamp = { 0, 0 };
- short partid;
- struct xpc_partition *part;
- enum xp_retval ret;
-
- /* pull over the reserved page structure */
-
- remote_rp = (struct xpc_rsvd_page *)xpc_remote_copy_buffer;
-
- ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rp_pa);
- if (ret != xpSuccess) {
- dev_warn(xpc_part, "unable to get reserved page from nasid %d, "
- "which sent interrupt, reason=%d\n", nasid, ret);
- return;
- }
-
- remote_vars_pa = remote_rp->vars_pa;
- remote_rp_version = remote_rp->version;
- if (XPC_SUPPORTS_RP_STAMP(remote_rp_version))
- remote_rp_stamp = remote_rp->stamp;
-
- partid = remote_rp->partid;
- part = &xpc_partitions[partid];
-
- /* pull over the cross partition variables */
-
- remote_vars = (struct xpc_vars *)xpc_remote_copy_buffer;
-
- ret = xpc_get_remote_vars(remote_vars_pa, remote_vars);
- if (ret != xpSuccess) {
-
- dev_warn(xpc_part, "unable to get XPC variables from nasid %d, "
- "which sent interrupt, reason=%d\n", nasid, ret);
-
- XPC_DEACTIVATE_PARTITION(part, ret);
- return;
- }
-
- part->act_IRQ_rcvd++;
-
- dev_dbg(xpc_part, "partid for nasid %d is %d; IRQs = %d; HB = "
- "%ld:0x%lx\n", (int)nasid, (int)partid, part->act_IRQ_rcvd,
- remote_vars->heartbeat, remote_vars->heartbeating_to_mask);
-
- if (xpc_partition_disengaged(part) &&
- part->act_state == XPC_P_INACTIVE) {
-
- xpc_update_partition_info(part, remote_rp_version,
- &remote_rp_stamp, remote_rp_pa,
- remote_vars_pa, remote_vars);
-
- if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
- if (xpc_partition_disengage_requested(1UL << partid)) {
- /*
- * Other side is waiting on us to disengage,
- * even though we already have.
- */
- return;
- }
- } else {
- /* other side doesn't support disengage requests */
- xpc_clear_partition_disengage_request(1UL << partid);
- }
-
- xpc_activate_partition(part);
- return;
- }
-
- DBUG_ON(part->remote_rp_version == 0);
- DBUG_ON(part->remote_vars_version == 0);
-
- if (!XPC_SUPPORTS_RP_STAMP(part->remote_rp_version)) {
- DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(part->
- remote_vars_version));
-
- if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
- DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->
- version));
- /* see if the other side rebooted */
- if (part->remote_amos_page_pa ==
- remote_vars->amos_page_pa &&
- xpc_hb_allowed(sn_partition_id, remote_vars)) {
- /* doesn't look that way, so ignore the IPI */
- return;
- }
- }
-
- /*
- * Other side rebooted and previous XPC didn't support the
- * disengage request, so we don't need to do anything special.
- */
-
- xpc_update_partition_info(part, remote_rp_version,
- &remote_rp_stamp, remote_rp_pa,
- remote_vars_pa, remote_vars);
- part->reactivate_nasid = nasid;
- XPC_DEACTIVATE_PARTITION(part, xpReactivating);
- return;
- }
-
- DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version));
-
- if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
- DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version));
-
- /*
- * Other side rebooted and previous XPC did support the
- * disengage request, but the new one doesn't.
- */
-
- xpc_clear_partition_engaged(1UL << partid);
- xpc_clear_partition_disengage_request(1UL << partid);
-
- xpc_update_partition_info(part, remote_rp_version,
- &remote_rp_stamp, remote_rp_pa,
- remote_vars_pa, remote_vars);
- reactivate = 1;
-
- } else {
- DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version));
-
- stamp_diff = xpc_compare_stamps(&part->remote_rp_stamp,
- &remote_rp_stamp);
- if (stamp_diff != 0) {
- DBUG_ON(stamp_diff >= 0);
-
- /*
- * Other side rebooted and the previous XPC did support
- * the disengage request, as does the new one.
- */
-
- DBUG_ON(xpc_partition_engaged(1UL << partid));
- DBUG_ON(xpc_partition_disengage_requested(1UL <<
- partid));
-
- xpc_update_partition_info(part, remote_rp_version,
- &remote_rp_stamp,
- remote_rp_pa, remote_vars_pa,
- remote_vars);
- reactivate = 1;
- }
- }
-
- if (part->disengage_request_timeout > 0 &&
- !xpc_partition_disengaged(part)) {
- /* still waiting on other side to disengage from us */
- return;
- }
-
- if (reactivate) {
- part->reactivate_nasid = nasid;
- XPC_DEACTIVATE_PARTITION(part, xpReactivating);
-
- } else if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version) &&
- xpc_partition_disengage_requested(1UL << partid)) {
- XPC_DEACTIVATE_PARTITION(part, xpOtherGoingDown);
+ /* check that both remote and local partids are valid for each side */
+ if (remote_rp->SAL_partid < 0 ||
+ remote_rp->SAL_partid >= xp_max_npartitions ||
+ remote_rp->max_npartitions <= xp_partition_id) {
+ return xpInvalidPartid;
}
-}
-/*
- * Loop through the activation AMO variables and process any bits
- * which are set. Each bit indicates a nasid sending a partition
- * activation or deactivation request.
- *
- * Return #of IRQs detected.
- */
-int
-xpc_identify_act_IRQ_sender(void)
-{
- int word, bit;
- u64 nasid_mask;
- u64 nasid; /* remote nasid */
- int n_IRQs_detected = 0;
- AMO_t *act_amos;
-
- act_amos = xpc_vars->amos_page + XPC_ACTIVATE_IRQ_AMOS;
-
- /* scan through act AMO variable looking for non-zero entries */
- for (word = 0; word < xp_nasid_mask_words; word++) {
-
- if (xpc_exiting)
- break;
-
- nasid_mask = xpc_IPI_receive(&act_amos[word]);
- if (nasid_mask == 0) {
- /* no IRQs from nasids in this variable */
- continue;
- }
-
- dev_dbg(xpc_part, "AMO[%d] gave back 0x%lx\n", word,
- nasid_mask);
-
- /*
- * If this nasid has been added to the machine since
- * our partition was reset, this will retain the
- * remote nasid in our reserved pages machine mask.
- * This is used in the event of module reload.
- */
- xpc_mach_nasids[word] |= nasid_mask;
-
- /* locate the nasid(s) which sent interrupts */
+ if (remote_rp->SAL_partid == xp_partition_id)
+ return xpLocalPartid;
- for (bit = 0; bit < (8 * sizeof(u64)); bit++) {
- if (nasid_mask & (1UL << bit)) {
- n_IRQs_detected++;
- nasid = XPC_NASID_FROM_W_B(word, bit);
- dev_dbg(xpc_part, "interrupt from nasid %ld\n",
- nasid);
- xpc_identify_act_IRQ_req(nasid);
- }
- }
- }
- return n_IRQs_detected;
+ return xpSuccess;
}
/*
- * See if the other side has responded to a partition disengage request
- * from us.
+ * See if the other side has responded to a partition deactivate request
+ * from us. Though we requested the remote partition to deactivate with regard
+ * to us, we really only need to wait for the other side to disengage from us.
*/
int
xpc_partition_disengaged(struct xpc_partition *part)
@@ -828,41 +264,37 @@ xpc_partition_disengaged(struct xpc_partition *part)
short partid = XPC_PARTID(part);
int disengaged;
- disengaged = (xpc_partition_engaged(1UL << partid) == 0);
- if (part->disengage_request_timeout) {
+ disengaged = !xpc_partition_engaged(partid);
+ if (part->disengage_timeout) {
if (!disengaged) {
- if (time_before(jiffies,
- part->disengage_request_timeout)) {
+ if (time_is_after_jiffies(part->disengage_timeout)) {
/* timelimit hasn't been reached yet */
return 0;
}
/*
- * Other side hasn't responded to our disengage
+ * Other side hasn't responded to our deactivate
* request in a timely fashion, so assume it's dead.
*/
- dev_info(xpc_part, "disengage from remote partition %d "
- "timed out\n", partid);
- xpc_disengage_request_timedout = 1;
- xpc_clear_partition_engaged(1UL << partid);
+ dev_info(xpc_part, "deactivate request to remote "
+ "partition %d timed out\n", partid);
+ xpc_disengage_timedout = 1;
+ xpc_assume_partition_disengaged(partid);
disengaged = 1;
}
- part->disengage_request_timeout = 0;
+ part->disengage_timeout = 0;
/* cancel the timer function, provided it's not us */
- if (!in_interrupt()) {
- del_singleshot_timer_sync(&part->
- disengage_request_timer);
- }
+ if (!in_interrupt())
+ del_singleshot_timer_sync(&part->disengage_timer);
- DBUG_ON(part->act_state != XPC_P_DEACTIVATING &&
- part->act_state != XPC_P_INACTIVE);
- if (part->act_state != XPC_P_INACTIVE)
+ DBUG_ON(part->act_state != XPC_P_AS_DEACTIVATING &&
+ part->act_state != XPC_P_AS_INACTIVE);
+ if (part->act_state != XPC_P_AS_INACTIVE)
xpc_wakeup_channel_mgr(part);
- if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version))
- xpc_cancel_partition_disengage_request(part);
+ xpc_cancel_partition_deactivation_request(part);
}
return disengaged;
}
@@ -879,8 +311,8 @@ xpc_mark_partition_active(struct xpc_partition *part)
dev_dbg(xpc_part, "setting partition %d to ACTIVE\n", XPC_PARTID(part));
spin_lock_irqsave(&part->act_lock, irq_flags);
- if (part->act_state == XPC_P_ACTIVATING) {
- part->act_state = XPC_P_ACTIVE;
+ if (part->act_state == XPC_P_AS_ACTIVATING) {
+ part->act_state = XPC_P_AS_ACTIVE;
ret = xpSuccess;
} else {
DBUG_ON(part->reason == xpSuccess);
@@ -892,7 +324,7 @@ xpc_mark_partition_active(struct xpc_partition *part)
}
/*
- * Notify XPC that the partition is down.
+ * Start the process of deactivating the specified partition.
*/
void
xpc_deactivate_partition(const int line, struct xpc_partition *part,
@@ -902,16 +334,16 @@ xpc_deactivate_partition(const int line, struct xpc_partition *part,
spin_lock_irqsave(&part->act_lock, irq_flags);
- if (part->act_state == XPC_P_INACTIVE) {
+ if (part->act_state == XPC_P_AS_INACTIVE) {
XPC_SET_REASON(part, reason, line);
spin_unlock_irqrestore(&part->act_lock, irq_flags);
if (reason == xpReactivating) {
/* we interrupt ourselves to reactivate partition */
- xpc_IPI_send_reactivate(part);
+ xpc_request_partition_reactivation(part);
}
return;
}
- if (part->act_state == XPC_P_DEACTIVATING) {
+ if (part->act_state == XPC_P_AS_DEACTIVATING) {
if ((part->reason == xpUnloading && reason != xpUnloading) ||
reason == xpReactivating) {
XPC_SET_REASON(part, reason, line);
@@ -920,22 +352,18 @@ xpc_deactivate_partition(const int line, struct xpc_partition *part,
return;
}
- part->act_state = XPC_P_DEACTIVATING;
+ part->act_state = XPC_P_AS_DEACTIVATING;
XPC_SET_REASON(part, reason, line);
spin_unlock_irqrestore(&part->act_lock, irq_flags);
- if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
- xpc_request_partition_disengage(part);
- xpc_IPI_send_disengage(part);
+ /* ask remote partition to deactivate with regard to us */
+ xpc_request_partition_deactivation(part);
- /* set a timelimit on the disengage request */
- part->disengage_request_timeout = jiffies +
- (xpc_disengage_request_timelimit * HZ);
- part->disengage_request_timer.expires =
- part->disengage_request_timeout;
- add_timer(&part->disengage_request_timer);
- }
+ /* set a timelimit on the disengage phase of the deactivation request */
+ part->disengage_timeout = jiffies + (xpc_disengage_timelimit * HZ);
+ part->disengage_timer.expires = part->disengage_timeout;
+ add_timer(&part->disengage_timer);
dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n",
XPC_PARTID(part), reason);
@@ -955,7 +383,7 @@ xpc_mark_partition_inactive(struct xpc_partition *part)
XPC_PARTID(part));
spin_lock_irqsave(&part->act_lock, irq_flags);
- part->act_state = XPC_P_INACTIVE;
+ part->act_state = XPC_P_AS_INACTIVE;
spin_unlock_irqrestore(&part->act_lock, irq_flags);
part->remote_rp_pa = 0;
}
@@ -974,28 +402,22 @@ xpc_discovery(void)
{
void *remote_rp_base;
struct xpc_rsvd_page *remote_rp;
- struct xpc_vars *remote_vars;
- u64 remote_rp_pa;
- u64 remote_vars_pa;
+ unsigned long remote_rp_pa;
int region;
int region_size;
int max_regions;
int nasid;
struct xpc_rsvd_page *rp;
- short partid;
- struct xpc_partition *part;
- u64 *discovered_nasids;
+ unsigned long *discovered_nasids;
enum xp_retval ret;
remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RP_HEADER_SIZE +
- xp_nasid_mask_bytes,
+ xpc_nasid_mask_nbytes,
GFP_KERNEL, &remote_rp_base);
if (remote_rp == NULL)
return;
- remote_vars = (struct xpc_vars *)remote_rp;
-
- discovered_nasids = kzalloc(sizeof(u64) * xp_nasid_mask_words,
+ discovered_nasids = kzalloc(sizeof(long) * xpc_nasid_mask_nlongs,
GFP_KERNEL);
if (discovered_nasids == NULL) {
kfree(remote_rp_base);
@@ -1010,7 +432,7 @@ xpc_discovery(void)
* protection is in regards to memory, IOI and IPI.
*/
max_regions = 64;
- region_size = sn_region_size;
+ region_size = xp_region_size;
switch (region_size) {
case 128:
@@ -1038,28 +460,28 @@ xpc_discovery(void)
dev_dbg(xpc_part, "checking nasid %d\n", nasid);
- if (XPC_NASID_IN_ARRAY(nasid, xpc_part_nasids)) {
+ if (test_bit(nasid / 2, xpc_part_nasids)) {
dev_dbg(xpc_part, "PROM indicates Nasid %d is "
"part of the local partition; skipping "
"region\n", nasid);
break;
}
- if (!(XPC_NASID_IN_ARRAY(nasid, xpc_mach_nasids))) {
+ if (!(test_bit(nasid / 2, xpc_mach_nasids))) {
dev_dbg(xpc_part, "PROM indicates Nasid %d was "
"not on Numa-Link network at reset\n",
nasid);
continue;
}
- if (XPC_NASID_IN_ARRAY(nasid, discovered_nasids)) {
+ if (test_bit(nasid / 2, discovered_nasids)) {
dev_dbg(xpc_part, "Nasid %d is part of a "
"partition which was previously "
"discovered\n", nasid);
continue;
}
- /* pull over the reserved page structure */
+ /* pull over the rsvd page header & part_nasids mask */
ret = xpc_get_remote_rp(nasid, discovered_nasids,
remote_rp, &remote_rp_pa);
@@ -1074,72 +496,8 @@ xpc_discovery(void)
continue;
}
- remote_vars_pa = remote_rp->vars_pa;
-
- partid = remote_rp->partid;
- part = &xpc_partitions[partid];
-
- /* pull over the cross partition variables */
-
- ret = xpc_get_remote_vars(remote_vars_pa, remote_vars);
- if (ret != xpSuccess) {
- dev_dbg(xpc_part, "unable to get XPC variables "
- "from nasid %d, reason=%d\n", nasid,
- ret);
-
- XPC_DEACTIVATE_PARTITION(part, ret);
- continue;
- }
-
- if (part->act_state != XPC_P_INACTIVE) {
- dev_dbg(xpc_part, "partition %d on nasid %d is "
- "already activating\n", partid, nasid);
- break;
- }
-
- /*
- * Register the remote partition's AMOs with SAL so it
- * can handle and cleanup errors within that address
- * range should the remote partition go down. We don't
- * unregister this range because it is difficult to
- * tell when outstanding writes to the remote partition
- * are finished and thus when it is thus safe to
- * unregister. This should not result in wasted space
- * in the SAL xp_addr_region table because we should
- * get the same page for remote_act_amos_pa after
- * module reloads and system reboots.
- */
- if (sn_register_xp_addr_region
- (remote_vars->amos_page_pa, PAGE_SIZE, 1) < 0) {
- dev_dbg(xpc_part,
- "partition %d failed to "
- "register xp_addr region 0x%016lx\n",
- partid, remote_vars->amos_page_pa);
-
- XPC_SET_REASON(part, xpPhysAddrRegFailed,
- __LINE__);
- break;
- }
-
- /*
- * The remote nasid is valid and available.
- * Send an interrupt to that nasid to notify
- * it that we are ready to begin activation.
- */
- dev_dbg(xpc_part, "sending an interrupt to AMO 0x%lx, "
- "nasid %d, phys_cpuid 0x%x\n",
- remote_vars->amos_page_pa,
- remote_vars->act_nasid,
- remote_vars->act_phys_cpuid);
-
- if (XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->
- version)) {
- part->remote_amos_page_pa =
- remote_vars->amos_page_pa;
- xpc_mark_partition_disengaged(part);
- xpc_cancel_partition_disengage_request(part);
- }
- xpc_IPI_send_activate(remote_vars);
+ xpc_request_partition_activation(remote_rp,
+ remote_rp_pa, nasid);
}
}
@@ -1155,20 +513,16 @@ enum xp_retval
xpc_initiate_partid_to_nasids(short partid, void *nasid_mask)
{
struct xpc_partition *part;
- u64 part_nasid_pa;
- int bte_res;
+ unsigned long part_nasid_pa;
part = &xpc_partitions[partid];
if (part->remote_rp_pa == 0)
return xpPartitionDown;
- memset(nasid_mask, 0, XP_NASID_MASK_BYTES);
-
- part_nasid_pa = (u64)XPC_RP_PART_NASIDS(part->remote_rp_pa);
+ memset(nasid_mask, 0, xpc_nasid_mask_nbytes);
- bte_res = xp_bte_copy(part_nasid_pa, (u64)nasid_mask,
- xp_nasid_mask_bytes, (BTE_NOTIFY | BTE_WACQUIRE),
- NULL);
+ part_nasid_pa = (unsigned long)XPC_RP_PART_NASIDS(part->remote_rp_pa);
- return xpc_map_bte_errors(bte_res);
+ return xp_remote_memcpy(xp_pa(nasid_mask), part_nasid_pa,
+ xpc_nasid_mask_nbytes);
}
diff --git a/drivers/misc/sgi-xp/xpc_sn2.c b/drivers/misc/sgi-xp/xpc_sn2.c
new file mode 100644
index 00000000000..b4882ccf634
--- /dev/null
+++ b/drivers/misc/sgi-xp/xpc_sn2.c
@@ -0,0 +1,2404 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
+ */
+
+/*
+ * Cross Partition Communication (XPC) sn2-based functions.
+ *
+ * Architecture specific implementation of common functions.
+ *
+ */
+
+#include <linux/delay.h>
+#include <asm/uncached.h>
+#include <asm/sn/mspec.h>
+#include <asm/sn/sn_sal.h>
+#include "xpc.h"
+
+/*
+ * Define the number of u64s required to represent all the C-brick nasids
+ * as a bitmap. The cross-partition kernel modules deal only with
+ * C-brick nasids, thus the need for bitmaps which don't account for
+ * odd-numbered (non C-brick) nasids.
+ */
+#define XPC_MAX_PHYSNODES_SN2 (MAX_NUMALINK_NODES / 2)
+#define XP_NASID_MASK_BYTES_SN2 ((XPC_MAX_PHYSNODES_SN2 + 7) / 8)
+#define XP_NASID_MASK_WORDS_SN2 ((XPC_MAX_PHYSNODES_SN2 + 63) / 64)
+
+/*
+ * Memory for XPC's amo variables is allocated by the MSPEC driver. These
+ * pages are located in the lowest granule. The lowest granule uses 4k pages
+ * for cached references and an alternate TLB handler to never provide a
+ * cacheable mapping for the entire region. This will prevent speculative
+ * reading of cached copies of our lines from being issued which will cause
+ * a PI FSB Protocol error to be generated by the SHUB. For XPC, we need 64
+ * amo variables (based on XP_MAX_NPARTITIONS_SN2) to identify the senders of
+ * NOTIFY IRQs, 128 amo variables (based on XP_NASID_MASK_WORDS_SN2) to identify
+ * the senders of ACTIVATE IRQs, 1 amo variable to identify which remote
+ * partitions (i.e., XPCs) consider themselves currently engaged with the
+ * local XPC and 1 amo variable to request partition deactivation.
+ */
+#define XPC_NOTIFY_IRQ_AMOS_SN2 0
+#define XPC_ACTIVATE_IRQ_AMOS_SN2 (XPC_NOTIFY_IRQ_AMOS_SN2 + \
+ XP_MAX_NPARTITIONS_SN2)
+#define XPC_ENGAGED_PARTITIONS_AMO_SN2 (XPC_ACTIVATE_IRQ_AMOS_SN2 + \
+ XP_NASID_MASK_WORDS_SN2)
+#define XPC_DEACTIVATE_REQUEST_AMO_SN2 (XPC_ENGAGED_PARTITIONS_AMO_SN2 + 1)
+
+/*
+ * Buffer used to store a local copy of portions of a remote partition's
+ * reserved page (either its header and part_nasids mask, or its vars).
+ */
+static void *xpc_remote_copy_buffer_base_sn2;
+static char *xpc_remote_copy_buffer_sn2;
+
+static struct xpc_vars_sn2 *xpc_vars_sn2;
+static struct xpc_vars_part_sn2 *xpc_vars_part_sn2;
+
+static int
+xpc_setup_partitions_sn_sn2(void)
+{
+ /* nothing needs to be done */
+ return 0;
+}
+
+/* SH_IPI_ACCESS shub register value on startup */
+static u64 xpc_sh1_IPI_access_sn2;
+static u64 xpc_sh2_IPI_access0_sn2;
+static u64 xpc_sh2_IPI_access1_sn2;
+static u64 xpc_sh2_IPI_access2_sn2;
+static u64 xpc_sh2_IPI_access3_sn2;
+
+/*
+ * Change protections to allow IPI operations.
+ */
+static void
+xpc_allow_IPI_ops_sn2(void)
+{
+ int node;
+ int nasid;
+
+ /* !!! The following should get moved into SAL. */
+ if (is_shub2()) {
+ xpc_sh2_IPI_access0_sn2 =
+ (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS0));
+ xpc_sh2_IPI_access1_sn2 =
+ (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS1));
+ xpc_sh2_IPI_access2_sn2 =
+ (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS2));
+ xpc_sh2_IPI_access3_sn2 =
+ (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS3));
+
+ for_each_online_node(node) {
+ nasid = cnodeid_to_nasid(node);
+ HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
+ -1UL);
+ HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
+ -1UL);
+ HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
+ -1UL);
+ HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
+ -1UL);
+ }
+ } else {
+ xpc_sh1_IPI_access_sn2 =
+ (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH1_IPI_ACCESS));
+
+ for_each_online_node(node) {
+ nasid = cnodeid_to_nasid(node);
+ HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
+ -1UL);
+ }
+ }
+}
+
+/*
+ * Restrict protections to disallow IPI operations.
+ */
+static void
+xpc_disallow_IPI_ops_sn2(void)
+{
+ int node;
+ int nasid;
+
+ /* !!! The following should get moved into SAL. */
+ if (is_shub2()) {
+ for_each_online_node(node) {
+ nasid = cnodeid_to_nasid(node);
+ HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
+ xpc_sh2_IPI_access0_sn2);
+ HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
+ xpc_sh2_IPI_access1_sn2);
+ HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
+ xpc_sh2_IPI_access2_sn2);
+ HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
+ xpc_sh2_IPI_access3_sn2);
+ }
+ } else {
+ for_each_online_node(node) {
+ nasid = cnodeid_to_nasid(node);
+ HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
+ xpc_sh1_IPI_access_sn2);
+ }
+ }
+}
+
+/*
+ * The following set of functions are used for the sending and receiving of
+ * IRQs (also known as IPIs). There are two flavors of IRQs, one that is
+ * associated with partition activity (SGI_XPC_ACTIVATE) and the other that
+ * is associated with channel activity (SGI_XPC_NOTIFY).
+ */
+
+static u64
+xpc_receive_IRQ_amo_sn2(struct amo *amo)
+{
+ return FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_CLEAR);
+}
+
+static enum xp_retval
+xpc_send_IRQ_sn2(struct amo *amo, u64 flag, int nasid, int phys_cpuid,
+ int vector)
+{
+ int ret = 0;
+ unsigned long irq_flags;
+
+ local_irq_save(irq_flags);
+
+ FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR, flag);
+ sn_send_IPI_phys(nasid, phys_cpuid, vector, 0);
+
+ /*
+ * We must always use the nofault function regardless of whether we
+ * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
+ * didn't, we'd never know that the other partition is down and would
+ * keep sending IRQs and amos to it until the heartbeat times out.
+ */
+ ret = xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->variable),
+ xp_nofault_PIOR_target));
+
+ local_irq_restore(irq_flags);
+
+ return (ret == 0) ? xpSuccess : xpPioReadError;
+}
+
+static struct amo *
+xpc_init_IRQ_amo_sn2(int index)
+{
+ struct amo *amo = xpc_vars_sn2->amos_page + index;
+
+ (void)xpc_receive_IRQ_amo_sn2(amo); /* clear amo variable */
+ return amo;
+}
+
+/*
+ * Functions associated with SGI_XPC_ACTIVATE IRQ.
+ */
+
+/*
+ * Notify the heartbeat check thread that an activate IRQ has been received.
+ */
+static irqreturn_t
+xpc_handle_activate_IRQ_sn2(int irq, void *dev_id)
+{
+ unsigned long irq_flags;
+
+ spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+ xpc_activate_IRQ_rcvd++;
+ spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
+ wake_up_interruptible(&xpc_activate_IRQ_wq);
+ return IRQ_HANDLED;
+}
+
+/*
+ * Flag the appropriate amo variable and send an IRQ to the specified node.
+ */
+static void
+xpc_send_activate_IRQ_sn2(unsigned long amos_page_pa, int from_nasid,
+ int to_nasid, int to_phys_cpuid)
+{
+ struct amo *amos = (struct amo *)__va(amos_page_pa +
+ (XPC_ACTIVATE_IRQ_AMOS_SN2 *
+ sizeof(struct amo)));
+
+ (void)xpc_send_IRQ_sn2(&amos[BIT_WORD(from_nasid / 2)],
+ BIT_MASK(from_nasid / 2), to_nasid,
+ to_phys_cpuid, SGI_XPC_ACTIVATE);
+}
+
+static void
+xpc_send_local_activate_IRQ_sn2(int from_nasid)
+{
+ unsigned long irq_flags;
+ struct amo *amos = (struct amo *)__va(xpc_vars_sn2->amos_page_pa +
+ (XPC_ACTIVATE_IRQ_AMOS_SN2 *
+ sizeof(struct amo)));
+
+ /* fake the sending and receipt of an activate IRQ from remote nasid */
+ FETCHOP_STORE_OP(TO_AMO((u64)&amos[BIT_WORD(from_nasid / 2)].variable),
+ FETCHOP_OR, BIT_MASK(from_nasid / 2));
+
+ spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+ xpc_activate_IRQ_rcvd++;
+ spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
+ wake_up_interruptible(&xpc_activate_IRQ_wq);
+}
+
+/*
+ * Functions associated with SGI_XPC_NOTIFY IRQ.
+ */
+
+/*
+ * Check to see if any chctl flags were sent from the specified partition.
+ */
+static void
+xpc_check_for_sent_chctl_flags_sn2(struct xpc_partition *part)
+{
+ union xpc_channel_ctl_flags chctl;
+ unsigned long irq_flags;
+
+ chctl.all_flags = xpc_receive_IRQ_amo_sn2(part->sn.sn2.
+ local_chctl_amo_va);
+ if (chctl.all_flags == 0)
+ return;
+
+ spin_lock_irqsave(&part->chctl_lock, irq_flags);
+ part->chctl.all_flags |= chctl.all_flags;
+ spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
+
+ dev_dbg(xpc_chan, "received notify IRQ from partid=%d, chctl.all_flags="
+ "0x%lx\n", XPC_PARTID(part), chctl.all_flags);
+
+ xpc_wakeup_channel_mgr(part);
+}
+
+/*
+ * Handle the receipt of a SGI_XPC_NOTIFY IRQ by seeing whether the specified
+ * partition actually sent it. Since SGI_XPC_NOTIFY IRQs may be shared by more
+ * than one partition, we use an amo structure per partition to indicate
+ * whether a partition has sent an IRQ or not. If it has, then wake up the
+ * associated kthread to handle it.
+ *
+ * All SGI_XPC_NOTIFY IRQs received by XPC are the result of IRQs sent by XPC
+ * running on other partitions.
+ *
+ * Noteworthy Arguments:
+ *
+ * irq - Interrupt ReQuest number. NOT USED.
+ *
+ * dev_id - partid of IRQ's potential sender.
+ */
+static irqreturn_t
+xpc_handle_notify_IRQ_sn2(int irq, void *dev_id)
+{
+ short partid = (short)(u64)dev_id;
+ struct xpc_partition *part = &xpc_partitions[partid];
+
+ DBUG_ON(partid < 0 || partid >= XP_MAX_NPARTITIONS_SN2);
+
+ if (xpc_part_ref(part)) {
+ xpc_check_for_sent_chctl_flags_sn2(part);
+
+ xpc_part_deref(part);
+ }
+ return IRQ_HANDLED;
+}
+
+/*
+ * Check to see if xpc_handle_notify_IRQ_sn2() dropped any IRQs on the floor
+ * because the write to their associated amo variable completed after the IRQ
+ * was received.
+ */
+static void
+xpc_check_for_dropped_notify_IRQ_sn2(struct xpc_partition *part)
+{
+ struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
+
+ if (xpc_part_ref(part)) {
+ xpc_check_for_sent_chctl_flags_sn2(part);
+
+ part_sn2->dropped_notify_IRQ_timer.expires = jiffies +
+ XPC_DROPPED_NOTIFY_IRQ_WAIT_INTERVAL;
+ add_timer(&part_sn2->dropped_notify_IRQ_timer);
+ xpc_part_deref(part);
+ }
+}
+
+/*
+ * Send a notify IRQ to the remote partition that is associated with the
+ * specified channel.
+ */
+static void
+xpc_send_notify_IRQ_sn2(struct xpc_channel *ch, u8 chctl_flag,
+ char *chctl_flag_string, unsigned long *irq_flags)
+{
+ struct xpc_partition *part = &xpc_partitions[ch->partid];
+ struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
+ union xpc_channel_ctl_flags chctl = { 0 };
+ enum xp_retval ret;
+
+ if (likely(part->act_state != XPC_P_AS_DEACTIVATING)) {
+ chctl.flags[ch->number] = chctl_flag;
+ ret = xpc_send_IRQ_sn2(part_sn2->remote_chctl_amo_va,
+ chctl.all_flags,
+ part_sn2->notify_IRQ_nasid,
+ part_sn2->notify_IRQ_phys_cpuid,
+ SGI_XPC_NOTIFY);
+ dev_dbg(xpc_chan, "%s sent to partid=%d, channel=%d, ret=%d\n",
+ chctl_flag_string, ch->partid, ch->number, ret);
+ if (unlikely(ret != xpSuccess)) {
+ if (irq_flags != NULL)
+ spin_unlock_irqrestore(&ch->lock, *irq_flags);
+ XPC_DEACTIVATE_PARTITION(part, ret);
+ if (irq_flags != NULL)
+ spin_lock_irqsave(&ch->lock, *irq_flags);
+ }
+ }
+}
+
+#define XPC_SEND_NOTIFY_IRQ_SN2(_ch, _ipi_f, _irq_f) \
+ xpc_send_notify_IRQ_sn2(_ch, _ipi_f, #_ipi_f, _irq_f)
+
+/*
+ * Make it look like the remote partition, which is associated with the
+ * specified channel, sent us a notify IRQ. This faked IRQ will be handled
+ * by xpc_check_for_dropped_notify_IRQ_sn2().
+ */
+static void
+xpc_send_local_notify_IRQ_sn2(struct xpc_channel *ch, u8 chctl_flag,
+ char *chctl_flag_string)
+{
+ struct xpc_partition *part = &xpc_partitions[ch->partid];
+ union xpc_channel_ctl_flags chctl = { 0 };
+
+ chctl.flags[ch->number] = chctl_flag;
+ FETCHOP_STORE_OP(TO_AMO((u64)&part->sn.sn2.local_chctl_amo_va->
+ variable), FETCHOP_OR, chctl.all_flags);
+ dev_dbg(xpc_chan, "%s sent local from partid=%d, channel=%d\n",
+ chctl_flag_string, ch->partid, ch->number);
+}
+
+#define XPC_SEND_LOCAL_NOTIFY_IRQ_SN2(_ch, _ipi_f) \
+ xpc_send_local_notify_IRQ_sn2(_ch, _ipi_f, #_ipi_f)
+
+static void
+xpc_send_chctl_closerequest_sn2(struct xpc_channel *ch,
+ unsigned long *irq_flags)
+{
+ struct xpc_openclose_args *args = ch->sn.sn2.local_openclose_args;
+
+ args->reason = ch->reason;
+ XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_CLOSEREQUEST, irq_flags);
+}
+
+static void
+xpc_send_chctl_closereply_sn2(struct xpc_channel *ch, unsigned long *irq_flags)
+{
+ XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_CLOSEREPLY, irq_flags);
+}
+
+static void
+xpc_send_chctl_openrequest_sn2(struct xpc_channel *ch, unsigned long *irq_flags)
+{
+ struct xpc_openclose_args *args = ch->sn.sn2.local_openclose_args;
+
+ args->entry_size = ch->entry_size;
+ args->local_nentries = ch->local_nentries;
+ XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_OPENREQUEST, irq_flags);
+}
+
+static void
+xpc_send_chctl_openreply_sn2(struct xpc_channel *ch, unsigned long *irq_flags)
+{
+ struct xpc_openclose_args *args = ch->sn.sn2.local_openclose_args;
+
+ args->remote_nentries = ch->remote_nentries;
+ args->local_nentries = ch->local_nentries;
+ args->local_msgqueue_pa = xp_pa(ch->sn.sn2.local_msgqueue);
+ XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_OPENREPLY, irq_flags);
+}
+
+static void
+xpc_send_chctl_msgrequest_sn2(struct xpc_channel *ch)
+{
+ XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_MSGREQUEST, NULL);
+}
+
+static void
+xpc_send_chctl_local_msgrequest_sn2(struct xpc_channel *ch)
+{
+ XPC_SEND_LOCAL_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_MSGREQUEST);
+}
+
+static void
+xpc_save_remote_msgqueue_pa_sn2(struct xpc_channel *ch,
+ unsigned long msgqueue_pa)
+{
+ ch->sn.sn2.remote_msgqueue_pa = msgqueue_pa;
+}
+
+/*
+ * This next set of functions are used to keep track of when a partition is
+ * potentially engaged in accessing memory belonging to another partition.
+ */
+
+static void
+xpc_indicate_partition_engaged_sn2(struct xpc_partition *part)
+{
+ unsigned long irq_flags;
+ struct amo *amo = (struct amo *)__va(part->sn.sn2.remote_amos_page_pa +
+ (XPC_ENGAGED_PARTITIONS_AMO_SN2 *
+ sizeof(struct amo)));
+
+ local_irq_save(irq_flags);
+
+ /* set bit corresponding to our partid in remote partition's amo */
+ FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR,
+ BIT(sn_partition_id));
+
+ /*
+ * We must always use the nofault function regardless of whether we
+ * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
+ * didn't, we'd never know that the other partition is down and would
+ * keep sending IRQs and amos to it until the heartbeat times out.
+ */
+ (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
+ variable),
+ xp_nofault_PIOR_target));
+
+ local_irq_restore(irq_flags);
+}
+
+static void
+xpc_indicate_partition_disengaged_sn2(struct xpc_partition *part)
+{
+ struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
+ unsigned long irq_flags;
+ struct amo *amo = (struct amo *)__va(part_sn2->remote_amos_page_pa +
+ (XPC_ENGAGED_PARTITIONS_AMO_SN2 *
+ sizeof(struct amo)));
+
+ local_irq_save(irq_flags);
+
+ /* clear bit corresponding to our partid in remote partition's amo */
+ FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
+ ~BIT(sn_partition_id));
+
+ /*
+ * We must always use the nofault function regardless of whether we
+ * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
+ * didn't, we'd never know that the other partition is down and would
+ * keep sending IRQs and amos to it until the heartbeat times out.
+ */
+ (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
+ variable),
+ xp_nofault_PIOR_target));
+
+ local_irq_restore(irq_flags);
+
+ /*
+ * Send activate IRQ to get other side to see that we've cleared our
+ * bit in their engaged partitions amo.
+ */
+ xpc_send_activate_IRQ_sn2(part_sn2->remote_amos_page_pa,
+ cnodeid_to_nasid(0),
+ part_sn2->activate_IRQ_nasid,
+ part_sn2->activate_IRQ_phys_cpuid);
+}
+
+static void
+xpc_assume_partition_disengaged_sn2(short partid)
+{
+ struct amo *amo = xpc_vars_sn2->amos_page +
+ XPC_ENGAGED_PARTITIONS_AMO_SN2;
+
+ /* clear bit(s) based on partid mask in our partition's amo */
+ FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
+ ~BIT(partid));
+}
+
+static int
+xpc_partition_engaged_sn2(short partid)
+{
+ struct amo *amo = xpc_vars_sn2->amos_page +
+ XPC_ENGAGED_PARTITIONS_AMO_SN2;
+
+ /* our partition's amo variable ANDed with partid mask */
+ return (FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) &
+ BIT(partid)) != 0;
+}
+
+static int
+xpc_any_partition_engaged_sn2(void)
+{
+ struct amo *amo = xpc_vars_sn2->amos_page +
+ XPC_ENGAGED_PARTITIONS_AMO_SN2;
+
+ /* our partition's amo variable */
+ return FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) != 0;
+}
+
+/* original protection values for each node */
+static u64 xpc_prot_vec_sn2[MAX_NUMNODES];
+
+/*
+ * Change protections to allow amo operations on non-Shub 1.1 systems.
+ */
+static enum xp_retval
+xpc_allow_amo_ops_sn2(struct amo *amos_page)
+{
+ u64 nasid_array = 0;
+ int ret;
+
+ /*
+ * On SHUB 1.1, we cannot call sn_change_memprotect() since the BIST
+ * collides with memory operations. On those systems we call
+ * xpc_allow_amo_ops_shub_wars_1_1_sn2() instead.
+ */
+ if (!enable_shub_wars_1_1()) {
+ ret = sn_change_memprotect(ia64_tpa((u64)amos_page), PAGE_SIZE,
+ SN_MEMPROT_ACCESS_CLASS_1,
+ &nasid_array);
+ if (ret != 0)
+ return xpSalError;
+ }
+ return xpSuccess;
+}
+
+/*
+ * Change protections to allow amo operations on Shub 1.1 systems.
+ */
+static void
+xpc_allow_amo_ops_shub_wars_1_1_sn2(void)
+{
+ int node;
+ int nasid;
+
+ if (!enable_shub_wars_1_1())
+ return;
+
+ for_each_online_node(node) {
+ nasid = cnodeid_to_nasid(node);
+ /* save current protection values */
+ xpc_prot_vec_sn2[node] =
+ (u64)HUB_L((u64 *)GLOBAL_MMR_ADDR(nasid,
+ SH1_MD_DQLP_MMR_DIR_PRIVEC0));
+ /* open up everything */
+ HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid,
+ SH1_MD_DQLP_MMR_DIR_PRIVEC0),
+ -1UL);
+ HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid,
+ SH1_MD_DQRP_MMR_DIR_PRIVEC0),
+ -1UL);
+ }
+}
+
+static enum xp_retval
+xpc_get_partition_rsvd_page_pa_sn2(void *buf, u64 *cookie, unsigned long *rp_pa,
+ size_t *len)
+{
+ s64 status;
+ enum xp_retval ret;
+
+ status = sn_partition_reserved_page_pa((u64)buf, cookie, rp_pa, len);
+ if (status == SALRET_OK)
+ ret = xpSuccess;
+ else if (status == SALRET_MORE_PASSES)
+ ret = xpNeedMoreInfo;
+ else
+ ret = xpSalError;
+
+ return ret;
+}
+
+
+static int
+xpc_setup_rsvd_page_sn_sn2(struct xpc_rsvd_page *rp)
+{
+ struct amo *amos_page;
+ int i;
+ int ret;
+
+ xpc_vars_sn2 = XPC_RP_VARS(rp);
+
+ rp->sn.vars_pa = xp_pa(xpc_vars_sn2);
+
+ /* vars_part array follows immediately after vars */
+ xpc_vars_part_sn2 = (struct xpc_vars_part_sn2 *)((u8 *)XPC_RP_VARS(rp) +
+ XPC_RP_VARS_SIZE);
+
+ /*
+ * Before clearing xpc_vars_sn2, see if a page of amos had been
+ * previously allocated. If not we'll need to allocate one and set
+ * permissions so that cross-partition amos are allowed.
+ *
+ * The allocated amo page needs MCA reporting to remain disabled after
+ * XPC has unloaded. To make this work, we keep a copy of the pointer
+ * to this page (i.e., amos_page) in the struct xpc_vars_sn2 structure,
+ * which is pointed to by the reserved page, and re-use that saved copy
+ * on subsequent loads of XPC. This amo page is never freed, and its
+ * memory protections are never restricted.
+ */
+ amos_page = xpc_vars_sn2->amos_page;
+ if (amos_page == NULL) {
+ amos_page = (struct amo *)TO_AMO(uncached_alloc_page(0, 1));
+ if (amos_page == NULL) {
+ dev_err(xpc_part, "can't allocate page of amos\n");
+ return -ENOMEM;
+ }
+
+ /*
+ * Open up amo-R/W to cpu. This is done on Shub 1.1 systems
+ * when xpc_allow_amo_ops_shub_wars_1_1_sn2() is called.
+ */
+ ret = xpc_allow_amo_ops_sn2(amos_page);
+ if (ret != xpSuccess) {
+ dev_err(xpc_part, "can't allow amo operations\n");
+ uncached_free_page(__IA64_UNCACHED_OFFSET |
+ TO_PHYS((u64)amos_page), 1);
+ return -EPERM;
+ }
+ }
+
+ /* clear xpc_vars_sn2 */
+ memset(xpc_vars_sn2, 0, sizeof(struct xpc_vars_sn2));
+
+ xpc_vars_sn2->version = XPC_V_VERSION;
+ xpc_vars_sn2->activate_IRQ_nasid = cpuid_to_nasid(0);
+ xpc_vars_sn2->activate_IRQ_phys_cpuid = cpu_physical_id(0);
+ xpc_vars_sn2->vars_part_pa = xp_pa(xpc_vars_part_sn2);
+ xpc_vars_sn2->amos_page_pa = ia64_tpa((u64)amos_page);
+ xpc_vars_sn2->amos_page = amos_page; /* save for next load of XPC */
+
+ /* clear xpc_vars_part_sn2 */
+ memset((u64 *)xpc_vars_part_sn2, 0, sizeof(struct xpc_vars_part_sn2) *
+ XP_MAX_NPARTITIONS_SN2);
+
+ /* initialize the activate IRQ related amo variables */
+ for (i = 0; i < xpc_nasid_mask_nlongs; i++)
+ (void)xpc_init_IRQ_amo_sn2(XPC_ACTIVATE_IRQ_AMOS_SN2 + i);
+
+ /* initialize the engaged remote partitions related amo variables */
+ (void)xpc_init_IRQ_amo_sn2(XPC_ENGAGED_PARTITIONS_AMO_SN2);
+ (void)xpc_init_IRQ_amo_sn2(XPC_DEACTIVATE_REQUEST_AMO_SN2);
+
+ return 0;
+}
+
+static void
+xpc_increment_heartbeat_sn2(void)
+{
+ xpc_vars_sn2->heartbeat++;
+}
+
+static void
+xpc_offline_heartbeat_sn2(void)
+{
+ xpc_increment_heartbeat_sn2();
+ xpc_vars_sn2->heartbeat_offline = 1;
+}
+
+static void
+xpc_online_heartbeat_sn2(void)
+{
+ xpc_increment_heartbeat_sn2();
+ xpc_vars_sn2->heartbeat_offline = 0;
+}
+
+static void
+xpc_heartbeat_init_sn2(void)
+{
+ DBUG_ON(xpc_vars_sn2 == NULL);
+
+ bitmap_zero(xpc_vars_sn2->heartbeating_to_mask, XP_MAX_NPARTITIONS_SN2);
+ xpc_heartbeating_to_mask = &xpc_vars_sn2->heartbeating_to_mask[0];
+ xpc_online_heartbeat_sn2();
+}
+
+static void
+xpc_heartbeat_exit_sn2(void)
+{
+ xpc_offline_heartbeat_sn2();
+}
+
+static enum xp_retval
+xpc_get_remote_heartbeat_sn2(struct xpc_partition *part)
+{
+ struct xpc_vars_sn2 *remote_vars;
+ enum xp_retval ret;
+
+ remote_vars = (struct xpc_vars_sn2 *)xpc_remote_copy_buffer_sn2;
+
+ /* pull the remote vars structure that contains the heartbeat */
+ ret = xp_remote_memcpy(xp_pa(remote_vars),
+ part->sn.sn2.remote_vars_pa,
+ XPC_RP_VARS_SIZE);
+ if (ret != xpSuccess)
+ return ret;
+
+ dev_dbg(xpc_part, "partid=%d, heartbeat=%ld, last_heartbeat=%ld, "
+ "heartbeat_offline=%ld, HB_mask[0]=0x%lx\n", XPC_PARTID(part),
+ remote_vars->heartbeat, part->last_heartbeat,
+ remote_vars->heartbeat_offline,
+ remote_vars->heartbeating_to_mask[0]);
+
+ if ((remote_vars->heartbeat == part->last_heartbeat &&
+ remote_vars->heartbeat_offline == 0) ||
+ !xpc_hb_allowed(sn_partition_id,
+ &remote_vars->heartbeating_to_mask)) {
+ ret = xpNoHeartbeat;
+ } else {
+ part->last_heartbeat = remote_vars->heartbeat;
+ }
+
+ return ret;
+}
+
+/*
+ * Get a copy of the remote partition's XPC variables from the reserved page.
+ *
+ * remote_vars points to a buffer that is cacheline aligned for BTE copies and
+ * assumed to be of size XPC_RP_VARS_SIZE.
+ */
+static enum xp_retval
+xpc_get_remote_vars_sn2(unsigned long remote_vars_pa,
+ struct xpc_vars_sn2 *remote_vars)
+{
+ enum xp_retval ret;
+
+ if (remote_vars_pa == 0)
+ return xpVarsNotSet;
+
+ /* pull over the cross partition variables */
+ ret = xp_remote_memcpy(xp_pa(remote_vars), remote_vars_pa,
+ XPC_RP_VARS_SIZE);
+ if (ret != xpSuccess)
+ return ret;
+
+ if (XPC_VERSION_MAJOR(remote_vars->version) !=
+ XPC_VERSION_MAJOR(XPC_V_VERSION)) {
+ return xpBadVersion;
+ }
+
+ return xpSuccess;
+}
+
+static void
+xpc_request_partition_activation_sn2(struct xpc_rsvd_page *remote_rp,
+ unsigned long remote_rp_pa, int nasid)
+{
+ xpc_send_local_activate_IRQ_sn2(nasid);
+}
+
+static void
+xpc_request_partition_reactivation_sn2(struct xpc_partition *part)
+{
+ xpc_send_local_activate_IRQ_sn2(part->sn.sn2.activate_IRQ_nasid);
+}
+
+static void
+xpc_request_partition_deactivation_sn2(struct xpc_partition *part)
+{
+ struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
+ unsigned long irq_flags;
+ struct amo *amo = (struct amo *)__va(part_sn2->remote_amos_page_pa +
+ (XPC_DEACTIVATE_REQUEST_AMO_SN2 *
+ sizeof(struct amo)));
+
+ local_irq_save(irq_flags);
+
+ /* set bit corresponding to our partid in remote partition's amo */
+ FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR,
+ BIT(sn_partition_id));
+
+ /*
+ * We must always use the nofault function regardless of whether we
+ * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
+ * didn't, we'd never know that the other partition is down and would
+ * keep sending IRQs and amos to it until the heartbeat times out.
+ */
+ (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
+ variable),
+ xp_nofault_PIOR_target));
+
+ local_irq_restore(irq_flags);
+
+ /*
+ * Send activate IRQ to get other side to see that we've set our
+ * bit in their deactivate request amo.
+ */
+ xpc_send_activate_IRQ_sn2(part_sn2->remote_amos_page_pa,
+ cnodeid_to_nasid(0),
+ part_sn2->activate_IRQ_nasid,
+ part_sn2->activate_IRQ_phys_cpuid);
+}
+
+static void
+xpc_cancel_partition_deactivation_request_sn2(struct xpc_partition *part)
+{
+ unsigned long irq_flags;
+ struct amo *amo = (struct amo *)__va(part->sn.sn2.remote_amos_page_pa +
+ (XPC_DEACTIVATE_REQUEST_AMO_SN2 *
+ sizeof(struct amo)));
+
+ local_irq_save(irq_flags);
+
+ /* clear bit corresponding to our partid in remote partition's amo */
+ FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
+ ~BIT(sn_partition_id));
+
+ /*
+ * We must always use the nofault function regardless of whether we
+ * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
+ * didn't, we'd never know that the other partition is down and would
+ * keep sending IRQs and amos to it until the heartbeat times out.
+ */
+ (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
+ variable),
+ xp_nofault_PIOR_target));
+
+ local_irq_restore(irq_flags);
+}
+
+static int
+xpc_partition_deactivation_requested_sn2(short partid)
+{
+ struct amo *amo = xpc_vars_sn2->amos_page +
+ XPC_DEACTIVATE_REQUEST_AMO_SN2;
+
+ /* our partition's amo variable ANDed with partid mask */
+ return (FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) &
+ BIT(partid)) != 0;
+}
+
+/*
+ * Update the remote partition's info.
+ */
+static void
+xpc_update_partition_info_sn2(struct xpc_partition *part, u8 remote_rp_version,
+ unsigned long *remote_rp_ts_jiffies,
+ unsigned long remote_rp_pa,
+ unsigned long remote_vars_pa,
+ struct xpc_vars_sn2 *remote_vars)
+{
+ struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
+
+ part->remote_rp_version = remote_rp_version;
+ dev_dbg(xpc_part, " remote_rp_version = 0x%016x\n",
+ part->remote_rp_version);
+
+ part->remote_rp_ts_jiffies = *remote_rp_ts_jiffies;
+ dev_dbg(xpc_part, " remote_rp_ts_jiffies = 0x%016lx\n",
+ part->remote_rp_ts_jiffies);
+
+ part->remote_rp_pa = remote_rp_pa;
+ dev_dbg(xpc_part, " remote_rp_pa = 0x%016lx\n", part->remote_rp_pa);
+
+ part_sn2->remote_vars_pa = remote_vars_pa;
+ dev_dbg(xpc_part, " remote_vars_pa = 0x%016lx\n",
+ part_sn2->remote_vars_pa);
+
+ part->last_heartbeat = remote_vars->heartbeat;
+ dev_dbg(xpc_part, " last_heartbeat = 0x%016lx\n",
+ part->last_heartbeat);
+
+ part_sn2->remote_vars_part_pa = remote_vars->vars_part_pa;
+ dev_dbg(xpc_part, " remote_vars_part_pa = 0x%016lx\n",
+ part_sn2->remote_vars_part_pa);
+
+ part_sn2->activate_IRQ_nasid = remote_vars->activate_IRQ_nasid;
+ dev_dbg(xpc_part, " activate_IRQ_nasid = 0x%x\n",
+ part_sn2->activate_IRQ_nasid);
+
+ part_sn2->activate_IRQ_phys_cpuid =
+ remote_vars->activate_IRQ_phys_cpuid;
+ dev_dbg(xpc_part, " activate_IRQ_phys_cpuid = 0x%x\n",
+ part_sn2->activate_IRQ_phys_cpuid);
+
+ part_sn2->remote_amos_page_pa = remote_vars->amos_page_pa;
+ dev_dbg(xpc_part, " remote_amos_page_pa = 0x%lx\n",
+ part_sn2->remote_amos_page_pa);
+
+ part_sn2->remote_vars_version = remote_vars->version;
+ dev_dbg(xpc_part, " remote_vars_version = 0x%x\n",
+ part_sn2->remote_vars_version);
+}
+
+/*
+ * Prior code has determined the nasid which generated a activate IRQ.
+ * Inspect that nasid to determine if its partition needs to be activated
+ * or deactivated.
+ *
+ * A partition is considered "awaiting activation" if our partition
+ * flags indicate it is not active and it has a heartbeat. A
+ * partition is considered "awaiting deactivation" if our partition
+ * flags indicate it is active but it has no heartbeat or it is not
+ * sending its heartbeat to us.
+ *
+ * To determine the heartbeat, the remote nasid must have a properly
+ * initialized reserved page.
+ */
+static void
+xpc_identify_activate_IRQ_req_sn2(int nasid)
+{
+ struct xpc_rsvd_page *remote_rp;
+ struct xpc_vars_sn2 *remote_vars;
+ unsigned long remote_rp_pa;
+ unsigned long remote_vars_pa;
+ int remote_rp_version;
+ int reactivate = 0;
+ unsigned long remote_rp_ts_jiffies = 0;
+ short partid;
+ struct xpc_partition *part;
+ struct xpc_partition_sn2 *part_sn2;
+ enum xp_retval ret;
+
+ /* pull over the reserved page structure */
+
+ remote_rp = (struct xpc_rsvd_page *)xpc_remote_copy_buffer_sn2;
+
+ ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rp_pa);
+ if (ret != xpSuccess) {
+ dev_warn(xpc_part, "unable to get reserved page from nasid %d, "
+ "which sent interrupt, reason=%d\n", nasid, ret);
+ return;
+ }
+
+ remote_vars_pa = remote_rp->sn.vars_pa;
+ remote_rp_version = remote_rp->version;
+ remote_rp_ts_jiffies = remote_rp->ts_jiffies;
+
+ partid = remote_rp->SAL_partid;
+ part = &xpc_partitions[partid];
+ part_sn2 = &part->sn.sn2;
+
+ /* pull over the cross partition variables */
+
+ remote_vars = (struct xpc_vars_sn2 *)xpc_remote_copy_buffer_sn2;
+
+ ret = xpc_get_remote_vars_sn2(remote_vars_pa, remote_vars);
+ if (ret != xpSuccess) {
+ dev_warn(xpc_part, "unable to get XPC variables from nasid %d, "
+ "which sent interrupt, reason=%d\n", nasid, ret);
+
+ XPC_DEACTIVATE_PARTITION(part, ret);
+ return;
+ }
+
+ part->activate_IRQ_rcvd++;
+
+ dev_dbg(xpc_part, "partid for nasid %d is %d; IRQs = %d; HB = "
+ "%ld:0x%lx\n", (int)nasid, (int)partid, part->activate_IRQ_rcvd,
+ remote_vars->heartbeat, remote_vars->heartbeating_to_mask[0]);
+
+ if (xpc_partition_disengaged(part) &&
+ part->act_state == XPC_P_AS_INACTIVE) {
+
+ xpc_update_partition_info_sn2(part, remote_rp_version,
+ &remote_rp_ts_jiffies,
+ remote_rp_pa, remote_vars_pa,
+ remote_vars);
+
+ if (xpc_partition_deactivation_requested_sn2(partid)) {
+ /*
+ * Other side is waiting on us to deactivate even though
+ * we already have.
+ */
+ return;
+ }
+
+ xpc_activate_partition(part);
+ return;
+ }
+
+ DBUG_ON(part->remote_rp_version == 0);
+ DBUG_ON(part_sn2->remote_vars_version == 0);
+
+ if (remote_rp_ts_jiffies != part->remote_rp_ts_jiffies) {
+
+ /* the other side rebooted */
+
+ DBUG_ON(xpc_partition_engaged_sn2(partid));
+ DBUG_ON(xpc_partition_deactivation_requested_sn2(partid));
+
+ xpc_update_partition_info_sn2(part, remote_rp_version,
+ &remote_rp_ts_jiffies,
+ remote_rp_pa, remote_vars_pa,
+ remote_vars);
+ reactivate = 1;
+ }
+
+ if (part->disengage_timeout > 0 && !xpc_partition_disengaged(part)) {
+ /* still waiting on other side to disengage from us */
+ return;
+ }
+
+ if (reactivate)
+ XPC_DEACTIVATE_PARTITION(part, xpReactivating);
+ else if (xpc_partition_deactivation_requested_sn2(partid))
+ XPC_DEACTIVATE_PARTITION(part, xpOtherGoingDown);
+}
+
+/*
+ * Loop through the activation amo variables and process any bits
+ * which are set. Each bit indicates a nasid sending a partition
+ * activation or deactivation request.
+ *
+ * Return #of IRQs detected.
+ */
+int
+xpc_identify_activate_IRQ_sender_sn2(void)
+{
+ int l;
+ int b;
+ unsigned long nasid_mask_long;
+ u64 nasid; /* remote nasid */
+ int n_IRQs_detected = 0;
+ struct amo *act_amos;
+
+ act_amos = xpc_vars_sn2->amos_page + XPC_ACTIVATE_IRQ_AMOS_SN2;
+
+ /* scan through activate amo variables looking for non-zero entries */
+ for (l = 0; l < xpc_nasid_mask_nlongs; l++) {
+
+ if (xpc_exiting)
+ break;
+
+ nasid_mask_long = xpc_receive_IRQ_amo_sn2(&act_amos[l]);
+
+ b = find_first_bit(&nasid_mask_long, BITS_PER_LONG);
+ if (b >= BITS_PER_LONG) {
+ /* no IRQs from nasids in this amo variable */
+ continue;
+ }
+
+ dev_dbg(xpc_part, "amo[%d] gave back 0x%lx\n", l,
+ nasid_mask_long);
+
+ /*
+ * If this nasid has been added to the machine since
+ * our partition was reset, this will retain the
+ * remote nasid in our reserved pages machine mask.
+ * This is used in the event of module reload.
+ */
+ xpc_mach_nasids[l] |= nasid_mask_long;
+
+ /* locate the nasid(s) which sent interrupts */
+
+ do {
+ n_IRQs_detected++;
+ nasid = (l * BITS_PER_LONG + b) * 2;
+ dev_dbg(xpc_part, "interrupt from nasid %ld\n", nasid);
+ xpc_identify_activate_IRQ_req_sn2(nasid);
+
+ b = find_next_bit(&nasid_mask_long, BITS_PER_LONG,
+ b + 1);
+ } while (b < BITS_PER_LONG);
+ }
+ return n_IRQs_detected;
+}
+
+static void
+xpc_process_activate_IRQ_rcvd_sn2(void)
+{
+ unsigned long irq_flags;
+ int n_IRQs_expected;
+ int n_IRQs_detected;
+
+ DBUG_ON(xpc_activate_IRQ_rcvd == 0);
+
+ spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+ n_IRQs_expected = xpc_activate_IRQ_rcvd;
+ xpc_activate_IRQ_rcvd = 0;
+ spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
+ n_IRQs_detected = xpc_identify_activate_IRQ_sender_sn2();
+ if (n_IRQs_detected < n_IRQs_expected) {
+ /* retry once to help avoid missing amo */
+ (void)xpc_identify_activate_IRQ_sender_sn2();
+ }
+}
+
+/*
+ * Setup the channel structures that are sn2 specific.
+ */
+static enum xp_retval
+xpc_setup_ch_structures_sn_sn2(struct xpc_partition *part)
+{
+ struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
+ struct xpc_channel_sn2 *ch_sn2;
+ enum xp_retval retval;
+ int ret;
+ int cpuid;
+ int ch_number;
+ struct timer_list *timer;
+ short partid = XPC_PARTID(part);
+
+ /* allocate all the required GET/PUT values */
+
+ part_sn2->local_GPs =
+ xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE, GFP_KERNEL,
+ &part_sn2->local_GPs_base);
+ if (part_sn2->local_GPs == NULL) {
+ dev_err(xpc_chan, "can't get memory for local get/put "
+ "values\n");
+ return xpNoMemory;
+ }
+
+ part_sn2->remote_GPs =
+ xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE, GFP_KERNEL,
+ &part_sn2->remote_GPs_base);
+ if (part_sn2->remote_GPs == NULL) {
+ dev_err(xpc_chan, "can't get memory for remote get/put "
+ "values\n");
+ retval = xpNoMemory;
+ goto out_1;
+ }
+
+ part_sn2->remote_GPs_pa = 0;
+
+ /* allocate all the required open and close args */
+
+ part_sn2->local_openclose_args =
+ xpc_kzalloc_cacheline_aligned(XPC_OPENCLOSE_ARGS_SIZE,
+ GFP_KERNEL, &part_sn2->
+ local_openclose_args_base);
+ if (part_sn2->local_openclose_args == NULL) {
+ dev_err(xpc_chan, "can't get memory for local connect args\n");
+ retval = xpNoMemory;
+ goto out_2;
+ }
+
+ part_sn2->remote_openclose_args_pa = 0;
+
+ part_sn2->local_chctl_amo_va = xpc_init_IRQ_amo_sn2(partid);
+
+ part_sn2->notify_IRQ_nasid = 0;
+ part_sn2->notify_IRQ_phys_cpuid = 0;
+ part_sn2->remote_chctl_amo_va = NULL;
+
+ sprintf(part_sn2->notify_IRQ_owner, "xpc%02d", partid);
+ ret = request_irq(SGI_XPC_NOTIFY, xpc_handle_notify_IRQ_sn2,
+ IRQF_SHARED, part_sn2->notify_IRQ_owner,
+ (void *)(u64)partid);
+ if (ret != 0) {
+ dev_err(xpc_chan, "can't register NOTIFY IRQ handler, "
+ "errno=%d\n", -ret);
+ retval = xpLackOfResources;
+ goto out_3;
+ }
+
+ /* Setup a timer to check for dropped notify IRQs */
+ timer = &part_sn2->dropped_notify_IRQ_timer;
+ init_timer(timer);
+ timer->function =
+ (void (*)(unsigned long))xpc_check_for_dropped_notify_IRQ_sn2;
+ timer->data = (unsigned long)part;
+ timer->expires = jiffies + XPC_DROPPED_NOTIFY_IRQ_WAIT_INTERVAL;
+ add_timer(timer);
+
+ for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
+ ch_sn2 = &part->channels[ch_number].sn.sn2;
+
+ ch_sn2->local_GP = &part_sn2->local_GPs[ch_number];
+ ch_sn2->local_openclose_args =
+ &part_sn2->local_openclose_args[ch_number];
+
+ mutex_init(&ch_sn2->msg_to_pull_mutex);
+ }
+
+ /*
+ * Setup the per partition specific variables required by the
+ * remote partition to establish channel connections with us.
+ *
+ * The setting of the magic # indicates that these per partition
+ * specific variables are ready to be used.
+ */
+ xpc_vars_part_sn2[partid].GPs_pa = xp_pa(part_sn2->local_GPs);
+ xpc_vars_part_sn2[partid].openclose_args_pa =
+ xp_pa(part_sn2->local_openclose_args);
+ xpc_vars_part_sn2[partid].chctl_amo_pa =
+ xp_pa(part_sn2->local_chctl_amo_va);
+ cpuid = raw_smp_processor_id(); /* any CPU in this partition will do */
+ xpc_vars_part_sn2[partid].notify_IRQ_nasid = cpuid_to_nasid(cpuid);
+ xpc_vars_part_sn2[partid].notify_IRQ_phys_cpuid =
+ cpu_physical_id(cpuid);
+ xpc_vars_part_sn2[partid].nchannels = part->nchannels;
+ xpc_vars_part_sn2[partid].magic = XPC_VP_MAGIC1_SN2;
+
+ return xpSuccess;
+
+ /* setup of ch structures failed */
+out_3:
+ kfree(part_sn2->local_openclose_args_base);
+ part_sn2->local_openclose_args = NULL;
+out_2:
+ kfree(part_sn2->remote_GPs_base);
+ part_sn2->remote_GPs = NULL;
+out_1:
+ kfree(part_sn2->local_GPs_base);
+ part_sn2->local_GPs = NULL;
+ return retval;
+}
+
+/*
+ * Teardown the channel structures that are sn2 specific.
+ */
+static void
+xpc_teardown_ch_structures_sn_sn2(struct xpc_partition *part)
+{
+ struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
+ short partid = XPC_PARTID(part);
+
+ /*
+ * Indicate that the variables specific to the remote partition are no
+ * longer available for its use.
+ */
+ xpc_vars_part_sn2[partid].magic = 0;
+
+ /* in case we've still got outstanding timers registered... */
+ del_timer_sync(&part_sn2->dropped_notify_IRQ_timer);
+ free_irq(SGI_XPC_NOTIFY, (void *)(u64)partid);
+
+ kfree(part_sn2->local_openclose_args_base);
+ part_sn2->local_openclose_args = NULL;
+ kfree(part_sn2->remote_GPs_base);
+ part_sn2->remote_GPs = NULL;
+ kfree(part_sn2->local_GPs_base);
+ part_sn2->local_GPs = NULL;
+ part_sn2->local_chctl_amo_va = NULL;
+}
+
+/*
+ * Create a wrapper that hides the underlying mechanism for pulling a cacheline
+ * (or multiple cachelines) from a remote partition.
+ *
+ * src_pa must be a cacheline aligned physical address on the remote partition.
+ * dst must be a cacheline aligned virtual address on this partition.
+ * cnt must be cacheline sized
+ */
+/* ??? Replace this function by call to xp_remote_memcpy() or bte_copy()? */
+static enum xp_retval
+xpc_pull_remote_cachelines_sn2(struct xpc_partition *part, void *dst,
+ const unsigned long src_pa, size_t cnt)
+{
+ enum xp_retval ret;
+
+ DBUG_ON(src_pa != L1_CACHE_ALIGN(src_pa));
+ DBUG_ON((unsigned long)dst != L1_CACHE_ALIGN((unsigned long)dst));
+ DBUG_ON(cnt != L1_CACHE_ALIGN(cnt));
+
+ if (part->act_state == XPC_P_AS_DEACTIVATING)
+ return part->reason;
+
+ ret = xp_remote_memcpy(xp_pa(dst), src_pa, cnt);
+ if (ret != xpSuccess) {
+ dev_dbg(xpc_chan, "xp_remote_memcpy() from partition %d failed,"
+ " ret=%d\n", XPC_PARTID(part), ret);
+ }
+ return ret;
+}
+
+/*
+ * Pull the remote per partition specific variables from the specified
+ * partition.
+ */
+static enum xp_retval
+xpc_pull_remote_vars_part_sn2(struct xpc_partition *part)
+{
+ struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
+ u8 buffer[L1_CACHE_BYTES * 2];
+ struct xpc_vars_part_sn2 *pulled_entry_cacheline =
+ (struct xpc_vars_part_sn2 *)L1_CACHE_ALIGN((u64)buffer);
+ struct xpc_vars_part_sn2 *pulled_entry;
+ unsigned long remote_entry_cacheline_pa;
+ unsigned long remote_entry_pa;
+ short partid = XPC_PARTID(part);
+ enum xp_retval ret;
+
+ /* pull the cacheline that contains the variables we're interested in */
+
+ DBUG_ON(part_sn2->remote_vars_part_pa !=
+ L1_CACHE_ALIGN(part_sn2->remote_vars_part_pa));
+ DBUG_ON(sizeof(struct xpc_vars_part_sn2) != L1_CACHE_BYTES / 2);
+
+ remote_entry_pa = part_sn2->remote_vars_part_pa +
+ sn_partition_id * sizeof(struct xpc_vars_part_sn2);
+
+ remote_entry_cacheline_pa = (remote_entry_pa & ~(L1_CACHE_BYTES - 1));
+
+ pulled_entry = (struct xpc_vars_part_sn2 *)((u64)pulled_entry_cacheline
+ + (remote_entry_pa &
+ (L1_CACHE_BYTES - 1)));
+
+ ret = xpc_pull_remote_cachelines_sn2(part, pulled_entry_cacheline,
+ remote_entry_cacheline_pa,
+ L1_CACHE_BYTES);
+ if (ret != xpSuccess) {
+ dev_dbg(xpc_chan, "failed to pull XPC vars_part from "
+ "partition %d, ret=%d\n", partid, ret);
+ return ret;
+ }
+
+ /* see if they've been set up yet */
+
+ if (pulled_entry->magic != XPC_VP_MAGIC1_SN2 &&
+ pulled_entry->magic != XPC_VP_MAGIC2_SN2) {
+
+ if (pulled_entry->magic != 0) {
+ dev_dbg(xpc_chan, "partition %d's XPC vars_part for "
+ "partition %d has bad magic value (=0x%lx)\n",
+ partid, sn_partition_id, pulled_entry->magic);
+ return xpBadMagic;
+ }
+
+ /* they've not been initialized yet */
+ return xpRetry;
+ }
+
+ if (xpc_vars_part_sn2[partid].magic == XPC_VP_MAGIC1_SN2) {
+
+ /* validate the variables */
+
+ if (pulled_entry->GPs_pa == 0 ||
+ pulled_entry->openclose_args_pa == 0 ||
+ pulled_entry->chctl_amo_pa == 0) {
+
+ dev_err(xpc_chan, "partition %d's XPC vars_part for "
+ "partition %d are not valid\n", partid,
+ sn_partition_id);
+ return xpInvalidAddress;
+ }
+
+ /* the variables we imported look to be valid */
+
+ part_sn2->remote_GPs_pa = pulled_entry->GPs_pa;
+ part_sn2->remote_openclose_args_pa =
+ pulled_entry->openclose_args_pa;
+ part_sn2->remote_chctl_amo_va =
+ (struct amo *)__va(pulled_entry->chctl_amo_pa);
+ part_sn2->notify_IRQ_nasid = pulled_entry->notify_IRQ_nasid;
+ part_sn2->notify_IRQ_phys_cpuid =
+ pulled_entry->notify_IRQ_phys_cpuid;
+
+ if (part->nchannels > pulled_entry->nchannels)
+ part->nchannels = pulled_entry->nchannels;
+
+ /* let the other side know that we've pulled their variables */
+
+ xpc_vars_part_sn2[partid].magic = XPC_VP_MAGIC2_SN2;
+ }
+
+ if (pulled_entry->magic == XPC_VP_MAGIC1_SN2)
+ return xpRetry;
+
+ return xpSuccess;
+}
+
+/*
+ * Establish first contact with the remote partititon. This involves pulling
+ * the XPC per partition variables from the remote partition and waiting for
+ * the remote partition to pull ours.
+ */
+static enum xp_retval
+xpc_make_first_contact_sn2(struct xpc_partition *part)
+{
+ struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
+ enum xp_retval ret;
+
+ /*
+ * Register the remote partition's amos with SAL so it can handle
+ * and cleanup errors within that address range should the remote
+ * partition go down. We don't unregister this range because it is
+ * difficult to tell when outstanding writes to the remote partition
+ * are finished and thus when it is safe to unregister. This should
+ * not result in wasted space in the SAL xp_addr_region table because
+ * we should get the same page for remote_amos_page_pa after module
+ * reloads and system reboots.
+ */
+ if (sn_register_xp_addr_region(part_sn2->remote_amos_page_pa,
+ PAGE_SIZE, 1) < 0) {
+ dev_warn(xpc_part, "xpc_activating(%d) failed to register "
+ "xp_addr region\n", XPC_PARTID(part));
+
+ ret = xpPhysAddrRegFailed;
+ XPC_DEACTIVATE_PARTITION(part, ret);
+ return ret;
+ }
+
+ /*
+ * Send activate IRQ to get other side to activate if they've not
+ * already begun to do so.
+ */
+ xpc_send_activate_IRQ_sn2(part_sn2->remote_amos_page_pa,
+ cnodeid_to_nasid(0),
+ part_sn2->activate_IRQ_nasid,
+ part_sn2->activate_IRQ_phys_cpuid);
+
+ while ((ret = xpc_pull_remote_vars_part_sn2(part)) != xpSuccess) {
+ if (ret != xpRetry) {
+ XPC_DEACTIVATE_PARTITION(part, ret);
+ return ret;
+ }
+
+ dev_dbg(xpc_part, "waiting to make first contact with "
+ "partition %d\n", XPC_PARTID(part));
+
+ /* wait a 1/4 of a second or so */
+ (void)msleep_interruptible(250);
+
+ if (part->act_state == XPC_P_AS_DEACTIVATING)
+ return part->reason;
+ }
+
+ return xpSuccess;
+}
+
+/*
+ * Get the chctl flags and pull the openclose args and/or remote GPs as needed.
+ */
+static u64
+xpc_get_chctl_all_flags_sn2(struct xpc_partition *part)
+{
+ struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
+ unsigned long irq_flags;
+ union xpc_channel_ctl_flags chctl;
+ enum xp_retval ret;
+
+ /*
+ * See if there are any chctl flags to be handled.
+ */
+
+ spin_lock_irqsave(&part->chctl_lock, irq_flags);
+ chctl = part->chctl;
+ if (chctl.all_flags != 0)
+ part->chctl.all_flags = 0;
+
+ spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
+
+ if (xpc_any_openclose_chctl_flags_set(&chctl)) {
+ ret = xpc_pull_remote_cachelines_sn2(part, part->
+ remote_openclose_args,
+ part_sn2->
+ remote_openclose_args_pa,
+ XPC_OPENCLOSE_ARGS_SIZE);
+ if (ret != xpSuccess) {
+ XPC_DEACTIVATE_PARTITION(part, ret);
+
+ dev_dbg(xpc_chan, "failed to pull openclose args from "
+ "partition %d, ret=%d\n", XPC_PARTID(part),
+ ret);
+
+ /* don't bother processing chctl flags anymore */
+ chctl.all_flags = 0;
+ }
+ }
+
+ if (xpc_any_msg_chctl_flags_set(&chctl)) {
+ ret = xpc_pull_remote_cachelines_sn2(part, part_sn2->remote_GPs,
+ part_sn2->remote_GPs_pa,
+ XPC_GP_SIZE);
+ if (ret != xpSuccess) {
+ XPC_DEACTIVATE_PARTITION(part, ret);
+
+ dev_dbg(xpc_chan, "failed to pull GPs from partition "
+ "%d, ret=%d\n", XPC_PARTID(part), ret);
+
+ /* don't bother processing chctl flags anymore */
+ chctl.all_flags = 0;
+ }
+ }
+
+ return chctl.all_flags;
+}
+
+/*
+ * Allocate the local message queue and the notify queue.
+ */
+static enum xp_retval
+xpc_allocate_local_msgqueue_sn2(struct xpc_channel *ch)
+{
+ struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+ unsigned long irq_flags;
+ int nentries;
+ size_t nbytes;
+
+ for (nentries = ch->local_nentries; nentries > 0; nentries--) {
+
+ nbytes = nentries * ch->entry_size;
+ ch_sn2->local_msgqueue =
+ xpc_kzalloc_cacheline_aligned(nbytes, GFP_KERNEL,
+ &ch_sn2->local_msgqueue_base);
+ if (ch_sn2->local_msgqueue == NULL)
+ continue;
+
+ nbytes = nentries * sizeof(struct xpc_notify_sn2);
+ ch_sn2->notify_queue = kzalloc(nbytes, GFP_KERNEL);
+ if (ch_sn2->notify_queue == NULL) {
+ kfree(ch_sn2->local_msgqueue_base);
+ ch_sn2->local_msgqueue = NULL;
+ continue;
+ }
+
+ spin_lock_irqsave(&ch->lock, irq_flags);
+ if (nentries < ch->local_nentries) {
+ dev_dbg(xpc_chan, "nentries=%d local_nentries=%d, "
+ "partid=%d, channel=%d\n", nentries,
+ ch->local_nentries, ch->partid, ch->number);
+
+ ch->local_nentries = nentries;
+ }
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+ return xpSuccess;
+ }
+
+ dev_dbg(xpc_chan, "can't get memory for local message queue and notify "
+ "queue, partid=%d, channel=%d\n", ch->partid, ch->number);
+ return xpNoMemory;
+}
+
+/*
+ * Allocate the cached remote message queue.
+ */
+static enum xp_retval
+xpc_allocate_remote_msgqueue_sn2(struct xpc_channel *ch)
+{
+ struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+ unsigned long irq_flags;
+ int nentries;
+ size_t nbytes;
+
+ DBUG_ON(ch->remote_nentries <= 0);
+
+ for (nentries = ch->remote_nentries; nentries > 0; nentries--) {
+
+ nbytes = nentries * ch->entry_size;
+ ch_sn2->remote_msgqueue =
+ xpc_kzalloc_cacheline_aligned(nbytes, GFP_KERNEL, &ch_sn2->
+ remote_msgqueue_base);
+ if (ch_sn2->remote_msgqueue == NULL)
+ continue;
+
+ spin_lock_irqsave(&ch->lock, irq_flags);
+ if (nentries < ch->remote_nentries) {
+ dev_dbg(xpc_chan, "nentries=%d remote_nentries=%d, "
+ "partid=%d, channel=%d\n", nentries,
+ ch->remote_nentries, ch->partid, ch->number);
+
+ ch->remote_nentries = nentries;
+ }
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+ return xpSuccess;
+ }
+
+ dev_dbg(xpc_chan, "can't get memory for cached remote message queue, "
+ "partid=%d, channel=%d\n", ch->partid, ch->number);
+ return xpNoMemory;
+}
+
+/*
+ * Allocate message queues and other stuff associated with a channel.
+ *
+ * Note: Assumes all of the channel sizes are filled in.
+ */
+static enum xp_retval
+xpc_setup_msg_structures_sn2(struct xpc_channel *ch)
+{
+ struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+ enum xp_retval ret;
+
+ DBUG_ON(ch->flags & XPC_C_SETUP);
+
+ ret = xpc_allocate_local_msgqueue_sn2(ch);
+ if (ret == xpSuccess) {
+
+ ret = xpc_allocate_remote_msgqueue_sn2(ch);
+ if (ret != xpSuccess) {
+ kfree(ch_sn2->local_msgqueue_base);
+ ch_sn2->local_msgqueue = NULL;
+ kfree(ch_sn2->notify_queue);
+ ch_sn2->notify_queue = NULL;
+ }
+ }
+ return ret;
+}
+
+/*
+ * Free up message queues and other stuff that were allocated for the specified
+ * channel.
+ */
+static void
+xpc_teardown_msg_structures_sn2(struct xpc_channel *ch)
+{
+ struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+
+ DBUG_ON(!spin_is_locked(&ch->lock));
+
+ ch_sn2->remote_msgqueue_pa = 0;
+
+ ch_sn2->local_GP->get = 0;
+ ch_sn2->local_GP->put = 0;
+ ch_sn2->remote_GP.get = 0;
+ ch_sn2->remote_GP.put = 0;
+ ch_sn2->w_local_GP.get = 0;
+ ch_sn2->w_local_GP.put = 0;
+ ch_sn2->w_remote_GP.get = 0;
+ ch_sn2->w_remote_GP.put = 0;
+ ch_sn2->next_msg_to_pull = 0;
+
+ if (ch->flags & XPC_C_SETUP) {
+ dev_dbg(xpc_chan, "ch->flags=0x%x, partid=%d, channel=%d\n",
+ ch->flags, ch->partid, ch->number);
+
+ kfree(ch_sn2->local_msgqueue_base);
+ ch_sn2->local_msgqueue = NULL;
+ kfree(ch_sn2->remote_msgqueue_base);
+ ch_sn2->remote_msgqueue = NULL;
+ kfree(ch_sn2->notify_queue);
+ ch_sn2->notify_queue = NULL;
+ }
+}
+
+/*
+ * Notify those who wanted to be notified upon delivery of their message.
+ */
+static void
+xpc_notify_senders_sn2(struct xpc_channel *ch, enum xp_retval reason, s64 put)
+{
+ struct xpc_notify_sn2 *notify;
+ u8 notify_type;
+ s64 get = ch->sn.sn2.w_remote_GP.get - 1;
+
+ while (++get < put && atomic_read(&ch->n_to_notify) > 0) {
+
+ notify = &ch->sn.sn2.notify_queue[get % ch->local_nentries];
+
+ /*
+ * See if the notify entry indicates it was associated with
+ * a message who's sender wants to be notified. It is possible
+ * that it is, but someone else is doing or has done the
+ * notification.
+ */
+ notify_type = notify->type;
+ if (notify_type == 0 ||
+ cmpxchg(&notify->type, notify_type, 0) != notify_type) {
+ continue;
+ }
+
+ DBUG_ON(notify_type != XPC_N_CALL);
+
+ atomic_dec(&ch->n_to_notify);
+
+ if (notify->func != NULL) {
+ dev_dbg(xpc_chan, "notify->func() called, notify=0x%p "
+ "msg_number=%ld partid=%d channel=%d\n",
+ (void *)notify, get, ch->partid, ch->number);
+
+ notify->func(reason, ch->partid, ch->number,
+ notify->key);
+
+ dev_dbg(xpc_chan, "notify->func() returned, notify=0x%p"
+ " msg_number=%ld partid=%d channel=%d\n",
+ (void *)notify, get, ch->partid, ch->number);
+ }
+ }
+}
+
+static void
+xpc_notify_senders_of_disconnect_sn2(struct xpc_channel *ch)
+{
+ xpc_notify_senders_sn2(ch, ch->reason, ch->sn.sn2.w_local_GP.put);
+}
+
+/*
+ * Clear some of the msg flags in the local message queue.
+ */
+static inline void
+xpc_clear_local_msgqueue_flags_sn2(struct xpc_channel *ch)
+{
+ struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+ struct xpc_msg_sn2 *msg;
+ s64 get;
+
+ get = ch_sn2->w_remote_GP.get;
+ do {
+ msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->local_msgqueue +
+ (get % ch->local_nentries) *
+ ch->entry_size);
+ msg->flags = 0;
+ } while (++get < ch_sn2->remote_GP.get);
+}
+
+/*
+ * Clear some of the msg flags in the remote message queue.
+ */
+static inline void
+xpc_clear_remote_msgqueue_flags_sn2(struct xpc_channel *ch)
+{
+ struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+ struct xpc_msg_sn2 *msg;
+ s64 put;
+
+ put = ch_sn2->w_remote_GP.put;
+ do {
+ msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->remote_msgqueue +
+ (put % ch->remote_nentries) *
+ ch->entry_size);
+ msg->flags = 0;
+ } while (++put < ch_sn2->remote_GP.put);
+}
+
+static int
+xpc_n_of_deliverable_payloads_sn2(struct xpc_channel *ch)
+{
+ return ch->sn.sn2.w_remote_GP.put - ch->sn.sn2.w_local_GP.get;
+}
+
+static void
+xpc_process_msg_chctl_flags_sn2(struct xpc_partition *part, int ch_number)
+{
+ struct xpc_channel *ch = &part->channels[ch_number];
+ struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+ int npayloads_sent;
+
+ ch_sn2->remote_GP = part->sn.sn2.remote_GPs[ch_number];
+
+ /* See what, if anything, has changed for each connected channel */
+
+ xpc_msgqueue_ref(ch);
+
+ if (ch_sn2->w_remote_GP.get == ch_sn2->remote_GP.get &&
+ ch_sn2->w_remote_GP.put == ch_sn2->remote_GP.put) {
+ /* nothing changed since GPs were last pulled */
+ xpc_msgqueue_deref(ch);
+ return;
+ }
+
+ if (!(ch->flags & XPC_C_CONNECTED)) {
+ xpc_msgqueue_deref(ch);
+ return;
+ }
+
+ /*
+ * First check to see if messages recently sent by us have been
+ * received by the other side. (The remote GET value will have
+ * changed since we last looked at it.)
+ */
+
+ if (ch_sn2->w_remote_GP.get != ch_sn2->remote_GP.get) {
+
+ /*
+ * We need to notify any senders that want to be notified
+ * that their sent messages have been received by their
+ * intended recipients. We need to do this before updating
+ * w_remote_GP.get so that we don't allocate the same message
+ * queue entries prematurely (see xpc_allocate_msg()).
+ */
+ if (atomic_read(&ch->n_to_notify) > 0) {
+ /*
+ * Notify senders that messages sent have been
+ * received and delivered by the other side.
+ */
+ xpc_notify_senders_sn2(ch, xpMsgDelivered,
+ ch_sn2->remote_GP.get);
+ }
+
+ /*
+ * Clear msg->flags in previously sent messages, so that
+ * they're ready for xpc_allocate_msg().
+ */
+ xpc_clear_local_msgqueue_flags_sn2(ch);
+
+ ch_sn2->w_remote_GP.get = ch_sn2->remote_GP.get;
+
+ dev_dbg(xpc_chan, "w_remote_GP.get changed to %ld, partid=%d, "
+ "channel=%d\n", ch_sn2->w_remote_GP.get, ch->partid,
+ ch->number);
+
+ /*
+ * If anyone was waiting for message queue entries to become
+ * available, wake them up.
+ */
+ if (atomic_read(&ch->n_on_msg_allocate_wq) > 0)
+ wake_up(&ch->msg_allocate_wq);
+ }
+
+ /*
+ * Now check for newly sent messages by the other side. (The remote
+ * PUT value will have changed since we last looked at it.)
+ */
+
+ if (ch_sn2->w_remote_GP.put != ch_sn2->remote_GP.put) {
+ /*
+ * Clear msg->flags in previously received messages, so that
+ * they're ready for xpc_get_deliverable_payload_sn2().
+ */
+ xpc_clear_remote_msgqueue_flags_sn2(ch);
+
+ ch_sn2->w_remote_GP.put = ch_sn2->remote_GP.put;
+
+ dev_dbg(xpc_chan, "w_remote_GP.put changed to %ld, partid=%d, "
+ "channel=%d\n", ch_sn2->w_remote_GP.put, ch->partid,
+ ch->number);
+
+ npayloads_sent = xpc_n_of_deliverable_payloads_sn2(ch);
+ if (npayloads_sent > 0) {
+ dev_dbg(xpc_chan, "msgs waiting to be copied and "
+ "delivered=%d, partid=%d, channel=%d\n",
+ npayloads_sent, ch->partid, ch->number);
+
+ if (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE)
+ xpc_activate_kthreads(ch, npayloads_sent);
+ }
+ }
+
+ xpc_msgqueue_deref(ch);
+}
+
+static struct xpc_msg_sn2 *
+xpc_pull_remote_msg_sn2(struct xpc_channel *ch, s64 get)
+{
+ struct xpc_partition *part = &xpc_partitions[ch->partid];
+ struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+ unsigned long remote_msg_pa;
+ struct xpc_msg_sn2 *msg;
+ u32 msg_index;
+ u32 nmsgs;
+ u64 msg_offset;
+ enum xp_retval ret;
+
+ if (mutex_lock_interruptible(&ch_sn2->msg_to_pull_mutex) != 0) {
+ /* we were interrupted by a signal */
+ return NULL;
+ }
+
+ while (get >= ch_sn2->next_msg_to_pull) {
+
+ /* pull as many messages as are ready and able to be pulled */
+
+ msg_index = ch_sn2->next_msg_to_pull % ch->remote_nentries;
+
+ DBUG_ON(ch_sn2->next_msg_to_pull >= ch_sn2->w_remote_GP.put);
+ nmsgs = ch_sn2->w_remote_GP.put - ch_sn2->next_msg_to_pull;
+ if (msg_index + nmsgs > ch->remote_nentries) {
+ /* ignore the ones that wrap the msg queue for now */
+ nmsgs = ch->remote_nentries - msg_index;
+ }
+
+ msg_offset = msg_index * ch->entry_size;
+ msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->remote_msgqueue +
+ msg_offset);
+ remote_msg_pa = ch_sn2->remote_msgqueue_pa + msg_offset;
+
+ ret = xpc_pull_remote_cachelines_sn2(part, msg, remote_msg_pa,
+ nmsgs * ch->entry_size);
+ if (ret != xpSuccess) {
+
+ dev_dbg(xpc_chan, "failed to pull %d msgs starting with"
+ " msg %ld from partition %d, channel=%d, "
+ "ret=%d\n", nmsgs, ch_sn2->next_msg_to_pull,
+ ch->partid, ch->number, ret);
+
+ XPC_DEACTIVATE_PARTITION(part, ret);
+
+ mutex_unlock(&ch_sn2->msg_to_pull_mutex);
+ return NULL;
+ }
+
+ ch_sn2->next_msg_to_pull += nmsgs;
+ }
+
+ mutex_unlock(&ch_sn2->msg_to_pull_mutex);
+
+ /* return the message we were looking for */
+ msg_offset = (get % ch->remote_nentries) * ch->entry_size;
+ msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->remote_msgqueue + msg_offset);
+
+ return msg;
+}
+
+/*
+ * Get the next deliverable message's payload.
+ */
+static void *
+xpc_get_deliverable_payload_sn2(struct xpc_channel *ch)
+{
+ struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+ struct xpc_msg_sn2 *msg;
+ void *payload = NULL;
+ s64 get;
+
+ do {
+ if (ch->flags & XPC_C_DISCONNECTING)
+ break;
+
+ get = ch_sn2->w_local_GP.get;
+ rmb(); /* guarantee that .get loads before .put */
+ if (get == ch_sn2->w_remote_GP.put)
+ break;
+
+ /* There are messages waiting to be pulled and delivered.
+ * We need to try to secure one for ourselves. We'll do this
+ * by trying to increment w_local_GP.get and hope that no one
+ * else beats us to it. If they do, we'll we'll simply have
+ * to try again for the next one.
+ */
+
+ if (cmpxchg(&ch_sn2->w_local_GP.get, get, get + 1) == get) {
+ /* we got the entry referenced by get */
+
+ dev_dbg(xpc_chan, "w_local_GP.get changed to %ld, "
+ "partid=%d, channel=%d\n", get + 1,
+ ch->partid, ch->number);
+
+ /* pull the message from the remote partition */
+
+ msg = xpc_pull_remote_msg_sn2(ch, get);
+
+ DBUG_ON(msg != NULL && msg->number != get);
+ DBUG_ON(msg != NULL && (msg->flags & XPC_M_SN2_DONE));
+ DBUG_ON(msg != NULL && !(msg->flags & XPC_M_SN2_READY));
+
+ payload = &msg->payload;
+ break;
+ }
+
+ } while (1);
+
+ return payload;
+}
+
+/*
+ * Now we actually send the messages that are ready to be sent by advancing
+ * the local message queue's Put value and then send a chctl msgrequest to the
+ * recipient partition.
+ */
+static void
+xpc_send_msgs_sn2(struct xpc_channel *ch, s64 initial_put)
+{
+ struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+ struct xpc_msg_sn2 *msg;
+ s64 put = initial_put + 1;
+ int send_msgrequest = 0;
+
+ while (1) {
+
+ while (1) {
+ if (put == ch_sn2->w_local_GP.put)
+ break;
+
+ msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->
+ local_msgqueue + (put %
+ ch->local_nentries) *
+ ch->entry_size);
+
+ if (!(msg->flags & XPC_M_SN2_READY))
+ break;
+
+ put++;
+ }
+
+ if (put == initial_put) {
+ /* nothing's changed */
+ break;
+ }
+
+ if (cmpxchg_rel(&ch_sn2->local_GP->put, initial_put, put) !=
+ initial_put) {
+ /* someone else beat us to it */
+ DBUG_ON(ch_sn2->local_GP->put < initial_put);
+ break;
+ }
+
+ /* we just set the new value of local_GP->put */
+
+ dev_dbg(xpc_chan, "local_GP->put changed to %ld, partid=%d, "
+ "channel=%d\n", put, ch->partid, ch->number);
+
+ send_msgrequest = 1;
+
+ /*
+ * We need to ensure that the message referenced by
+ * local_GP->put is not XPC_M_SN2_READY or that local_GP->put
+ * equals w_local_GP.put, so we'll go have a look.
+ */
+ initial_put = put;
+ }
+
+ if (send_msgrequest)
+ xpc_send_chctl_msgrequest_sn2(ch);
+}
+
+/*
+ * Allocate an entry for a message from the message queue associated with the
+ * specified channel.
+ */
+static enum xp_retval
+xpc_allocate_msg_sn2(struct xpc_channel *ch, u32 flags,
+ struct xpc_msg_sn2 **address_of_msg)
+{
+ struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+ struct xpc_msg_sn2 *msg;
+ enum xp_retval ret;
+ s64 put;
+
+ /*
+ * Get the next available message entry from the local message queue.
+ * If none are available, we'll make sure that we grab the latest
+ * GP values.
+ */
+ ret = xpTimeout;
+
+ while (1) {
+
+ put = ch_sn2->w_local_GP.put;
+ rmb(); /* guarantee that .put loads before .get */
+ if (put - ch_sn2->w_remote_GP.get < ch->local_nentries) {
+
+ /* There are available message entries. We need to try
+ * to secure one for ourselves. We'll do this by trying
+ * to increment w_local_GP.put as long as someone else
+ * doesn't beat us to it. If they do, we'll have to
+ * try again.
+ */
+ if (cmpxchg(&ch_sn2->w_local_GP.put, put, put + 1) ==
+ put) {
+ /* we got the entry referenced by put */
+ break;
+ }
+ continue; /* try again */
+ }
+
+ /*
+ * There aren't any available msg entries at this time.
+ *
+ * In waiting for a message entry to become available,
+ * we set a timeout in case the other side is not sending
+ * completion interrupts. This lets us fake a notify IRQ
+ * that will cause the notify IRQ handler to fetch the latest
+ * GP values as if an interrupt was sent by the other side.
+ */
+ if (ret == xpTimeout)
+ xpc_send_chctl_local_msgrequest_sn2(ch);
+
+ if (flags & XPC_NOWAIT)
+ return xpNoWait;
+
+ ret = xpc_allocate_msg_wait(ch);
+ if (ret != xpInterrupted && ret != xpTimeout)
+ return ret;
+ }
+
+ /* get the message's address and initialize it */
+ msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->local_msgqueue +
+ (put % ch->local_nentries) *
+ ch->entry_size);
+
+ DBUG_ON(msg->flags != 0);
+ msg->number = put;
+
+ dev_dbg(xpc_chan, "w_local_GP.put changed to %ld; msg=0x%p, "
+ "msg_number=%ld, partid=%d, channel=%d\n", put + 1,
+ (void *)msg, msg->number, ch->partid, ch->number);
+
+ *address_of_msg = msg;
+ return xpSuccess;
+}
+
+/*
+ * Common code that does the actual sending of the message by advancing the
+ * local message queue's Put value and sends a chctl msgrequest to the
+ * partition the message is being sent to.
+ */
+static enum xp_retval
+xpc_send_payload_sn2(struct xpc_channel *ch, u32 flags, void *payload,
+ u16 payload_size, u8 notify_type, xpc_notify_func func,
+ void *key)
+{
+ enum xp_retval ret = xpSuccess;
+ struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+ struct xpc_msg_sn2 *msg = msg;
+ struct xpc_notify_sn2 *notify = notify;
+ s64 msg_number;
+ s64 put;
+
+ DBUG_ON(notify_type == XPC_N_CALL && func == NULL);
+
+ if (XPC_MSG_SIZE(payload_size) > ch->entry_size)
+ return xpPayloadTooBig;
+
+ xpc_msgqueue_ref(ch);
+
+ if (ch->flags & XPC_C_DISCONNECTING) {
+ ret = ch->reason;
+ goto out_1;
+ }
+ if (!(ch->flags & XPC_C_CONNECTED)) {
+ ret = xpNotConnected;
+ goto out_1;
+ }
+
+ ret = xpc_allocate_msg_sn2(ch, flags, &msg);
+ if (ret != xpSuccess)
+ goto out_1;
+
+ msg_number = msg->number;
+
+ if (notify_type != 0) {
+ /*
+ * Tell the remote side to send an ACK interrupt when the
+ * message has been delivered.
+ */
+ msg->flags |= XPC_M_SN2_INTERRUPT;
+
+ atomic_inc(&ch->n_to_notify);
+
+ notify = &ch_sn2->notify_queue[msg_number % ch->local_nentries];
+ notify->func = func;
+ notify->key = key;
+ notify->type = notify_type;
+
+ /* ??? Is a mb() needed here? */
+
+ if (ch->flags & XPC_C_DISCONNECTING) {
+ /*
+ * An error occurred between our last error check and
+ * this one. We will try to clear the type field from
+ * the notify entry. If we succeed then
+ * xpc_disconnect_channel() didn't already process
+ * the notify entry.
+ */
+ if (cmpxchg(&notify->type, notify_type, 0) ==
+ notify_type) {
+ atomic_dec(&ch->n_to_notify);
+ ret = ch->reason;
+ }
+ goto out_1;
+ }
+ }
+
+ memcpy(&msg->payload, payload, payload_size);
+
+ msg->flags |= XPC_M_SN2_READY;
+
+ /*
+ * The preceding store of msg->flags must occur before the following
+ * load of local_GP->put.
+ */
+ mb();
+
+ /* see if the message is next in line to be sent, if so send it */
+
+ put = ch_sn2->local_GP->put;
+ if (put == msg_number)
+ xpc_send_msgs_sn2(ch, put);
+
+out_1:
+ xpc_msgqueue_deref(ch);
+ return ret;
+}
+
+/*
+ * Now we actually acknowledge the messages that have been delivered and ack'd
+ * by advancing the cached remote message queue's Get value and if requested
+ * send a chctl msgrequest to the message sender's partition.
+ *
+ * If a message has XPC_M_SN2_INTERRUPT set, send an interrupt to the partition
+ * that sent the message.
+ */
+static void
+xpc_acknowledge_msgs_sn2(struct xpc_channel *ch, s64 initial_get, u8 msg_flags)
+{
+ struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+ struct xpc_msg_sn2 *msg;
+ s64 get = initial_get + 1;
+ int send_msgrequest = 0;
+
+ while (1) {
+
+ while (1) {
+ if (get == ch_sn2->w_local_GP.get)
+ break;
+
+ msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->
+ remote_msgqueue + (get %
+ ch->remote_nentries) *
+ ch->entry_size);
+
+ if (!(msg->flags & XPC_M_SN2_DONE))
+ break;
+
+ msg_flags |= msg->flags;
+ get++;
+ }
+
+ if (get == initial_get) {
+ /* nothing's changed */
+ break;
+ }
+
+ if (cmpxchg_rel(&ch_sn2->local_GP->get, initial_get, get) !=
+ initial_get) {
+ /* someone else beat us to it */
+ DBUG_ON(ch_sn2->local_GP->get <= initial_get);
+ break;
+ }
+
+ /* we just set the new value of local_GP->get */
+
+ dev_dbg(xpc_chan, "local_GP->get changed to %ld, partid=%d, "
+ "channel=%d\n", get, ch->partid, ch->number);
+
+ send_msgrequest = (msg_flags & XPC_M_SN2_INTERRUPT);
+
+ /*
+ * We need to ensure that the message referenced by
+ * local_GP->get is not XPC_M_SN2_DONE or that local_GP->get
+ * equals w_local_GP.get, so we'll go have a look.
+ */
+ initial_get = get;
+ }
+
+ if (send_msgrequest)
+ xpc_send_chctl_msgrequest_sn2(ch);
+}
+
+static void
+xpc_received_payload_sn2(struct xpc_channel *ch, void *payload)
+{
+ struct xpc_msg_sn2 *msg;
+ s64 msg_number;
+ s64 get;
+
+ msg = container_of(payload, struct xpc_msg_sn2, payload);
+ msg_number = msg->number;
+
+ dev_dbg(xpc_chan, "msg=0x%p, msg_number=%ld, partid=%d, channel=%d\n",
+ (void *)msg, msg_number, ch->partid, ch->number);
+
+ DBUG_ON((((u64)msg - (u64)ch->remote_msgqueue) / ch->entry_size) !=
+ msg_number % ch->remote_nentries);
+ DBUG_ON(msg->flags & XPC_M_SN2_DONE);
+
+ msg->flags |= XPC_M_SN2_DONE;
+
+ /*
+ * The preceding store of msg->flags must occur before the following
+ * load of local_GP->get.
+ */
+ mb();
+
+ /*
+ * See if this message is next in line to be acknowledged as having
+ * been delivered.
+ */
+ get = ch->sn.sn2.local_GP->get;
+ if (get == msg_number)
+ xpc_acknowledge_msgs_sn2(ch, get, msg->flags);
+}
+
+int
+xpc_init_sn2(void)
+{
+ int ret;
+ size_t buf_size;
+
+ xpc_setup_partitions_sn = xpc_setup_partitions_sn_sn2;
+ xpc_get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_sn2;
+ xpc_setup_rsvd_page_sn = xpc_setup_rsvd_page_sn_sn2;
+ xpc_increment_heartbeat = xpc_increment_heartbeat_sn2;
+ xpc_offline_heartbeat = xpc_offline_heartbeat_sn2;
+ xpc_online_heartbeat = xpc_online_heartbeat_sn2;
+ xpc_heartbeat_init = xpc_heartbeat_init_sn2;
+ xpc_heartbeat_exit = xpc_heartbeat_exit_sn2;
+ xpc_get_remote_heartbeat = xpc_get_remote_heartbeat_sn2;
+
+ xpc_request_partition_activation = xpc_request_partition_activation_sn2;
+ xpc_request_partition_reactivation =
+ xpc_request_partition_reactivation_sn2;
+ xpc_request_partition_deactivation =
+ xpc_request_partition_deactivation_sn2;
+ xpc_cancel_partition_deactivation_request =
+ xpc_cancel_partition_deactivation_request_sn2;
+
+ xpc_process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_sn2;
+ xpc_setup_ch_structures_sn = xpc_setup_ch_structures_sn_sn2;
+ xpc_teardown_ch_structures_sn = xpc_teardown_ch_structures_sn_sn2;
+ xpc_make_first_contact = xpc_make_first_contact_sn2;
+
+ xpc_get_chctl_all_flags = xpc_get_chctl_all_flags_sn2;
+ xpc_send_chctl_closerequest = xpc_send_chctl_closerequest_sn2;
+ xpc_send_chctl_closereply = xpc_send_chctl_closereply_sn2;
+ xpc_send_chctl_openrequest = xpc_send_chctl_openrequest_sn2;
+ xpc_send_chctl_openreply = xpc_send_chctl_openreply_sn2;
+
+ xpc_save_remote_msgqueue_pa = xpc_save_remote_msgqueue_pa_sn2;
+
+ xpc_setup_msg_structures = xpc_setup_msg_structures_sn2;
+ xpc_teardown_msg_structures = xpc_teardown_msg_structures_sn2;
+
+ xpc_notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_sn2;
+ xpc_process_msg_chctl_flags = xpc_process_msg_chctl_flags_sn2;
+ xpc_n_of_deliverable_payloads = xpc_n_of_deliverable_payloads_sn2;
+ xpc_get_deliverable_payload = xpc_get_deliverable_payload_sn2;
+
+ xpc_indicate_partition_engaged = xpc_indicate_partition_engaged_sn2;
+ xpc_indicate_partition_disengaged =
+ xpc_indicate_partition_disengaged_sn2;
+ xpc_partition_engaged = xpc_partition_engaged_sn2;
+ xpc_any_partition_engaged = xpc_any_partition_engaged_sn2;
+ xpc_assume_partition_disengaged = xpc_assume_partition_disengaged_sn2;
+
+ xpc_send_payload = xpc_send_payload_sn2;
+ xpc_received_payload = xpc_received_payload_sn2;
+
+ if (offsetof(struct xpc_msg_sn2, payload) > XPC_MSG_HDR_MAX_SIZE) {
+ dev_err(xpc_part, "header portion of struct xpc_msg_sn2 is "
+ "larger than %d\n", XPC_MSG_HDR_MAX_SIZE);
+ return -E2BIG;
+ }
+
+ buf_size = max(XPC_RP_VARS_SIZE,
+ XPC_RP_HEADER_SIZE + XP_NASID_MASK_BYTES_SN2);
+ xpc_remote_copy_buffer_sn2 = xpc_kmalloc_cacheline_aligned(buf_size,
+ GFP_KERNEL,
+ &xpc_remote_copy_buffer_base_sn2);
+ if (xpc_remote_copy_buffer_sn2 == NULL) {
+ dev_err(xpc_part, "can't get memory for remote copy buffer\n");
+ return -ENOMEM;
+ }
+
+ /* open up protections for IPI and [potentially] amo operations */
+ xpc_allow_IPI_ops_sn2();
+ xpc_allow_amo_ops_shub_wars_1_1_sn2();
+
+ /*
+ * This is safe to do before the xpc_hb_checker thread has started
+ * because the handler releases a wait queue. If an interrupt is
+ * received before the thread is waiting, it will not go to sleep,
+ * but rather immediately process the interrupt.
+ */
+ ret = request_irq(SGI_XPC_ACTIVATE, xpc_handle_activate_IRQ_sn2, 0,
+ "xpc hb", NULL);
+ if (ret != 0) {
+ dev_err(xpc_part, "can't register ACTIVATE IRQ handler, "
+ "errno=%d\n", -ret);
+ xpc_disallow_IPI_ops_sn2();
+ kfree(xpc_remote_copy_buffer_base_sn2);
+ }
+ return ret;
+}
+
+void
+xpc_exit_sn2(void)
+{
+ free_irq(SGI_XPC_ACTIVATE, NULL);
+ xpc_disallow_IPI_ops_sn2();
+ kfree(xpc_remote_copy_buffer_base_sn2);
+}
diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c
new file mode 100644
index 00000000000..1ac694c0162
--- /dev/null
+++ b/drivers/misc/sgi-xp/xpc_uv.c
@@ -0,0 +1,1443 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
+ */
+
+/*
+ * Cross Partition Communication (XPC) uv-based functions.
+ *
+ * Architecture specific implementation of common functions.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <asm/uv/uv_hub.h>
+#include "../sgi-gru/gru.h"
+#include "../sgi-gru/grukservices.h"
+#include "xpc.h"
+
+static atomic64_t xpc_heartbeat_uv;
+static DECLARE_BITMAP(xpc_heartbeating_to_mask_uv, XP_MAX_NPARTITIONS_UV);
+
+#define XPC_ACTIVATE_MSG_SIZE_UV (1 * GRU_CACHE_LINE_BYTES)
+#define XPC_NOTIFY_MSG_SIZE_UV (2 * GRU_CACHE_LINE_BYTES)
+
+#define XPC_ACTIVATE_MQ_SIZE_UV (4 * XP_MAX_NPARTITIONS_UV * \
+ XPC_ACTIVATE_MSG_SIZE_UV)
+#define XPC_NOTIFY_MQ_SIZE_UV (4 * XP_MAX_NPARTITIONS_UV * \
+ XPC_NOTIFY_MSG_SIZE_UV)
+
+static void *xpc_activate_mq_uv;
+static void *xpc_notify_mq_uv;
+
+static int
+xpc_setup_partitions_sn_uv(void)
+{
+ short partid;
+ struct xpc_partition_uv *part_uv;
+
+ for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) {
+ part_uv = &xpc_partitions[partid].sn.uv;
+
+ spin_lock_init(&part_uv->flags_lock);
+ part_uv->remote_act_state = XPC_P_AS_INACTIVE;
+ }
+ return 0;
+}
+
+static void *
+xpc_create_gru_mq_uv(unsigned int mq_size, int cpuid, unsigned int irq,
+ irq_handler_t irq_handler)
+{
+ int ret;
+ int nid;
+ int mq_order;
+ struct page *page;
+ void *mq;
+
+ nid = cpu_to_node(cpuid);
+ mq_order = get_order(mq_size);
+ page = alloc_pages_node(nid, GFP_KERNEL | __GFP_ZERO | GFP_THISNODE,
+ mq_order);
+ if (page == NULL) {
+ dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to alloc %d "
+ "bytes of memory on nid=%d for GRU mq\n", mq_size, nid);
+ return NULL;
+ }
+
+ mq = page_address(page);
+ ret = gru_create_message_queue(mq, mq_size);
+ if (ret != 0) {
+ dev_err(xpc_part, "gru_create_message_queue() returned "
+ "error=%d\n", ret);
+ free_pages((unsigned long)mq, mq_order);
+ return NULL;
+ }
+
+ /* !!! Need to do some other things to set up IRQ */
+
+ ret = request_irq(irq, irq_handler, 0, "xpc", NULL);
+ if (ret != 0) {
+ dev_err(xpc_part, "request_irq(irq=%d) returned error=%d\n",
+ irq, ret);
+ free_pages((unsigned long)mq, mq_order);
+ return NULL;
+ }
+
+ /* !!! enable generation of irq when GRU mq op occurs to this mq */
+
+ /* ??? allow other partitions to access GRU mq? */
+
+ return mq;
+}
+
+static void
+xpc_destroy_gru_mq_uv(void *mq, unsigned int mq_size, unsigned int irq)
+{
+ /* ??? disallow other partitions to access GRU mq? */
+
+ /* !!! disable generation of irq when GRU mq op occurs to this mq */
+
+ free_irq(irq, NULL);
+
+ free_pages((unsigned long)mq, get_order(mq_size));
+}
+
+static enum xp_retval
+xpc_send_gru_msg(unsigned long mq_gpa, void *msg, size_t msg_size)
+{
+ enum xp_retval xp_ret;
+ int ret;
+
+ while (1) {
+ ret = gru_send_message_gpa(mq_gpa, msg, msg_size);
+ if (ret == MQE_OK) {
+ xp_ret = xpSuccess;
+ break;
+ }
+
+ if (ret == MQE_QUEUE_FULL) {
+ dev_dbg(xpc_chan, "gru_send_message_gpa() returned "
+ "error=MQE_QUEUE_FULL\n");
+ /* !!! handle QLimit reached; delay & try again */
+ /* ??? Do we add a limit to the number of retries? */
+ (void)msleep_interruptible(10);
+ } else if (ret == MQE_CONGESTION) {
+ dev_dbg(xpc_chan, "gru_send_message_gpa() returned "
+ "error=MQE_CONGESTION\n");
+ /* !!! handle LB Overflow; simply try again */
+ /* ??? Do we add a limit to the number of retries? */
+ } else {
+ /* !!! Currently this is MQE_UNEXPECTED_CB_ERR */
+ dev_err(xpc_chan, "gru_send_message_gpa() returned "
+ "error=%d\n", ret);
+ xp_ret = xpGruSendMqError;
+ break;
+ }
+ }
+ return xp_ret;
+}
+
+static void
+xpc_process_activate_IRQ_rcvd_uv(void)
+{
+ unsigned long irq_flags;
+ short partid;
+ struct xpc_partition *part;
+ u8 act_state_req;
+
+ DBUG_ON(xpc_activate_IRQ_rcvd == 0);
+
+ spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+ for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) {
+ part = &xpc_partitions[partid];
+
+ if (part->sn.uv.act_state_req == 0)
+ continue;
+
+ xpc_activate_IRQ_rcvd--;
+ BUG_ON(xpc_activate_IRQ_rcvd < 0);
+
+ act_state_req = part->sn.uv.act_state_req;
+ part->sn.uv.act_state_req = 0;
+ spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
+ if (act_state_req == XPC_P_ASR_ACTIVATE_UV) {
+ if (part->act_state == XPC_P_AS_INACTIVE)
+ xpc_activate_partition(part);
+ else if (part->act_state == XPC_P_AS_DEACTIVATING)
+ XPC_DEACTIVATE_PARTITION(part, xpReactivating);
+
+ } else if (act_state_req == XPC_P_ASR_REACTIVATE_UV) {
+ if (part->act_state == XPC_P_AS_INACTIVE)
+ xpc_activate_partition(part);
+ else
+ XPC_DEACTIVATE_PARTITION(part, xpReactivating);
+
+ } else if (act_state_req == XPC_P_ASR_DEACTIVATE_UV) {
+ XPC_DEACTIVATE_PARTITION(part, part->sn.uv.reason);
+
+ } else {
+ BUG();
+ }
+
+ spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+ if (xpc_activate_IRQ_rcvd == 0)
+ break;
+ }
+ spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
+}
+
+static void
+xpc_handle_activate_mq_msg_uv(struct xpc_partition *part,
+ struct xpc_activate_mq_msghdr_uv *msg_hdr,
+ int *wakeup_hb_checker)
+{
+ unsigned long irq_flags;
+ struct xpc_partition_uv *part_uv = &part->sn.uv;
+ struct xpc_openclose_args *args;
+
+ part_uv->remote_act_state = msg_hdr->act_state;
+
+ switch (msg_hdr->type) {
+ case XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV:
+ /* syncing of remote_act_state was just done above */
+ break;
+
+ case XPC_ACTIVATE_MQ_MSG_INC_HEARTBEAT_UV: {
+ struct xpc_activate_mq_msg_heartbeat_req_uv *msg;
+
+ msg = container_of(msg_hdr,
+ struct xpc_activate_mq_msg_heartbeat_req_uv,
+ hdr);
+ part_uv->heartbeat = msg->heartbeat;
+ break;
+ }
+ case XPC_ACTIVATE_MQ_MSG_OFFLINE_HEARTBEAT_UV: {
+ struct xpc_activate_mq_msg_heartbeat_req_uv *msg;
+
+ msg = container_of(msg_hdr,
+ struct xpc_activate_mq_msg_heartbeat_req_uv,
+ hdr);
+ part_uv->heartbeat = msg->heartbeat;
+
+ spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
+ part_uv->flags |= XPC_P_HEARTBEAT_OFFLINE_UV;
+ spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
+ break;
+ }
+ case XPC_ACTIVATE_MQ_MSG_ONLINE_HEARTBEAT_UV: {
+ struct xpc_activate_mq_msg_heartbeat_req_uv *msg;
+
+ msg = container_of(msg_hdr,
+ struct xpc_activate_mq_msg_heartbeat_req_uv,
+ hdr);
+ part_uv->heartbeat = msg->heartbeat;
+
+ spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
+ part_uv->flags &= ~XPC_P_HEARTBEAT_OFFLINE_UV;
+ spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
+ break;
+ }
+ case XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV: {
+ struct xpc_activate_mq_msg_activate_req_uv *msg;
+
+ /*
+ * ??? Do we deal here with ts_jiffies being different
+ * ??? if act_state != XPC_P_AS_INACTIVE instead of
+ * ??? below?
+ */
+ msg = container_of(msg_hdr, struct
+ xpc_activate_mq_msg_activate_req_uv, hdr);
+
+ spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+ if (part_uv->act_state_req == 0)
+ xpc_activate_IRQ_rcvd++;
+ part_uv->act_state_req = XPC_P_ASR_ACTIVATE_UV;
+ part->remote_rp_pa = msg->rp_gpa; /* !!! _pa is _gpa */
+ part->remote_rp_ts_jiffies = msg_hdr->rp_ts_jiffies;
+ part_uv->remote_activate_mq_gpa = msg->activate_mq_gpa;
+ spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
+ (*wakeup_hb_checker)++;
+ break;
+ }
+ case XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV: {
+ struct xpc_activate_mq_msg_deactivate_req_uv *msg;
+
+ msg = container_of(msg_hdr, struct
+ xpc_activate_mq_msg_deactivate_req_uv, hdr);
+
+ spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+ if (part_uv->act_state_req == 0)
+ xpc_activate_IRQ_rcvd++;
+ part_uv->act_state_req = XPC_P_ASR_DEACTIVATE_UV;
+ part_uv->reason = msg->reason;
+ spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
+ (*wakeup_hb_checker)++;
+ return;
+ }
+ case XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV: {
+ struct xpc_activate_mq_msg_chctl_closerequest_uv *msg;
+
+ msg = container_of(msg_hdr, struct
+ xpc_activate_mq_msg_chctl_closerequest_uv,
+ hdr);
+ args = &part->remote_openclose_args[msg->ch_number];
+ args->reason = msg->reason;
+
+ spin_lock_irqsave(&part->chctl_lock, irq_flags);
+ part->chctl.flags[msg->ch_number] |= XPC_CHCTL_CLOSEREQUEST;
+ spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
+
+ xpc_wakeup_channel_mgr(part);
+ break;
+ }
+ case XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV: {
+ struct xpc_activate_mq_msg_chctl_closereply_uv *msg;
+
+ msg = container_of(msg_hdr, struct
+ xpc_activate_mq_msg_chctl_closereply_uv,
+ hdr);
+
+ spin_lock_irqsave(&part->chctl_lock, irq_flags);
+ part->chctl.flags[msg->ch_number] |= XPC_CHCTL_CLOSEREPLY;
+ spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
+
+ xpc_wakeup_channel_mgr(part);
+ break;
+ }
+ case XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV: {
+ struct xpc_activate_mq_msg_chctl_openrequest_uv *msg;
+
+ msg = container_of(msg_hdr, struct
+ xpc_activate_mq_msg_chctl_openrequest_uv,
+ hdr);
+ args = &part->remote_openclose_args[msg->ch_number];
+ args->entry_size = msg->entry_size;
+ args->local_nentries = msg->local_nentries;
+
+ spin_lock_irqsave(&part->chctl_lock, irq_flags);
+ part->chctl.flags[msg->ch_number] |= XPC_CHCTL_OPENREQUEST;
+ spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
+
+ xpc_wakeup_channel_mgr(part);
+ break;
+ }
+ case XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV: {
+ struct xpc_activate_mq_msg_chctl_openreply_uv *msg;
+
+ msg = container_of(msg_hdr, struct
+ xpc_activate_mq_msg_chctl_openreply_uv, hdr);
+ args = &part->remote_openclose_args[msg->ch_number];
+ args->remote_nentries = msg->remote_nentries;
+ args->local_nentries = msg->local_nentries;
+ args->local_msgqueue_pa = msg->local_notify_mq_gpa;
+
+ spin_lock_irqsave(&part->chctl_lock, irq_flags);
+ part->chctl.flags[msg->ch_number] |= XPC_CHCTL_OPENREPLY;
+ spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
+
+ xpc_wakeup_channel_mgr(part);
+ break;
+ }
+ case XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV:
+ spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
+ part_uv->flags |= XPC_P_ENGAGED_UV;
+ spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
+ break;
+
+ case XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV:
+ spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
+ part_uv->flags &= ~XPC_P_ENGAGED_UV;
+ spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
+ break;
+
+ default:
+ dev_err(xpc_part, "received unknown activate_mq msg type=%d "
+ "from partition=%d\n", msg_hdr->type, XPC_PARTID(part));
+
+ /* get hb checker to deactivate from the remote partition */
+ spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+ if (part_uv->act_state_req == 0)
+ xpc_activate_IRQ_rcvd++;
+ part_uv->act_state_req = XPC_P_ASR_DEACTIVATE_UV;
+ part_uv->reason = xpBadMsgType;
+ spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
+ (*wakeup_hb_checker)++;
+ return;
+ }
+
+ if (msg_hdr->rp_ts_jiffies != part->remote_rp_ts_jiffies &&
+ part->remote_rp_ts_jiffies != 0) {
+ /*
+ * ??? Does what we do here need to be sensitive to
+ * ??? act_state or remote_act_state?
+ */
+ spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+ if (part_uv->act_state_req == 0)
+ xpc_activate_IRQ_rcvd++;
+ part_uv->act_state_req = XPC_P_ASR_REACTIVATE_UV;
+ spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
+ (*wakeup_hb_checker)++;
+ }
+}
+
+static irqreturn_t
+xpc_handle_activate_IRQ_uv(int irq, void *dev_id)
+{
+ struct xpc_activate_mq_msghdr_uv *msg_hdr;
+ short partid;
+ struct xpc_partition *part;
+ int wakeup_hb_checker = 0;
+
+ while ((msg_hdr = gru_get_next_message(xpc_activate_mq_uv)) != NULL) {
+
+ partid = msg_hdr->partid;
+ if (partid < 0 || partid >= XP_MAX_NPARTITIONS_UV) {
+ dev_err(xpc_part, "xpc_handle_activate_IRQ_uv() "
+ "received invalid partid=0x%x in message\n",
+ partid);
+ } else {
+ part = &xpc_partitions[partid];
+ if (xpc_part_ref(part)) {
+ xpc_handle_activate_mq_msg_uv(part, msg_hdr,
+ &wakeup_hb_checker);
+ xpc_part_deref(part);
+ }
+ }
+
+ gru_free_message(xpc_activate_mq_uv, msg_hdr);
+ }
+
+ if (wakeup_hb_checker)
+ wake_up_interruptible(&xpc_activate_IRQ_wq);
+
+ return IRQ_HANDLED;
+}
+
+static enum xp_retval
+xpc_send_activate_IRQ_uv(struct xpc_partition *part, void *msg, size_t msg_size,
+ int msg_type)
+{
+ struct xpc_activate_mq_msghdr_uv *msg_hdr = msg;
+
+ DBUG_ON(msg_size > XPC_ACTIVATE_MSG_SIZE_UV);
+
+ msg_hdr->type = msg_type;
+ msg_hdr->partid = XPC_PARTID(part);
+ msg_hdr->act_state = part->act_state;
+ msg_hdr->rp_ts_jiffies = xpc_rsvd_page->ts_jiffies;
+
+ /* ??? Is holding a spin_lock (ch->lock) during this call a bad idea? */
+ return xpc_send_gru_msg(part->sn.uv.remote_activate_mq_gpa, msg,
+ msg_size);
+}
+
+static void
+xpc_send_activate_IRQ_part_uv(struct xpc_partition *part, void *msg,
+ size_t msg_size, int msg_type)
+{
+ enum xp_retval ret;
+
+ ret = xpc_send_activate_IRQ_uv(part, msg, msg_size, msg_type);
+ if (unlikely(ret != xpSuccess))
+ XPC_DEACTIVATE_PARTITION(part, ret);
+}
+
+static void
+xpc_send_activate_IRQ_ch_uv(struct xpc_channel *ch, unsigned long *irq_flags,
+ void *msg, size_t msg_size, int msg_type)
+{
+ struct xpc_partition *part = &xpc_partitions[ch->number];
+ enum xp_retval ret;
+
+ ret = xpc_send_activate_IRQ_uv(part, msg, msg_size, msg_type);
+ if (unlikely(ret != xpSuccess)) {
+ if (irq_flags != NULL)
+ spin_unlock_irqrestore(&ch->lock, *irq_flags);
+
+ XPC_DEACTIVATE_PARTITION(part, ret);
+
+ if (irq_flags != NULL)
+ spin_lock_irqsave(&ch->lock, *irq_flags);
+ }
+}
+
+static void
+xpc_send_local_activate_IRQ_uv(struct xpc_partition *part, int act_state_req)
+{
+ unsigned long irq_flags;
+ struct xpc_partition_uv *part_uv = &part->sn.uv;
+
+ /*
+ * !!! Make our side think that the remote parition sent an activate
+ * !!! message our way by doing what the activate IRQ handler would
+ * !!! do had one really been sent.
+ */
+
+ spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+ if (part_uv->act_state_req == 0)
+ xpc_activate_IRQ_rcvd++;
+ part_uv->act_state_req = act_state_req;
+ spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
+ wake_up_interruptible(&xpc_activate_IRQ_wq);
+}
+
+static enum xp_retval
+xpc_get_partition_rsvd_page_pa_uv(void *buf, u64 *cookie, unsigned long *rp_pa,
+ size_t *len)
+{
+ /* !!! call the UV version of sn_partition_reserved_page_pa() */
+ return xpUnsupported;
+}
+
+static int
+xpc_setup_rsvd_page_sn_uv(struct xpc_rsvd_page *rp)
+{
+ rp->sn.activate_mq_gpa = uv_gpa(xpc_activate_mq_uv);
+ return 0;
+}
+
+static void
+xpc_send_heartbeat_uv(int msg_type)
+{
+ short partid;
+ struct xpc_partition *part;
+ struct xpc_activate_mq_msg_heartbeat_req_uv msg;
+
+ /*
+ * !!! On uv we're broadcasting a heartbeat message every 5 seconds.
+ * !!! Whereas on sn2 we're bte_copy'ng the heartbeat info every 20
+ * !!! seconds. This is an increase in numalink traffic.
+ * ??? Is this good?
+ */
+
+ msg.heartbeat = atomic64_inc_return(&xpc_heartbeat_uv);
+
+ partid = find_first_bit(xpc_heartbeating_to_mask_uv,
+ XP_MAX_NPARTITIONS_UV);
+
+ while (partid < XP_MAX_NPARTITIONS_UV) {
+ part = &xpc_partitions[partid];
+
+ xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
+ msg_type);
+
+ partid = find_next_bit(xpc_heartbeating_to_mask_uv,
+ XP_MAX_NPARTITIONS_UV, partid + 1);
+ }
+}
+
+static void
+xpc_increment_heartbeat_uv(void)
+{
+ xpc_send_heartbeat_uv(XPC_ACTIVATE_MQ_MSG_INC_HEARTBEAT_UV);
+}
+
+static void
+xpc_offline_heartbeat_uv(void)
+{
+ xpc_send_heartbeat_uv(XPC_ACTIVATE_MQ_MSG_OFFLINE_HEARTBEAT_UV);
+}
+
+static void
+xpc_online_heartbeat_uv(void)
+{
+ xpc_send_heartbeat_uv(XPC_ACTIVATE_MQ_MSG_ONLINE_HEARTBEAT_UV);
+}
+
+static void
+xpc_heartbeat_init_uv(void)
+{
+ atomic64_set(&xpc_heartbeat_uv, 0);
+ bitmap_zero(xpc_heartbeating_to_mask_uv, XP_MAX_NPARTITIONS_UV);
+ xpc_heartbeating_to_mask = &xpc_heartbeating_to_mask_uv[0];
+}
+
+static void
+xpc_heartbeat_exit_uv(void)
+{
+ xpc_send_heartbeat_uv(XPC_ACTIVATE_MQ_MSG_OFFLINE_HEARTBEAT_UV);
+}
+
+static enum xp_retval
+xpc_get_remote_heartbeat_uv(struct xpc_partition *part)
+{
+ struct xpc_partition_uv *part_uv = &part->sn.uv;
+ enum xp_retval ret = xpNoHeartbeat;
+
+ if (part_uv->remote_act_state != XPC_P_AS_INACTIVE &&
+ part_uv->remote_act_state != XPC_P_AS_DEACTIVATING) {
+
+ if (part_uv->heartbeat != part->last_heartbeat ||
+ (part_uv->flags & XPC_P_HEARTBEAT_OFFLINE_UV)) {
+
+ part->last_heartbeat = part_uv->heartbeat;
+ ret = xpSuccess;
+ }
+ }
+ return ret;
+}
+
+static void
+xpc_request_partition_activation_uv(struct xpc_rsvd_page *remote_rp,
+ unsigned long remote_rp_gpa, int nasid)
+{
+ short partid = remote_rp->SAL_partid;
+ struct xpc_partition *part = &xpc_partitions[partid];
+ struct xpc_activate_mq_msg_activate_req_uv msg;
+
+ part->remote_rp_pa = remote_rp_gpa; /* !!! _pa here is really _gpa */
+ part->remote_rp_ts_jiffies = remote_rp->ts_jiffies;
+ part->sn.uv.remote_activate_mq_gpa = remote_rp->sn.activate_mq_gpa;
+
+ /*
+ * ??? Is it a good idea to make this conditional on what is
+ * ??? potentially stale state information?
+ */
+ if (part->sn.uv.remote_act_state == XPC_P_AS_INACTIVE) {
+ msg.rp_gpa = uv_gpa(xpc_rsvd_page);
+ msg.activate_mq_gpa = xpc_rsvd_page->sn.activate_mq_gpa;
+ xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
+ XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV);
+ }
+
+ if (part->act_state == XPC_P_AS_INACTIVE)
+ xpc_send_local_activate_IRQ_uv(part, XPC_P_ASR_ACTIVATE_UV);
+}
+
+static void
+xpc_request_partition_reactivation_uv(struct xpc_partition *part)
+{
+ xpc_send_local_activate_IRQ_uv(part, XPC_P_ASR_ACTIVATE_UV);
+}
+
+static void
+xpc_request_partition_deactivation_uv(struct xpc_partition *part)
+{
+ struct xpc_activate_mq_msg_deactivate_req_uv msg;
+
+ /*
+ * ??? Is it a good idea to make this conditional on what is
+ * ??? potentially stale state information?
+ */
+ if (part->sn.uv.remote_act_state != XPC_P_AS_DEACTIVATING &&
+ part->sn.uv.remote_act_state != XPC_P_AS_INACTIVE) {
+
+ msg.reason = part->reason;
+ xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
+ XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV);
+ }
+}
+
+static void
+xpc_cancel_partition_deactivation_request_uv(struct xpc_partition *part)
+{
+ /* nothing needs to be done */
+ return;
+}
+
+static void
+xpc_init_fifo_uv(struct xpc_fifo_head_uv *head)
+{
+ head->first = NULL;
+ head->last = NULL;
+ spin_lock_init(&head->lock);
+ head->n_entries = 0;
+}
+
+static void *
+xpc_get_fifo_entry_uv(struct xpc_fifo_head_uv *head)
+{
+ unsigned long irq_flags;
+ struct xpc_fifo_entry_uv *first;
+
+ spin_lock_irqsave(&head->lock, irq_flags);
+ first = head->first;
+ if (head->first != NULL) {
+ head->first = first->next;
+ if (head->first == NULL)
+ head->last = NULL;
+ }
+ head->n_entries++;
+ spin_unlock_irqrestore(&head->lock, irq_flags);
+ first->next = NULL;
+ return first;
+}
+
+static void
+xpc_put_fifo_entry_uv(struct xpc_fifo_head_uv *head,
+ struct xpc_fifo_entry_uv *last)
+{
+ unsigned long irq_flags;
+
+ last->next = NULL;
+ spin_lock_irqsave(&head->lock, irq_flags);
+ if (head->last != NULL)
+ head->last->next = last;
+ else
+ head->first = last;
+ head->last = last;
+ head->n_entries--;
+ BUG_ON(head->n_entries < 0);
+ spin_unlock_irqrestore(&head->lock, irq_flags);
+}
+
+static int
+xpc_n_of_fifo_entries_uv(struct xpc_fifo_head_uv *head)
+{
+ return head->n_entries;
+}
+
+/*
+ * Setup the channel structures that are uv specific.
+ */
+static enum xp_retval
+xpc_setup_ch_structures_sn_uv(struct xpc_partition *part)
+{
+ struct xpc_channel_uv *ch_uv;
+ int ch_number;
+
+ for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
+ ch_uv = &part->channels[ch_number].sn.uv;
+
+ xpc_init_fifo_uv(&ch_uv->msg_slot_free_list);
+ xpc_init_fifo_uv(&ch_uv->recv_msg_list);
+ }
+
+ return xpSuccess;
+}
+
+/*
+ * Teardown the channel structures that are uv specific.
+ */
+static void
+xpc_teardown_ch_structures_sn_uv(struct xpc_partition *part)
+{
+ /* nothing needs to be done */
+ return;
+}
+
+static enum xp_retval
+xpc_make_first_contact_uv(struct xpc_partition *part)
+{
+ struct xpc_activate_mq_msg_uv msg;
+
+ /*
+ * We send a sync msg to get the remote partition's remote_act_state
+ * updated to our current act_state which at this point should
+ * be XPC_P_AS_ACTIVATING.
+ */
+ xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
+ XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV);
+
+ while (part->sn.uv.remote_act_state != XPC_P_AS_ACTIVATING) {
+
+ dev_dbg(xpc_part, "waiting to make first contact with "
+ "partition %d\n", XPC_PARTID(part));
+
+ /* wait a 1/4 of a second or so */
+ (void)msleep_interruptible(250);
+
+ if (part->act_state == XPC_P_AS_DEACTIVATING)
+ return part->reason;
+ }
+
+ return xpSuccess;
+}
+
+static u64
+xpc_get_chctl_all_flags_uv(struct xpc_partition *part)
+{
+ unsigned long irq_flags;
+ union xpc_channel_ctl_flags chctl;
+
+ spin_lock_irqsave(&part->chctl_lock, irq_flags);
+ chctl = part->chctl;
+ if (chctl.all_flags != 0)
+ part->chctl.all_flags = 0;
+
+ spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
+ return chctl.all_flags;
+}
+
+static enum xp_retval
+xpc_allocate_send_msg_slot_uv(struct xpc_channel *ch)
+{
+ struct xpc_channel_uv *ch_uv = &ch->sn.uv;
+ struct xpc_send_msg_slot_uv *msg_slot;
+ unsigned long irq_flags;
+ int nentries;
+ int entry;
+ size_t nbytes;
+
+ for (nentries = ch->local_nentries; nentries > 0; nentries--) {
+ nbytes = nentries * sizeof(struct xpc_send_msg_slot_uv);
+ ch_uv->send_msg_slots = kzalloc(nbytes, GFP_KERNEL);
+ if (ch_uv->send_msg_slots == NULL)
+ continue;
+
+ for (entry = 0; entry < nentries; entry++) {
+ msg_slot = &ch_uv->send_msg_slots[entry];
+
+ msg_slot->msg_slot_number = entry;
+ xpc_put_fifo_entry_uv(&ch_uv->msg_slot_free_list,
+ &msg_slot->next);
+ }
+
+ spin_lock_irqsave(&ch->lock, irq_flags);
+ if (nentries < ch->local_nentries)
+ ch->local_nentries = nentries;
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+ return xpSuccess;
+ }
+
+ return xpNoMemory;
+}
+
+static enum xp_retval
+xpc_allocate_recv_msg_slot_uv(struct xpc_channel *ch)
+{
+ struct xpc_channel_uv *ch_uv = &ch->sn.uv;
+ struct xpc_notify_mq_msg_uv *msg_slot;
+ unsigned long irq_flags;
+ int nentries;
+ int entry;
+ size_t nbytes;
+
+ for (nentries = ch->remote_nentries; nentries > 0; nentries--) {
+ nbytes = nentries * ch->entry_size;
+ ch_uv->recv_msg_slots = kzalloc(nbytes, GFP_KERNEL);
+ if (ch_uv->recv_msg_slots == NULL)
+ continue;
+
+ for (entry = 0; entry < nentries; entry++) {
+ msg_slot = ch_uv->recv_msg_slots + entry *
+ ch->entry_size;
+
+ msg_slot->hdr.msg_slot_number = entry;
+ }
+
+ spin_lock_irqsave(&ch->lock, irq_flags);
+ if (nentries < ch->remote_nentries)
+ ch->remote_nentries = nentries;
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+ return xpSuccess;
+ }
+
+ return xpNoMemory;
+}
+
+/*
+ * Allocate msg_slots associated with the channel.
+ */
+static enum xp_retval
+xpc_setup_msg_structures_uv(struct xpc_channel *ch)
+{
+ static enum xp_retval ret;
+ struct xpc_channel_uv *ch_uv = &ch->sn.uv;
+
+ DBUG_ON(ch->flags & XPC_C_SETUP);
+
+ ret = xpc_allocate_send_msg_slot_uv(ch);
+ if (ret == xpSuccess) {
+
+ ret = xpc_allocate_recv_msg_slot_uv(ch);
+ if (ret != xpSuccess) {
+ kfree(ch_uv->send_msg_slots);
+ xpc_init_fifo_uv(&ch_uv->msg_slot_free_list);
+ }
+ }
+ return ret;
+}
+
+/*
+ * Free up msg_slots and clear other stuff that were setup for the specified
+ * channel.
+ */
+static void
+xpc_teardown_msg_structures_uv(struct xpc_channel *ch)
+{
+ struct xpc_channel_uv *ch_uv = &ch->sn.uv;
+
+ DBUG_ON(!spin_is_locked(&ch->lock));
+
+ ch_uv->remote_notify_mq_gpa = 0;
+
+ if (ch->flags & XPC_C_SETUP) {
+ xpc_init_fifo_uv(&ch_uv->msg_slot_free_list);
+ kfree(ch_uv->send_msg_slots);
+ xpc_init_fifo_uv(&ch_uv->recv_msg_list);
+ kfree(ch_uv->recv_msg_slots);
+ }
+}
+
+static void
+xpc_send_chctl_closerequest_uv(struct xpc_channel *ch, unsigned long *irq_flags)
+{
+ struct xpc_activate_mq_msg_chctl_closerequest_uv msg;
+
+ msg.ch_number = ch->number;
+ msg.reason = ch->reason;
+ xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
+ XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV);
+}
+
+static void
+xpc_send_chctl_closereply_uv(struct xpc_channel *ch, unsigned long *irq_flags)
+{
+ struct xpc_activate_mq_msg_chctl_closereply_uv msg;
+
+ msg.ch_number = ch->number;
+ xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
+ XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV);
+}
+
+static void
+xpc_send_chctl_openrequest_uv(struct xpc_channel *ch, unsigned long *irq_flags)
+{
+ struct xpc_activate_mq_msg_chctl_openrequest_uv msg;
+
+ msg.ch_number = ch->number;
+ msg.entry_size = ch->entry_size;
+ msg.local_nentries = ch->local_nentries;
+ xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
+ XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV);
+}
+
+static void
+xpc_send_chctl_openreply_uv(struct xpc_channel *ch, unsigned long *irq_flags)
+{
+ struct xpc_activate_mq_msg_chctl_openreply_uv msg;
+
+ msg.ch_number = ch->number;
+ msg.local_nentries = ch->local_nentries;
+ msg.remote_nentries = ch->remote_nentries;
+ msg.local_notify_mq_gpa = uv_gpa(xpc_notify_mq_uv);
+ xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
+ XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV);
+}
+
+static void
+xpc_send_chctl_local_msgrequest_uv(struct xpc_partition *part, int ch_number)
+{
+ unsigned long irq_flags;
+
+ spin_lock_irqsave(&part->chctl_lock, irq_flags);
+ part->chctl.flags[ch_number] |= XPC_CHCTL_MSGREQUEST;
+ spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
+
+ xpc_wakeup_channel_mgr(part);
+}
+
+static void
+xpc_save_remote_msgqueue_pa_uv(struct xpc_channel *ch,
+ unsigned long msgqueue_pa)
+{
+ ch->sn.uv.remote_notify_mq_gpa = msgqueue_pa;
+}
+
+static void
+xpc_indicate_partition_engaged_uv(struct xpc_partition *part)
+{
+ struct xpc_activate_mq_msg_uv msg;
+
+ xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
+ XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV);
+}
+
+static void
+xpc_indicate_partition_disengaged_uv(struct xpc_partition *part)
+{
+ struct xpc_activate_mq_msg_uv msg;
+
+ xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
+ XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV);
+}
+
+static void
+xpc_assume_partition_disengaged_uv(short partid)
+{
+ struct xpc_partition_uv *part_uv = &xpc_partitions[partid].sn.uv;
+ unsigned long irq_flags;
+
+ spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
+ part_uv->flags &= ~XPC_P_ENGAGED_UV;
+ spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
+}
+
+static int
+xpc_partition_engaged_uv(short partid)
+{
+ return (xpc_partitions[partid].sn.uv.flags & XPC_P_ENGAGED_UV) != 0;
+}
+
+static int
+xpc_any_partition_engaged_uv(void)
+{
+ struct xpc_partition_uv *part_uv;
+ short partid;
+
+ for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) {
+ part_uv = &xpc_partitions[partid].sn.uv;
+ if ((part_uv->flags & XPC_P_ENGAGED_UV) != 0)
+ return 1;
+ }
+ return 0;
+}
+
+static enum xp_retval
+xpc_allocate_msg_slot_uv(struct xpc_channel *ch, u32 flags,
+ struct xpc_send_msg_slot_uv **address_of_msg_slot)
+{
+ enum xp_retval ret;
+ struct xpc_send_msg_slot_uv *msg_slot;
+ struct xpc_fifo_entry_uv *entry;
+
+ while (1) {
+ entry = xpc_get_fifo_entry_uv(&ch->sn.uv.msg_slot_free_list);
+ if (entry != NULL)
+ break;
+
+ if (flags & XPC_NOWAIT)
+ return xpNoWait;
+
+ ret = xpc_allocate_msg_wait(ch);
+ if (ret != xpInterrupted && ret != xpTimeout)
+ return ret;
+ }
+
+ msg_slot = container_of(entry, struct xpc_send_msg_slot_uv, next);
+ *address_of_msg_slot = msg_slot;
+ return xpSuccess;
+}
+
+static void
+xpc_free_msg_slot_uv(struct xpc_channel *ch,
+ struct xpc_send_msg_slot_uv *msg_slot)
+{
+ xpc_put_fifo_entry_uv(&ch->sn.uv.msg_slot_free_list, &msg_slot->next);
+
+ /* wakeup anyone waiting for a free msg slot */
+ if (atomic_read(&ch->n_on_msg_allocate_wq) > 0)
+ wake_up(&ch->msg_allocate_wq);
+}
+
+static void
+xpc_notify_sender_uv(struct xpc_channel *ch,
+ struct xpc_send_msg_slot_uv *msg_slot,
+ enum xp_retval reason)
+{
+ xpc_notify_func func = msg_slot->func;
+
+ if (func != NULL && cmpxchg(&msg_slot->func, func, NULL) == func) {
+
+ atomic_dec(&ch->n_to_notify);
+
+ dev_dbg(xpc_chan, "msg_slot->func() called, msg_slot=0x%p "
+ "msg_slot_number=%d partid=%d channel=%d\n", msg_slot,
+ msg_slot->msg_slot_number, ch->partid, ch->number);
+
+ func(reason, ch->partid, ch->number, msg_slot->key);
+
+ dev_dbg(xpc_chan, "msg_slot->func() returned, msg_slot=0x%p "
+ "msg_slot_number=%d partid=%d channel=%d\n", msg_slot,
+ msg_slot->msg_slot_number, ch->partid, ch->number);
+ }
+}
+
+static void
+xpc_handle_notify_mq_ack_uv(struct xpc_channel *ch,
+ struct xpc_notify_mq_msg_uv *msg)
+{
+ struct xpc_send_msg_slot_uv *msg_slot;
+ int entry = msg->hdr.msg_slot_number % ch->local_nentries;
+
+ msg_slot = &ch->sn.uv.send_msg_slots[entry];
+
+ BUG_ON(msg_slot->msg_slot_number != msg->hdr.msg_slot_number);
+ msg_slot->msg_slot_number += ch->local_nentries;
+
+ if (msg_slot->func != NULL)
+ xpc_notify_sender_uv(ch, msg_slot, xpMsgDelivered);
+
+ xpc_free_msg_slot_uv(ch, msg_slot);
+}
+
+static void
+xpc_handle_notify_mq_msg_uv(struct xpc_partition *part,
+ struct xpc_notify_mq_msg_uv *msg)
+{
+ struct xpc_partition_uv *part_uv = &part->sn.uv;
+ struct xpc_channel *ch;
+ struct xpc_channel_uv *ch_uv;
+ struct xpc_notify_mq_msg_uv *msg_slot;
+ unsigned long irq_flags;
+ int ch_number = msg->hdr.ch_number;
+
+ if (unlikely(ch_number >= part->nchannels)) {
+ dev_err(xpc_part, "xpc_handle_notify_IRQ_uv() received invalid "
+ "channel number=0x%x in message from partid=%d\n",
+ ch_number, XPC_PARTID(part));
+
+ /* get hb checker to deactivate from the remote partition */
+ spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+ if (part_uv->act_state_req == 0)
+ xpc_activate_IRQ_rcvd++;
+ part_uv->act_state_req = XPC_P_ASR_DEACTIVATE_UV;
+ part_uv->reason = xpBadChannelNumber;
+ spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
+ wake_up_interruptible(&xpc_activate_IRQ_wq);
+ return;
+ }
+
+ ch = &part->channels[ch_number];
+ xpc_msgqueue_ref(ch);
+
+ if (!(ch->flags & XPC_C_CONNECTED)) {
+ xpc_msgqueue_deref(ch);
+ return;
+ }
+
+ /* see if we're really dealing with an ACK for a previously sent msg */
+ if (msg->hdr.size == 0) {
+ xpc_handle_notify_mq_ack_uv(ch, msg);
+ xpc_msgqueue_deref(ch);
+ return;
+ }
+
+ /* we're dealing with a normal message sent via the notify_mq */
+ ch_uv = &ch->sn.uv;
+
+ msg_slot = (struct xpc_notify_mq_msg_uv *)((u64)ch_uv->recv_msg_slots +
+ (msg->hdr.msg_slot_number % ch->remote_nentries) *
+ ch->entry_size);
+
+ BUG_ON(msg->hdr.msg_slot_number != msg_slot->hdr.msg_slot_number);
+ BUG_ON(msg_slot->hdr.size != 0);
+
+ memcpy(msg_slot, msg, msg->hdr.size);
+
+ xpc_put_fifo_entry_uv(&ch_uv->recv_msg_list, &msg_slot->hdr.u.next);
+
+ if (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) {
+ /*
+ * If there is an existing idle kthread get it to deliver
+ * the payload, otherwise we'll have to get the channel mgr
+ * for this partition to create a kthread to do the delivery.
+ */
+ if (atomic_read(&ch->kthreads_idle) > 0)
+ wake_up_nr(&ch->idle_wq, 1);
+ else
+ xpc_send_chctl_local_msgrequest_uv(part, ch->number);
+ }
+ xpc_msgqueue_deref(ch);
+}
+
+static irqreturn_t
+xpc_handle_notify_IRQ_uv(int irq, void *dev_id)
+{
+ struct xpc_notify_mq_msg_uv *msg;
+ short partid;
+ struct xpc_partition *part;
+
+ while ((msg = gru_get_next_message(xpc_notify_mq_uv)) != NULL) {
+
+ partid = msg->hdr.partid;
+ if (partid < 0 || partid >= XP_MAX_NPARTITIONS_UV) {
+ dev_err(xpc_part, "xpc_handle_notify_IRQ_uv() received "
+ "invalid partid=0x%x in message\n", partid);
+ } else {
+ part = &xpc_partitions[partid];
+
+ if (xpc_part_ref(part)) {
+ xpc_handle_notify_mq_msg_uv(part, msg);
+ xpc_part_deref(part);
+ }
+ }
+
+ gru_free_message(xpc_notify_mq_uv, msg);
+ }
+
+ return IRQ_HANDLED;
+}
+
+static int
+xpc_n_of_deliverable_payloads_uv(struct xpc_channel *ch)
+{
+ return xpc_n_of_fifo_entries_uv(&ch->sn.uv.recv_msg_list);
+}
+
+static void
+xpc_process_msg_chctl_flags_uv(struct xpc_partition *part, int ch_number)
+{
+ struct xpc_channel *ch = &part->channels[ch_number];
+ int ndeliverable_payloads;
+
+ xpc_msgqueue_ref(ch);
+
+ ndeliverable_payloads = xpc_n_of_deliverable_payloads_uv(ch);
+
+ if (ndeliverable_payloads > 0 &&
+ (ch->flags & XPC_C_CONNECTED) &&
+ (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE)) {
+
+ xpc_activate_kthreads(ch, ndeliverable_payloads);
+ }
+
+ xpc_msgqueue_deref(ch);
+}
+
+static enum xp_retval
+xpc_send_payload_uv(struct xpc_channel *ch, u32 flags, void *payload,
+ u16 payload_size, u8 notify_type, xpc_notify_func func,
+ void *key)
+{
+ enum xp_retval ret = xpSuccess;
+ struct xpc_send_msg_slot_uv *msg_slot = NULL;
+ struct xpc_notify_mq_msg_uv *msg;
+ u8 msg_buffer[XPC_NOTIFY_MSG_SIZE_UV];
+ size_t msg_size;
+
+ DBUG_ON(notify_type != XPC_N_CALL);
+
+ msg_size = sizeof(struct xpc_notify_mq_msghdr_uv) + payload_size;
+ if (msg_size > ch->entry_size)
+ return xpPayloadTooBig;
+
+ xpc_msgqueue_ref(ch);
+
+ if (ch->flags & XPC_C_DISCONNECTING) {
+ ret = ch->reason;
+ goto out_1;
+ }
+ if (!(ch->flags & XPC_C_CONNECTED)) {
+ ret = xpNotConnected;
+ goto out_1;
+ }
+
+ ret = xpc_allocate_msg_slot_uv(ch, flags, &msg_slot);
+ if (ret != xpSuccess)
+ goto out_1;
+
+ if (func != NULL) {
+ atomic_inc(&ch->n_to_notify);
+
+ msg_slot->key = key;
+ wmb(); /* a non-NULL func must hit memory after the key */
+ msg_slot->func = func;
+
+ if (ch->flags & XPC_C_DISCONNECTING) {
+ ret = ch->reason;
+ goto out_2;
+ }
+ }
+
+ msg = (struct xpc_notify_mq_msg_uv *)&msg_buffer;
+ msg->hdr.partid = xp_partition_id;
+ msg->hdr.ch_number = ch->number;
+ msg->hdr.size = msg_size;
+ msg->hdr.msg_slot_number = msg_slot->msg_slot_number;
+ memcpy(&msg->payload, payload, payload_size);
+
+ ret = xpc_send_gru_msg(ch->sn.uv.remote_notify_mq_gpa, msg, msg_size);
+ if (ret == xpSuccess)
+ goto out_1;
+
+ XPC_DEACTIVATE_PARTITION(&xpc_partitions[ch->partid], ret);
+out_2:
+ if (func != NULL) {
+ /*
+ * Try to NULL the msg_slot's func field. If we fail, then
+ * xpc_notify_senders_of_disconnect_uv() beat us to it, in which
+ * case we need to pretend we succeeded to send the message
+ * since the user will get a callout for the disconnect error
+ * by xpc_notify_senders_of_disconnect_uv(), and to also get an
+ * error returned here will confuse them. Additionally, since
+ * in this case the channel is being disconnected we don't need
+ * to put the the msg_slot back on the free list.
+ */
+ if (cmpxchg(&msg_slot->func, func, NULL) != func) {
+ ret = xpSuccess;
+ goto out_1;
+ }
+
+ msg_slot->key = NULL;
+ atomic_dec(&ch->n_to_notify);
+ }
+ xpc_free_msg_slot_uv(ch, msg_slot);
+out_1:
+ xpc_msgqueue_deref(ch);
+ return ret;
+}
+
+/*
+ * Tell the callers of xpc_send_notify() that the status of their payloads
+ * is unknown because the channel is now disconnecting.
+ *
+ * We don't worry about putting these msg_slots on the free list since the
+ * msg_slots themselves are about to be kfree'd.
+ */
+static void
+xpc_notify_senders_of_disconnect_uv(struct xpc_channel *ch)
+{
+ struct xpc_send_msg_slot_uv *msg_slot;
+ int entry;
+
+ DBUG_ON(!(ch->flags & XPC_C_DISCONNECTING));
+
+ for (entry = 0; entry < ch->local_nentries; entry++) {
+
+ if (atomic_read(&ch->n_to_notify) == 0)
+ break;
+
+ msg_slot = &ch->sn.uv.send_msg_slots[entry];
+ if (msg_slot->func != NULL)
+ xpc_notify_sender_uv(ch, msg_slot, ch->reason);
+ }
+}
+
+/*
+ * Get the next deliverable message's payload.
+ */
+static void *
+xpc_get_deliverable_payload_uv(struct xpc_channel *ch)
+{
+ struct xpc_fifo_entry_uv *entry;
+ struct xpc_notify_mq_msg_uv *msg;
+ void *payload = NULL;
+
+ if (!(ch->flags & XPC_C_DISCONNECTING)) {
+ entry = xpc_get_fifo_entry_uv(&ch->sn.uv.recv_msg_list);
+ if (entry != NULL) {
+ msg = container_of(entry, struct xpc_notify_mq_msg_uv,
+ hdr.u.next);
+ payload = &msg->payload;
+ }
+ }
+ return payload;
+}
+
+static void
+xpc_received_payload_uv(struct xpc_channel *ch, void *payload)
+{
+ struct xpc_notify_mq_msg_uv *msg;
+ enum xp_retval ret;
+
+ msg = container_of(payload, struct xpc_notify_mq_msg_uv, payload);
+
+ /* return an ACK to the sender of this message */
+
+ msg->hdr.partid = xp_partition_id;
+ msg->hdr.size = 0; /* size of zero indicates this is an ACK */
+
+ ret = xpc_send_gru_msg(ch->sn.uv.remote_notify_mq_gpa, msg,
+ sizeof(struct xpc_notify_mq_msghdr_uv));
+ if (ret != xpSuccess)
+ XPC_DEACTIVATE_PARTITION(&xpc_partitions[ch->partid], ret);
+
+ msg->hdr.msg_slot_number += ch->remote_nentries;
+}
+
+int
+xpc_init_uv(void)
+{
+ xpc_setup_partitions_sn = xpc_setup_partitions_sn_uv;
+ xpc_process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_uv;
+ xpc_get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_uv;
+ xpc_setup_rsvd_page_sn = xpc_setup_rsvd_page_sn_uv;
+ xpc_increment_heartbeat = xpc_increment_heartbeat_uv;
+ xpc_offline_heartbeat = xpc_offline_heartbeat_uv;
+ xpc_online_heartbeat = xpc_online_heartbeat_uv;
+ xpc_heartbeat_init = xpc_heartbeat_init_uv;
+ xpc_heartbeat_exit = xpc_heartbeat_exit_uv;
+ xpc_get_remote_heartbeat = xpc_get_remote_heartbeat_uv;
+
+ xpc_request_partition_activation = xpc_request_partition_activation_uv;
+ xpc_request_partition_reactivation =
+ xpc_request_partition_reactivation_uv;
+ xpc_request_partition_deactivation =
+ xpc_request_partition_deactivation_uv;
+ xpc_cancel_partition_deactivation_request =
+ xpc_cancel_partition_deactivation_request_uv;
+
+ xpc_setup_ch_structures_sn = xpc_setup_ch_structures_sn_uv;
+ xpc_teardown_ch_structures_sn = xpc_teardown_ch_structures_sn_uv;
+
+ xpc_make_first_contact = xpc_make_first_contact_uv;
+
+ xpc_get_chctl_all_flags = xpc_get_chctl_all_flags_uv;
+ xpc_send_chctl_closerequest = xpc_send_chctl_closerequest_uv;
+ xpc_send_chctl_closereply = xpc_send_chctl_closereply_uv;
+ xpc_send_chctl_openrequest = xpc_send_chctl_openrequest_uv;
+ xpc_send_chctl_openreply = xpc_send_chctl_openreply_uv;
+
+ xpc_save_remote_msgqueue_pa = xpc_save_remote_msgqueue_pa_uv;
+
+ xpc_setup_msg_structures = xpc_setup_msg_structures_uv;
+ xpc_teardown_msg_structures = xpc_teardown_msg_structures_uv;
+
+ xpc_indicate_partition_engaged = xpc_indicate_partition_engaged_uv;
+ xpc_indicate_partition_disengaged =
+ xpc_indicate_partition_disengaged_uv;
+ xpc_assume_partition_disengaged = xpc_assume_partition_disengaged_uv;
+ xpc_partition_engaged = xpc_partition_engaged_uv;
+ xpc_any_partition_engaged = xpc_any_partition_engaged_uv;
+
+ xpc_n_of_deliverable_payloads = xpc_n_of_deliverable_payloads_uv;
+ xpc_process_msg_chctl_flags = xpc_process_msg_chctl_flags_uv;
+ xpc_send_payload = xpc_send_payload_uv;
+ xpc_notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_uv;
+ xpc_get_deliverable_payload = xpc_get_deliverable_payload_uv;
+ xpc_received_payload = xpc_received_payload_uv;
+
+ if (sizeof(struct xpc_notify_mq_msghdr_uv) > XPC_MSG_HDR_MAX_SIZE) {
+ dev_err(xpc_part, "xpc_notify_mq_msghdr_uv is larger than %d\n",
+ XPC_MSG_HDR_MAX_SIZE);
+ return -E2BIG;
+ }
+
+ /* ??? The cpuid argument's value is 0, is that what we want? */
+ /* !!! The irq argument's value isn't correct. */
+ xpc_activate_mq_uv = xpc_create_gru_mq_uv(XPC_ACTIVATE_MQ_SIZE_UV, 0, 0,
+ xpc_handle_activate_IRQ_uv);
+ if (xpc_activate_mq_uv == NULL)
+ return -ENOMEM;
+
+ /* ??? The cpuid argument's value is 0, is that what we want? */
+ /* !!! The irq argument's value isn't correct. */
+ xpc_notify_mq_uv = xpc_create_gru_mq_uv(XPC_NOTIFY_MQ_SIZE_UV, 0, 0,
+ xpc_handle_notify_IRQ_uv);
+ if (xpc_notify_mq_uv == NULL) {
+ /* !!! The irq argument's value isn't correct. */
+ xpc_destroy_gru_mq_uv(xpc_activate_mq_uv,
+ XPC_ACTIVATE_MQ_SIZE_UV, 0);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+void
+xpc_exit_uv(void)
+{
+ /* !!! The irq argument's value isn't correct. */
+ xpc_destroy_gru_mq_uv(xpc_notify_mq_uv, XPC_NOTIFY_MQ_SIZE_UV, 0);
+
+ /* !!! The irq argument's value isn't correct. */
+ xpc_destroy_gru_mq_uv(xpc_activate_mq_uv, XPC_ACTIVATE_MQ_SIZE_UV, 0);
+}
diff --git a/drivers/misc/sgi-xp/xpnet.c b/drivers/misc/sgi-xp/xpnet.c
index 822dc8e8d7f..71513b3af70 100644
--- a/drivers/misc/sgi-xp/xpnet.c
+++ b/drivers/misc/sgi-xp/xpnet.c
@@ -21,21 +21,8 @@
*/
#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/ioport.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
-#include <linux/delay.h>
-#include <linux/ethtool.h>
-#include <linux/mii.h>
-#include <linux/smp.h>
-#include <linux/string.h>
-#include <asm/sn/bte.h>
-#include <asm/sn/io.h>
-#include <asm/sn/sn_sal.h>
-#include <asm/atomic.h>
#include "xp.h"
/*
@@ -57,7 +44,7 @@ struct xpnet_message {
u16 version; /* Version for this message */
u16 embedded_bytes; /* #of bytes embedded in XPC message */
u32 magic; /* Special number indicating this is xpnet */
- u64 buf_pa; /* phys address of buffer to retrieve */
+ unsigned long buf_pa; /* phys address of buffer to retrieve */
u32 size; /* #of bytes in buffer */
u8 leadin_ignore; /* #of bytes to ignore at the beginning */
u8 tailout_ignore; /* #of bytes to ignore at the end */
@@ -70,11 +57,10 @@ struct xpnet_message {
*
* XPC expects each message to exist in an individual cacheline.
*/
-#define XPNET_MSG_SIZE (L1_CACHE_BYTES - XPC_MSG_PAYLOAD_OFFSET)
+#define XPNET_MSG_SIZE XPC_MSG_PAYLOAD_MAX_SIZE
#define XPNET_MSG_DATA_MAX \
- (XPNET_MSG_SIZE - (u64)(&((struct xpnet_message *)0)->data))
-#define XPNET_MSG_ALIGNED_SIZE (L1_CACHE_ALIGN(XPNET_MSG_SIZE))
-#define XPNET_MSG_NENTRIES (PAGE_SIZE / XPNET_MSG_ALIGNED_SIZE)
+ (XPNET_MSG_SIZE - offsetof(struct xpnet_message, data))
+#define XPNET_MSG_NENTRIES (PAGE_SIZE / XPC_MSG_MAX_SIZE)
#define XPNET_MAX_KTHREADS (XPNET_MSG_NENTRIES + 1)
#define XPNET_MAX_IDLE_KTHREADS (XPNET_MSG_NENTRIES + 1)
@@ -105,7 +91,6 @@ struct xpnet_message {
* then be released.
*/
struct xpnet_pending_msg {
- struct list_head free_list;
struct sk_buff *skb;
atomic_t use_count;
};
@@ -121,7 +106,7 @@ struct net_device *xpnet_device;
* When we are notified of other partitions activating, we add them to
* our bitmask of partitions to which we broadcast.
*/
-static u64 xpnet_broadcast_partitions;
+static unsigned long *xpnet_broadcast_partitions;
/* protect above */
static DEFINE_SPINLOCK(xpnet_broadcast_lock);
@@ -141,16 +126,13 @@ static DEFINE_SPINLOCK(xpnet_broadcast_lock);
#define XPNET_DEF_MTU (0x8000UL)
/*
- * The partition id is encapsulated in the MAC address. The following
- * define locates the octet the partid is in.
+ * The partid is encapsulated in the MAC address beginning in the following
+ * octet and it consists of two octets.
*/
-#define XPNET_PARTID_OCTET 1
-#define XPNET_LICENSE_OCTET 2
+#define XPNET_PARTID_OCTET 2
+
+/* Define the XPNET debug device structures to be used with dev_dbg() et al */
-/*
- * Define the XPNET debug device structure that is to be used with dev_dbg(),
- * dev_err(), dev_warn(), and dev_info().
- */
struct device_driver xpnet_dbg_name = {
.name = "xpnet"
};
@@ -169,7 +151,8 @@ static void
xpnet_receive(short partid, int channel, struct xpnet_message *msg)
{
struct sk_buff *skb;
- bte_result_t bret;
+ void *dst;
+ enum xp_retval ret;
struct xpnet_dev_private *priv =
(struct xpnet_dev_private *)xpnet_device->priv;
@@ -201,7 +184,7 @@ xpnet_receive(short partid, int channel, struct xpnet_message *msg)
/*
* The allocated skb has some reserved space.
- * In order to use bte_copy, we need to get the
+ * In order to use xp_remote_memcpy(), we need to get the
* skb->data pointer moved forward.
*/
skb_reserve(skb, (L1_CACHE_BYTES - ((u64)skb->data &
@@ -226,26 +209,21 @@ xpnet_receive(short partid, int channel, struct xpnet_message *msg)
skb_copy_to_linear_data(skb, &msg->data,
(size_t)msg->embedded_bytes);
} else {
+ dst = (void *)((u64)skb->data & ~(L1_CACHE_BYTES - 1));
dev_dbg(xpnet, "transferring buffer to the skb->data area;\n\t"
- "bte_copy(0x%p, 0x%p, %hu)\n", (void *)msg->buf_pa,
- (void *)__pa((u64)skb->data & ~(L1_CACHE_BYTES - 1)),
- msg->size);
-
- bret = bte_copy(msg->buf_pa,
- __pa((u64)skb->data & ~(L1_CACHE_BYTES - 1)),
- msg->size, (BTE_NOTIFY | BTE_WACQUIRE), NULL);
+ "xp_remote_memcpy(0x%p, 0x%p, %hu)\n", dst,
+ (void *)msg->buf_pa, msg->size);
- if (bret != BTE_SUCCESS) {
+ ret = xp_remote_memcpy(xp_pa(dst), msg->buf_pa, msg->size);
+ if (ret != xpSuccess) {
/*
- * >>> Need better way of cleaning skb. Currently skb
- * >>> appears in_use and we can't just call
- * >>> dev_kfree_skb.
+ * !!! Need better way of cleaning skb. Currently skb
+ * !!! appears in_use and we can't just call
+ * !!! dev_kfree_skb.
*/
- dev_err(xpnet, "bte_copy(0x%p, 0x%p, 0x%hx) returned "
- "error=0x%x\n", (void *)msg->buf_pa,
- (void *)__pa((u64)skb->data &
- ~(L1_CACHE_BYTES - 1)),
- msg->size, bret);
+ dev_err(xpnet, "xp_remote_memcpy(0x%p, 0x%p, 0x%hx) "
+ "returned error=0x%x\n", dst,
+ (void *)msg->buf_pa, msg->size, ret);
xpc_received(partid, channel, (void *)msg);
@@ -285,9 +263,7 @@ static void
xpnet_connection_activity(enum xp_retval reason, short partid, int channel,
void *data, void *key)
{
- long bp;
-
- DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
+ DBUG_ON(partid < 0 || partid >= xp_max_npartitions);
DBUG_ON(channel != XPC_NET_CHANNEL);
switch (reason) {
@@ -299,31 +275,28 @@ xpnet_connection_activity(enum xp_retval reason, short partid, int channel,
case xpConnected: /* connection completed to a partition */
spin_lock_bh(&xpnet_broadcast_lock);
- xpnet_broadcast_partitions |= 1UL << (partid - 1);
- bp = xpnet_broadcast_partitions;
+ __set_bit(partid, xpnet_broadcast_partitions);
spin_unlock_bh(&xpnet_broadcast_lock);
netif_carrier_on(xpnet_device);
- dev_dbg(xpnet, "%s connection created to partition %d; "
- "xpnet_broadcast_partitions=0x%lx\n",
- xpnet_device->name, partid, bp);
+ dev_dbg(xpnet, "%s connected to partition %d\n",
+ xpnet_device->name, partid);
break;
default:
spin_lock_bh(&xpnet_broadcast_lock);
- xpnet_broadcast_partitions &= ~(1UL << (partid - 1));
- bp = xpnet_broadcast_partitions;
+ __clear_bit(partid, xpnet_broadcast_partitions);
spin_unlock_bh(&xpnet_broadcast_lock);
- if (bp == 0)
+ if (bitmap_empty((unsigned long *)xpnet_broadcast_partitions,
+ xp_max_npartitions)) {
netif_carrier_off(xpnet_device);
+ }
- dev_dbg(xpnet, "%s disconnected from partition %d; "
- "xpnet_broadcast_partitions=0x%lx\n",
- xpnet_device->name, partid, bp);
+ dev_dbg(xpnet, "%s disconnected from partition %d\n",
+ xpnet_device->name, partid);
break;
-
}
}
@@ -334,8 +307,10 @@ xpnet_dev_open(struct net_device *dev)
dev_dbg(xpnet, "calling xpc_connect(%d, 0x%p, NULL, %ld, %ld, %ld, "
"%ld)\n", XPC_NET_CHANNEL, xpnet_connection_activity,
- XPNET_MSG_SIZE, XPNET_MSG_NENTRIES, XPNET_MAX_KTHREADS,
- XPNET_MAX_IDLE_KTHREADS);
+ (unsigned long)XPNET_MSG_SIZE,
+ (unsigned long)XPNET_MSG_NENTRIES,
+ (unsigned long)XPNET_MAX_KTHREADS,
+ (unsigned long)XPNET_MAX_IDLE_KTHREADS);
ret = xpc_connect(XPC_NET_CHANNEL, xpnet_connection_activity, NULL,
XPNET_MSG_SIZE, XPNET_MSG_NENTRIES,
@@ -426,35 +401,74 @@ xpnet_send_completed(enum xp_retval reason, short partid, int channel,
}
}
+static void
+xpnet_send(struct sk_buff *skb, struct xpnet_pending_msg *queued_msg,
+ u64 start_addr, u64 end_addr, u16 embedded_bytes, int dest_partid)
+{
+ u8 msg_buffer[XPNET_MSG_SIZE];
+ struct xpnet_message *msg = (struct xpnet_message *)&msg_buffer;
+ u16 msg_size = sizeof(struct xpnet_message);
+ enum xp_retval ret;
+
+ msg->embedded_bytes = embedded_bytes;
+ if (unlikely(embedded_bytes != 0)) {
+ msg->version = XPNET_VERSION_EMBED;
+ dev_dbg(xpnet, "calling memcpy(0x%p, 0x%p, 0x%lx)\n",
+ &msg->data, skb->data, (size_t)embedded_bytes);
+ skb_copy_from_linear_data(skb, &msg->data,
+ (size_t)embedded_bytes);
+ msg_size += embedded_bytes - 1;
+ } else {
+ msg->version = XPNET_VERSION;
+ }
+ msg->magic = XPNET_MAGIC;
+ msg->size = end_addr - start_addr;
+ msg->leadin_ignore = (u64)skb->data - start_addr;
+ msg->tailout_ignore = end_addr - (u64)skb_tail_pointer(skb);
+ msg->buf_pa = xp_pa((void *)start_addr);
+
+ dev_dbg(xpnet, "sending XPC message to %d:%d\n"
+ KERN_DEBUG "msg->buf_pa=0x%lx, msg->size=%u, "
+ "msg->leadin_ignore=%u, msg->tailout_ignore=%u\n",
+ dest_partid, XPC_NET_CHANNEL, msg->buf_pa, msg->size,
+ msg->leadin_ignore, msg->tailout_ignore);
+
+ atomic_inc(&queued_msg->use_count);
+
+ ret = xpc_send_notify(dest_partid, XPC_NET_CHANNEL, XPC_NOWAIT, msg,
+ msg_size, xpnet_send_completed, queued_msg);
+ if (unlikely(ret != xpSuccess))
+ atomic_dec(&queued_msg->use_count);
+}
+
/*
* Network layer has formatted a packet (skb) and is ready to place it
* "on the wire". Prepare and send an xpnet_message to all partitions
* which have connected with us and are targets of this packet.
*
* MAC-NOTE: For the XPNET driver, the MAC address contains the
- * destination partition_id. If the destination partition id word
- * is 0xff, this packet is to broadcast to all partitions.
+ * destination partid. If the destination partid octets are 0xffff,
+ * this packet is to be broadcast to all connected partitions.
*/
static int
xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct xpnet_pending_msg *queued_msg;
- enum xp_retval ret;
- struct xpnet_message *msg;
u64 start_addr, end_addr;
- long dp;
- u8 second_mac_octet;
short dest_partid;
- struct xpnet_dev_private *priv;
- u16 embedded_bytes;
-
- priv = (struct xpnet_dev_private *)dev->priv;
+ struct xpnet_dev_private *priv = (struct xpnet_dev_private *)dev->priv;
+ u16 embedded_bytes = 0;
dev_dbg(xpnet, ">skb->head=0x%p skb->data=0x%p skb->tail=0x%p "
"skb->end=0x%p skb->len=%d\n", (void *)skb->head,
(void *)skb->data, skb_tail_pointer(skb), skb_end_pointer(skb),
skb->len);
+ if (skb->data[0] == 0x33) {
+ dev_kfree_skb(skb);
+ return 0; /* nothing needed to be done */
+ }
+
/*
* The xpnet_pending_msg tracks how many outstanding
* xpc_send_notifies are relying on this skb. When none
@@ -466,7 +480,6 @@ xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
"packet\n", sizeof(struct xpnet_pending_msg));
priv->stats.tx_errors++;
-
return -ENOMEM;
}
@@ -475,7 +488,6 @@ xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
end_addr = L1_CACHE_ALIGN((u64)skb_tail_pointer(skb));
/* calculate how many bytes to embed in the XPC message */
- embedded_bytes = 0;
if (unlikely(skb->len <= XPNET_MSG_DATA_MAX)) {
/* skb->data does fit so embed */
embedded_bytes = skb->len;
@@ -491,82 +503,28 @@ xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
atomic_set(&queued_msg->use_count, 1);
queued_msg->skb = skb;
- second_mac_octet = skb->data[XPNET_PARTID_OCTET];
- if (second_mac_octet == 0xff) {
+ if (skb->data[0] == 0xff) {
/* we are being asked to broadcast to all partitions */
- dp = xpnet_broadcast_partitions;
- } else if (second_mac_octet != 0) {
- dp = xpnet_broadcast_partitions &
- (1UL << (second_mac_octet - 1));
- } else {
- /* 0 is an invalid partid. Ignore */
- dp = 0;
- }
- dev_dbg(xpnet, "destination Partitions mask (dp) = 0x%lx\n", dp);
-
- /*
- * If we wanted to allow promiscuous mode to work like an
- * unswitched network, this would be a good point to OR in a
- * mask of partitions which should be receiving all packets.
- */
-
- /*
- * Main send loop.
- */
- for (dest_partid = 1; dp && dest_partid < XP_MAX_PARTITIONS;
- dest_partid++) {
+ for_each_bit(dest_partid, xpnet_broadcast_partitions,
+ xp_max_npartitions) {
- if (!(dp & (1UL << (dest_partid - 1)))) {
- /* not destined for this partition */
- continue;
+ xpnet_send(skb, queued_msg, start_addr, end_addr,
+ embedded_bytes, dest_partid);
}
+ } else {
+ dest_partid = (short)skb->data[XPNET_PARTID_OCTET + 1];
+ dest_partid |= (short)skb->data[XPNET_PARTID_OCTET + 0] << 8;
- /* remove this partition from the destinations mask */
- dp &= ~(1UL << (dest_partid - 1));
-
- /* found a partition to send to */
-
- ret = xpc_allocate(dest_partid, XPC_NET_CHANNEL,
- XPC_NOWAIT, (void **)&msg);
- if (unlikely(ret != xpSuccess))
- continue;
-
- msg->embedded_bytes = embedded_bytes;
- if (unlikely(embedded_bytes != 0)) {
- msg->version = XPNET_VERSION_EMBED;
- dev_dbg(xpnet, "calling memcpy(0x%p, 0x%p, 0x%lx)\n",
- &msg->data, skb->data, (size_t)embedded_bytes);
- skb_copy_from_linear_data(skb, &msg->data,
- (size_t)embedded_bytes);
- } else {
- msg->version = XPNET_VERSION;
- }
- msg->magic = XPNET_MAGIC;
- msg->size = end_addr - start_addr;
- msg->leadin_ignore = (u64)skb->data - start_addr;
- msg->tailout_ignore = end_addr - (u64)skb_tail_pointer(skb);
- msg->buf_pa = __pa(start_addr);
-
- dev_dbg(xpnet, "sending XPC message to %d:%d\n"
- KERN_DEBUG "msg->buf_pa=0x%lx, msg->size=%u, "
- "msg->leadin_ignore=%u, msg->tailout_ignore=%u\n",
- dest_partid, XPC_NET_CHANNEL, msg->buf_pa, msg->size,
- msg->leadin_ignore, msg->tailout_ignore);
-
- atomic_inc(&queued_msg->use_count);
-
- ret = xpc_send_notify(dest_partid, XPC_NET_CHANNEL, msg,
- xpnet_send_completed, queued_msg);
- if (unlikely(ret != xpSuccess)) {
- atomic_dec(&queued_msg->use_count);
- continue;
+ if (dest_partid >= 0 &&
+ dest_partid < xp_max_npartitions &&
+ test_bit(dest_partid, xpnet_broadcast_partitions) != 0) {
+
+ xpnet_send(skb, queued_msg, start_addr, end_addr,
+ embedded_bytes, dest_partid);
}
}
if (atomic_dec_return(&queued_msg->use_count) == 0) {
- dev_dbg(xpnet, "no partitions to receive packet destined for "
- "%d\n", dest_partid);
-
dev_kfree_skb(skb);
kfree(queued_msg);
}
@@ -594,23 +552,28 @@ xpnet_dev_tx_timeout(struct net_device *dev)
static int __init
xpnet_init(void)
{
- int i;
- u32 license_num;
- int result = -ENOMEM;
+ int result;
- if (!ia64_platform_is("sn2"))
+ if (!is_shub() && !is_uv())
return -ENODEV;
dev_info(xpnet, "registering network device %s\n", XPNET_DEVICE_NAME);
+ xpnet_broadcast_partitions = kzalloc(BITS_TO_LONGS(xp_max_npartitions) *
+ sizeof(long), GFP_KERNEL);
+ if (xpnet_broadcast_partitions == NULL)
+ return -ENOMEM;
+
/*
* use ether_setup() to init the majority of our device
* structure and then override the necessary pieces.
*/
xpnet_device = alloc_netdev(sizeof(struct xpnet_dev_private),
XPNET_DEVICE_NAME, ether_setup);
- if (xpnet_device == NULL)
+ if (xpnet_device == NULL) {
+ kfree(xpnet_broadcast_partitions);
return -ENOMEM;
+ }
netif_carrier_off(xpnet_device);
@@ -628,14 +591,10 @@ xpnet_init(void)
* MAC addresses. We chose the first octet of the MAC to be unlikely
* to collide with any vendor's officially issued MAC.
*/
- xpnet_device->dev_addr[0] = 0xfe;
- xpnet_device->dev_addr[XPNET_PARTID_OCTET] = sn_partition_id;
- license_num = sn_partition_serial_number_val();
- for (i = 3; i >= 0; i--) {
- xpnet_device->dev_addr[XPNET_LICENSE_OCTET + i] =
- license_num & 0xff;
- license_num = license_num >> 8;
- }
+ xpnet_device->dev_addr[0] = 0x02; /* locally administered, no OUI */
+
+ xpnet_device->dev_addr[XPNET_PARTID_OCTET + 1] = xp_partition_id;
+ xpnet_device->dev_addr[XPNET_PARTID_OCTET + 0] = (xp_partition_id >> 8);
/*
* ether_setup() sets this to a multicast device. We are
@@ -651,8 +610,10 @@ xpnet_init(void)
xpnet_device->features = NETIF_F_NO_CSUM;
result = register_netdev(xpnet_device);
- if (result != 0)
+ if (result != 0) {
free_netdev(xpnet_device);
+ kfree(xpnet_broadcast_partitions);
+ }
return result;
}
@@ -666,8 +627,8 @@ xpnet_exit(void)
xpnet_device[0].name);
unregister_netdev(xpnet_device);
-
free_netdev(xpnet_device);
+ kfree(xpnet_broadcast_partitions);
}
module_exit(xpnet_exit);
diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index 66e5a5487c2..86dbb366415 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -213,7 +213,8 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
struct mmc_blk_data *md = mq->data;
struct mmc_card *card = md->queue.card;
struct mmc_blk_request brq;
- int ret = 1, sg_pos, data_size;
+ int ret = 1, data_size, i;
+ struct scatterlist *sg;
mmc_claim_host(card->host);
@@ -267,18 +268,22 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
mmc_queue_bounce_pre(mq);
+ /*
+ * Adjust the sg list so it is the same size as the
+ * request.
+ */
if (brq.data.blocks !=
(req->nr_sectors >> (md->block_bits - 9))) {
data_size = brq.data.blocks * brq.data.blksz;
- for (sg_pos = 0; sg_pos < brq.data.sg_len; sg_pos++) {
- data_size -= mq->sg[sg_pos].length;
+ for_each_sg(brq.data.sg, sg, brq.data.sg_len, i) {
+ data_size -= sg->length;
if (data_size <= 0) {
- mq->sg[sg_pos].length += data_size;
- sg_pos++;
+ sg->length += data_size;
+ i++;
break;
}
}
- brq.data.sg_len = sg_pos;
+ brq.data.sg_len = i;
}
mmc_wait_for_req(card->host, &brq.mrq);
diff --git a/drivers/mmc/card/mmc_test.c b/drivers/mmc/card/mmc_test.c
index a067fe43630..f26b01d811a 100644
--- a/drivers/mmc/card/mmc_test.c
+++ b/drivers/mmc/card/mmc_test.c
@@ -388,16 +388,14 @@ static int mmc_test_transfer(struct mmc_test_card *test,
int ret, i;
unsigned long flags;
- BUG_ON(blocks * blksz > BUFFER_SIZE);
-
if (write) {
for (i = 0;i < blocks * blksz;i++)
test->scratch[i] = i;
} else {
- memset(test->scratch, 0, blocks * blksz);
+ memset(test->scratch, 0, BUFFER_SIZE);
}
local_irq_save(flags);
- sg_copy_from_buffer(sg, sg_len, test->scratch, blocks * blksz);
+ sg_copy_from_buffer(sg, sg_len, test->scratch, BUFFER_SIZE);
local_irq_restore(flags);
ret = mmc_test_set_blksize(test, blksz);
@@ -444,7 +442,7 @@ static int mmc_test_transfer(struct mmc_test_card *test,
}
} else {
local_irq_save(flags);
- sg_copy_to_buffer(sg, sg_len, test->scratch, blocks * blksz);
+ sg_copy_to_buffer(sg, sg_len, test->scratch, BUFFER_SIZE);
local_irq_restore(flags);
for (i = 0;i < blocks * blksz;i++) {
if (test->scratch[i] != (u8)i)
@@ -805,69 +803,6 @@ static int mmc_test_multi_xfersize_read(struct mmc_test_card *test)
return 0;
}
-static int mmc_test_bigsg_write(struct mmc_test_card *test)
-{
- int ret;
- unsigned int size;
- struct scatterlist sg;
-
- if (test->card->host->max_blk_count == 1)
- return RESULT_UNSUP_HOST;
-
- size = PAGE_SIZE * 2;
- size = min(size, test->card->host->max_req_size);
- size = min(size, test->card->host->max_seg_size);
- size = min(size, test->card->host->max_blk_count * 512);
-
- memset(test->buffer, 0, BUFFER_SIZE);
-
- if (size < 1024)
- return RESULT_UNSUP_HOST;
-
- sg_init_table(&sg, 1);
- sg_init_one(&sg, test->buffer, BUFFER_SIZE);
-
- ret = mmc_test_transfer(test, &sg, 1, 0, size/512, 512, 1);
- if (ret)
- return ret;
-
- return 0;
-}
-
-static int mmc_test_bigsg_read(struct mmc_test_card *test)
-{
- int ret, i;
- unsigned int size;
- struct scatterlist sg;
-
- if (test->card->host->max_blk_count == 1)
- return RESULT_UNSUP_HOST;
-
- size = PAGE_SIZE * 2;
- size = min(size, test->card->host->max_req_size);
- size = min(size, test->card->host->max_seg_size);
- size = min(size, test->card->host->max_blk_count * 512);
-
- if (size < 1024)
- return RESULT_UNSUP_HOST;
-
- memset(test->buffer, 0xCD, BUFFER_SIZE);
-
- sg_init_table(&sg, 1);
- sg_init_one(&sg, test->buffer, BUFFER_SIZE);
- ret = mmc_test_transfer(test, &sg, 1, 0, size/512, 512, 0);
- if (ret)
- return ret;
-
- /* mmc_test_transfer() doesn't check for read overflows */
- for (i = size;i < BUFFER_SIZE;i++) {
- if (test->buffer[i] != 0xCD)
- return RESULT_FAIL;
- }
-
- return 0;
-}
-
#ifdef CONFIG_HIGHMEM
static int mmc_test_write_high(struct mmc_test_card *test)
@@ -1071,20 +1006,6 @@ static const struct mmc_test_case mmc_test_cases[] = {
.run = mmc_test_multi_xfersize_read,
},
- {
- .name = "Over-sized SG list write",
- .prepare = mmc_test_prepare_write,
- .run = mmc_test_bigsg_write,
- .cleanup = mmc_test_cleanup,
- },
-
- {
- .name = "Over-sized SG list read",
- .prepare = mmc_test_prepare_read,
- .run = mmc_test_bigsg_read,
- .cleanup = mmc_test_cleanup,
- },
-
#ifdef CONFIG_HIGHMEM
{
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 3ee5b8c3b5c..044d84eeed7 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -121,6 +121,7 @@ mmc_start_request(struct mmc_host *host, struct mmc_request *mrq)
{
#ifdef CONFIG_MMC_DEBUG
unsigned int i, sz;
+ struct scatterlist *sg;
#endif
pr_debug("%s: starting CMD%u arg %08x flags %08x\n",
@@ -156,8 +157,8 @@ mmc_start_request(struct mmc_host *host, struct mmc_request *mrq)
#ifdef CONFIG_MMC_DEBUG
sz = 0;
- for (i = 0;i < mrq->data->sg_len;i++)
- sz += mrq->data->sg[i].length;
+ for_each_sg(mrq->data->sg, sg, mrq->data->sg_len, i)
+ sz += sg->length;
BUG_ON(sz != mrq->data->blocks * mrq->data->blksz);
#endif
diff --git a/drivers/mmc/host/au1xmmc.c b/drivers/mmc/host/au1xmmc.c
index 99b20917cc0..d3f55615c09 100644
--- a/drivers/mmc/host/au1xmmc.c
+++ b/drivers/mmc/host/au1xmmc.c
@@ -61,7 +61,13 @@
/* Hardware definitions */
#define AU1XMMC_DESCRIPTOR_COUNT 1
-#define AU1XMMC_DESCRIPTOR_SIZE 2048
+
+/* max DMA seg size: 64KB on Au1100, 4MB on Au1200 */
+#ifdef CONFIG_SOC_AU1100
+#define AU1XMMC_DESCRIPTOR_SIZE 0x0000ffff
+#else /* Au1200 */
+#define AU1XMMC_DESCRIPTOR_SIZE 0x003fffff
+#endif
#define AU1XMMC_OCR (MMC_VDD_27_28 | MMC_VDD_28_29 | MMC_VDD_29_30 | \
MMC_VDD_30_31 | MMC_VDD_31_32 | MMC_VDD_32_33 | \
diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c
index deb607c52c0..fcb14c2346c 100644
--- a/drivers/mmc/host/sdhci-pci.c
+++ b/drivers/mmc/host/sdhci-pci.c
@@ -143,7 +143,8 @@ static int jmicron_probe(struct sdhci_pci_chip *chip)
chip->quirks |= SDHCI_QUIRK_32BIT_DMA_ADDR |
SDHCI_QUIRK_32BIT_DMA_SIZE |
SDHCI_QUIRK_32BIT_ADMA_SIZE |
- SDHCI_QUIRK_RESET_AFTER_REQUEST;
+ SDHCI_QUIRK_RESET_AFTER_REQUEST |
+ SDHCI_QUIRK_BROKEN_SMALL_PIO;
}
/*
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 5f95e10229b..e3a8133560a 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -278,6 +278,15 @@ static void sdhci_transfer_pio(struct sdhci_host *host)
else
mask = SDHCI_SPACE_AVAILABLE;
+ /*
+ * Some controllers (JMicron JMB38x) mess up the buffer bits
+ * for transfers < 4 bytes. As long as it is just one block,
+ * we can ignore the bits.
+ */
+ if ((host->quirks & SDHCI_QUIRK_BROKEN_SMALL_PIO) &&
+ (host->data->blocks == 1))
+ mask = ~0;
+
while (readl(host->ioaddr + SDHCI_PRESENT_STATE) & mask) {
if (host->data->flags & MMC_DATA_READ)
sdhci_read_block_pio(host);
@@ -439,7 +448,7 @@ static int sdhci_adma_table_pre(struct sdhci_host *host,
host->adma_addr = dma_map_single(mmc_dev(host->mmc),
host->adma_desc, (128 * 2 + 1) * 4, DMA_TO_DEVICE);
- if (dma_mapping_error(mmc_dev(host->mmc), host->align_addr))
+ if (dma_mapping_error(mmc_dev(host->mmc), host->adma_addr))
goto unmap_entries;
BUG_ON(host->adma_addr & 0x3);
@@ -645,7 +654,7 @@ static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_data *data)
* us an invalid request.
*/
WARN_ON(1);
- host->flags &= ~SDHCI_USE_DMA;
+ host->flags &= ~SDHCI_REQ_USE_DMA;
} else {
writel(host->adma_addr,
host->ioaddr + SDHCI_ADMA_ADDRESS);
@@ -664,7 +673,7 @@ static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_data *data)
* us an invalid request.
*/
WARN_ON(1);
- host->flags &= ~SDHCI_USE_DMA;
+ host->flags &= ~SDHCI_REQ_USE_DMA;
} else {
WARN_ON(sg_cnt != 1);
writel(sg_dma_address(data->sg),
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index e354faee5df..197d4a05f4a 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -206,6 +206,8 @@ struct sdhci_host {
#define SDHCI_QUIRK_NO_SIMULT_VDD_AND_POWER (1<<11)
/* Controller provides an incorrect timeout value for transfers */
#define SDHCI_QUIRK_BROKEN_TIMEOUT_VAL (1<<12)
+/* Controller has an issue with buffer bits for small transfers */
+#define SDHCI_QUIRK_BROKEN_SMALL_PIO (1<<13)
int irq; /* Device IRQ */
void __iomem * ioaddr; /* Mapped address */
diff --git a/drivers/mtd/mtdsuper.c b/drivers/mtd/mtdsuper.c
index 28cc6787a80..9b6af7e74a6 100644
--- a/drivers/mtd/mtdsuper.c
+++ b/drivers/mtd/mtdsuper.c
@@ -125,7 +125,7 @@ int get_sb_mtd(struct file_system_type *fs_type, int flags,
int (*fill_super)(struct super_block *, void *, int),
struct vfsmount *mnt)
{
- struct nameidata nd;
+ struct block_device *bdev;
int mtdnr, ret;
if (!dev_name)
@@ -181,29 +181,20 @@ int get_sb_mtd(struct file_system_type *fs_type, int flags,
/* try the old way - the hack where we allowed users to mount
* /dev/mtdblock$(n) but didn't actually _use_ the blockdev
*/
- ret = path_lookup(dev_name, LOOKUP_FOLLOW, &nd);
-
- DEBUG(1, "MTDSB: path_lookup() returned %d, inode %p\n",
- ret, nd.path.dentry ? nd.path.dentry->d_inode : NULL);
-
- if (ret)
+ bdev = lookup_bdev(dev_name);
+ if (IS_ERR(bdev)) {
+ ret = PTR_ERR(bdev);
+ DEBUG(1, "MTDSB: lookup_bdev() returned %d\n", ret);
return ret;
-
- ret = -EINVAL;
-
- if (!S_ISBLK(nd.path.dentry->d_inode->i_mode))
- goto out;
-
- if (nd.path.mnt->mnt_flags & MNT_NODEV) {
- ret = -EACCES;
- goto out;
}
+ DEBUG(1, "MTDSB: lookup_bdev() returned 0\n");
- if (imajor(nd.path.dentry->d_inode) != MTD_BLOCK_MAJOR)
+ ret = -EINVAL;
+ if (MAJOR(bdev->bd_dev) != MTD_BLOCK_MAJOR)
goto not_an_MTD_device;
- mtdnr = iminor(nd.path.dentry->d_inode);
- path_put(&nd.path);
+ mtdnr = MINOR(bdev->bd_dev);
+ bdput(bdev);
return get_sb_mtd_nr(fs_type, flags, dev_name, data, mtdnr, fill_super,
mnt);
@@ -213,10 +204,8 @@ not_an_MTD_device:
printk(KERN_NOTICE
"MTD: Attempt to mount non-MTD device \"%s\"\n",
dev_name);
-out:
- path_put(&nd.path);
+ bdput(bdev);
return ret;
-
}
EXPORT_SYMBOL_GPL(get_sb_mtd);
diff --git a/drivers/net/niu.c b/drivers/net/niu.c
index 8ee7d7bb951..e4765b713ab 100644
--- a/drivers/net/niu.c
+++ b/drivers/net/niu.c
@@ -6417,7 +6417,7 @@ static int niu_ethflow_to_class(int flow_type, u64 *class)
*class = CLASS_CODE_SCTP_IPV6;
break;
default:
- return -1;
+ return 0;
}
return 1;
diff --git a/drivers/net/ps3_gelic_wireless.c b/drivers/net/ps3_gelic_wireless.c
index 6b2dee0cf3a..a834b52a6a2 100644
--- a/drivers/net/ps3_gelic_wireless.c
+++ b/drivers/net/ps3_gelic_wireless.c
@@ -1024,7 +1024,7 @@ static int gelic_wl_set_encode(struct net_device *netdev,
struct gelic_wl_info *wl = port_wl(netdev_priv(netdev));
struct iw_point *enc = &data->encoding;
__u16 flags;
- unsigned int irqflag;
+ unsigned long irqflag;
int key_index, index_specified;
int ret = 0;
@@ -1097,7 +1097,7 @@ static int gelic_wl_get_encode(struct net_device *netdev,
{
struct gelic_wl_info *wl = port_wl(netdev_priv(netdev));
struct iw_point *enc = &data->encoding;
- unsigned int irqflag;
+ unsigned long irqflag;
unsigned int key_index, index_specified;
int ret = 0;
@@ -1215,7 +1215,7 @@ static int gelic_wl_set_encodeext(struct net_device *netdev,
struct iw_encode_ext *ext = (struct iw_encode_ext *)extra;
__u16 alg;
__u16 flags;
- unsigned int irqflag;
+ unsigned long irqflag;
int key_index;
int ret = 0;
@@ -1303,7 +1303,7 @@ static int gelic_wl_get_encodeext(struct net_device *netdev,
struct gelic_wl_info *wl = port_wl(netdev_priv(netdev));
struct iw_point *enc = &data->encoding;
struct iw_encode_ext *ext = (struct iw_encode_ext *)extra;
- unsigned int irqflag;
+ unsigned long irqflag;
int key_index;
int ret = 0;
int max_key_len;
@@ -1426,7 +1426,7 @@ static int gelic_wl_priv_set_psk(struct net_device *net_dev,
{
struct gelic_wl_info *wl = port_wl(netdev_priv(net_dev));
unsigned int len;
- unsigned int irqflag;
+ unsigned long irqflag;
int ret = 0;
pr_debug("%s:<- len=%d\n", __func__, data->data.length);
@@ -1467,7 +1467,7 @@ static int gelic_wl_priv_get_psk(struct net_device *net_dev,
{
struct gelic_wl_info *wl = port_wl(netdev_priv(net_dev));
char *p;
- unsigned int irqflag;
+ unsigned long irqflag;
unsigned int i;
pr_debug("%s:<-\n", __func__);
diff --git a/drivers/net/wireless/ath5k/base.c b/drivers/net/wireless/ath5k/base.c
index d9769c52734..ff3fad794b6 100644
--- a/drivers/net/wireless/ath5k/base.c
+++ b/drivers/net/wireless/ath5k/base.c
@@ -43,7 +43,9 @@
#include <linux/version.h>
#include <linux/module.h>
#include <linux/delay.h>
+#include <linux/hardirq.h>
#include <linux/if.h>
+#include <linux/io.h>
#include <linux/netdevice.h>
#include <linux/cache.h>
#include <linux/pci.h>
@@ -471,9 +473,6 @@ ath5k_pci_probe(struct pci_dev *pdev,
/* Set private data */
pci_set_drvdata(pdev, hw);
- /* Enable msi for devices that support it */
- pci_enable_msi(pdev);
-
/* Setup interrupt handler */
ret = request_irq(pdev->irq, ath5k_intr, IRQF_SHARED, "ath", sc);
if (ret) {
@@ -551,7 +550,6 @@ err_ah:
err_irq:
free_irq(pdev->irq, sc);
err_free:
- pci_disable_msi(pdev);
ieee80211_free_hw(hw);
err_map:
pci_iounmap(pdev, mem);
@@ -573,7 +571,6 @@ ath5k_pci_remove(struct pci_dev *pdev)
ath5k_detach(pdev, hw);
ath5k_hw_detach(sc->ah);
free_irq(pdev->irq, sc);
- pci_disable_msi(pdev);
pci_iounmap(pdev, sc->iobase);
pci_release_region(pdev, 0);
pci_disable_device(pdev);
@@ -590,6 +587,9 @@ ath5k_pci_suspend(struct pci_dev *pdev, pm_message_t state)
ath5k_led_off(sc);
ath5k_stop_hw(sc);
+
+ free_irq(pdev->irq, sc);
+ pci_disable_msi(pdev);
pci_save_state(pdev);
pci_disable_device(pdev);
pci_set_power_state(pdev, PCI_D3hot);
@@ -605,15 +605,12 @@ ath5k_pci_resume(struct pci_dev *pdev)
struct ath5k_hw *ah = sc->ah;
int i, err;
- err = pci_set_power_state(pdev, PCI_D0);
- if (err)
- return err;
+ pci_restore_state(pdev);
err = pci_enable_device(pdev);
if (err)
return err;
- pci_restore_state(pdev);
/*
* Suspend/Resume resets the PCI configuration space, so we have to
* re-disable the RETRY_TIMEOUT register (0x41) to keep
@@ -621,7 +618,17 @@ ath5k_pci_resume(struct pci_dev *pdev)
*/
pci_write_config_byte(pdev, 0x41, 0);
- ath5k_init(sc);
+ pci_enable_msi(pdev);
+
+ err = request_irq(pdev->irq, ath5k_intr, IRQF_SHARED, "ath", sc);
+ if (err) {
+ ATH5K_ERR(sc, "request_irq failed\n");
+ goto err_msi;
+ }
+
+ err = ath5k_init(sc);
+ if (err)
+ goto err_irq;
ath5k_led_enable(sc);
/*
@@ -635,6 +642,12 @@ ath5k_pci_resume(struct pci_dev *pdev)
ath5k_hw_reset_key(ah, i);
return 0;
+err_irq:
+ free_irq(pdev->irq, sc);
+err_msi:
+ pci_disable_msi(pdev);
+ pci_disable_device(pdev);
+ return err;
}
#endif /* CONFIG_PM */
@@ -1224,7 +1237,7 @@ ath5k_txbuf_setup(struct ath5k_softc *sc, struct ath5k_buf *bf)
pktlen = skb->len;
- if (!(info->flags & IEEE80211_TX_CTL_DO_NOT_ENCRYPT)) {
+ if (info->control.hw_key) {
keyidx = info->control.hw_key->hw_key_idx;
pktlen += info->control.icv_len;
}
@@ -1249,6 +1262,7 @@ ath5k_txbuf_setup(struct ath5k_softc *sc, struct ath5k_buf *bf)
txq->link = &ds->ds_link;
ath5k_hw_tx_start(ah, txq->qnum);
+ mmiowb();
spin_unlock_bh(&txq->lock);
return 0;
@@ -1583,7 +1597,6 @@ ath5k_rx_stop(struct ath5k_softc *sc)
ath5k_hw_stop_pcu_recv(ah); /* disable PCU */
ath5k_hw_set_rx_filter(ah, 0); /* clear recv filter */
ath5k_hw_stop_rx_dma(ah); /* disable DMA engine */
- mdelay(3); /* 3ms is long enough for 1 frame */
ath5k_debug_printrxbuffs(sc, ah);
@@ -1682,31 +1695,44 @@ ath5k_tasklet_rx(unsigned long data)
struct ath5k_rx_status rs = {};
struct sk_buff *skb;
struct ath5k_softc *sc = (void *)data;
- struct ath5k_buf *bf;
+ struct ath5k_buf *bf, *bf_last;
struct ath5k_desc *ds;
int ret;
int hdrlen;
int pad;
spin_lock(&sc->rxbuflock);
+ if (list_empty(&sc->rxbuf)) {
+ ATH5K_WARN(sc, "empty rx buf pool\n");
+ goto unlock;
+ }
+ bf_last = list_entry(sc->rxbuf.prev, struct ath5k_buf, list);
do {
rxs.flag = 0;
- if (unlikely(list_empty(&sc->rxbuf))) {
- ATH5K_WARN(sc, "empty rx buf pool\n");
- break;
- }
bf = list_first_entry(&sc->rxbuf, struct ath5k_buf, list);
BUG_ON(bf->skb == NULL);
skb = bf->skb;
ds = bf->desc;
- /* TODO only one segment */
- pci_dma_sync_single_for_cpu(sc->pdev, sc->desc_daddr,
- sc->desc_len, PCI_DMA_FROMDEVICE);
-
- if (unlikely(ds->ds_link == bf->daddr)) /* this is the end */
- break;
+ /*
+ * last buffer must not be freed to ensure proper hardware
+ * function. When the hardware finishes also a packet next to
+ * it, we are sure, it doesn't use it anymore and we can go on.
+ */
+ if (bf_last == bf)
+ bf->flags |= 1;
+ if (bf->flags) {
+ struct ath5k_buf *bf_next = list_entry(bf->list.next,
+ struct ath5k_buf, list);
+ ret = sc->ah->ah_proc_rx_desc(sc->ah, bf_next->desc,
+ &rs);
+ if (ret)
+ break;
+ bf->flags &= ~1;
+ /* skip the overwritten one (even status is martian) */
+ goto next;
+ }
ret = sc->ah->ah_proc_rx_desc(sc->ah, ds, &rs);
if (unlikely(ret == -EINPROGRESS))
@@ -1752,8 +1778,6 @@ ath5k_tasklet_rx(unsigned long data)
goto next;
}
accept:
- pci_dma_sync_single_for_cpu(sc->pdev, bf->skbaddr,
- rs.rs_datalen, PCI_DMA_FROMDEVICE);
pci_unmap_single(sc->pdev, bf->skbaddr, sc->rxbufsize,
PCI_DMA_FROMDEVICE);
bf->skb = NULL;
@@ -1816,6 +1840,7 @@ accept:
next:
list_move_tail(&bf->list, &sc->rxbuf);
} while (ath5k_rxbuf_setup(sc, bf) == 0);
+unlock:
spin_unlock(&sc->rxbuflock);
}
@@ -1840,9 +1865,6 @@ ath5k_tx_processq(struct ath5k_softc *sc, struct ath5k_txq *txq)
list_for_each_entry_safe(bf, bf0, &txq->q, list) {
ds = bf->desc;
- /* TODO only one segment */
- pci_dma_sync_single_for_cpu(sc->pdev, sc->desc_daddr,
- sc->desc_len, PCI_DMA_FROMDEVICE);
ret = sc->ah->ah_proc_tx_desc(sc->ah, ds, &ts);
if (unlikely(ret == -EINPROGRESS))
break;
@@ -2015,8 +2037,6 @@ ath5k_beacon_send(struct ath5k_softc *sc)
ATH5K_WARN(sc, "beacon queue %u didn't stop?\n", sc->bhalq);
/* NB: hw still stops DMA, so proceed */
}
- pci_dma_sync_single_for_cpu(sc->pdev, bf->skbaddr, bf->skb->len,
- PCI_DMA_TODEVICE);
ath5k_hw_put_tx_buf(ah, sc->bhalq, bf->daddr);
ath5k_hw_tx_start(ah, sc->bhalq);
@@ -2240,6 +2260,7 @@ ath5k_init(struct ath5k_softc *sc)
ret = 0;
done:
+ mmiowb();
mutex_unlock(&sc->lock);
return ret;
}
@@ -2272,6 +2293,7 @@ ath5k_stop_locked(struct ath5k_softc *sc)
if (!test_bit(ATH_STAT_INVALID, sc->status)) {
ath5k_led_off(sc);
ath5k_hw_set_intr(ah, 0);
+ synchronize_irq(sc->pdev->irq);
}
ath5k_txq_cleanup(sc);
if (!test_bit(ATH_STAT_INVALID, sc->status)) {
@@ -2321,9 +2343,13 @@ ath5k_stop_hw(struct ath5k_softc *sc)
}
}
ath5k_txbuf_free(sc, sc->bbuf);
+ mmiowb();
mutex_unlock(&sc->lock);
del_timer_sync(&sc->calib_tim);
+ tasklet_kill(&sc->rxtq);
+ tasklet_kill(&sc->txtq);
+ tasklet_kill(&sc->restq);
return ret;
}
@@ -2550,8 +2576,6 @@ ath5k_init_leds(struct ath5k_softc *sc)
struct pci_dev *pdev = sc->pdev;
char name[ATH5K_LED_MAX_NAME_LEN + 1];
- sc->led_on = 0; /* active low */
-
/*
* Auto-enable soft led processing for IBM cards and for
* 5211 minipci cards.
@@ -2560,11 +2584,13 @@ ath5k_init_leds(struct ath5k_softc *sc)
pdev->device == PCI_DEVICE_ID_ATHEROS_AR5211) {
__set_bit(ATH_STAT_LEDSOFT, sc->status);
sc->led_pin = 0;
+ sc->led_on = 0; /* active low */
}
/* Enable softled on PIN1 on HP Compaq nc6xx, nc4000 & nx5000 laptops */
if (pdev->subsystem_vendor == PCI_VENDOR_ID_COMPAQ) {
__set_bit(ATH_STAT_LEDSOFT, sc->status);
sc->led_pin = 1;
+ sc->led_on = 1; /* active high */
}
if (!test_bit(ATH_STAT_LEDSOFT, sc->status))
goto out;
@@ -2783,6 +2809,7 @@ ath5k_config_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
/* XXX: assoc id is set to 0 for now, mac80211 doesn't have
* a clean way of letting us retrieve this yet. */
ath5k_hw_set_associd(ah, ah->ah_bssid, 0);
+ mmiowb();
}
if (conf->changed & IEEE80211_IFCC_BEACON &&
@@ -2971,6 +2998,7 @@ ath5k_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
}
unlock:
+ mmiowb();
mutex_unlock(&sc->lock);
return ret;
}
@@ -3032,8 +3060,6 @@ ath5k_beacon_update(struct ieee80211_hw *hw, struct sk_buff *skb)
ath5k_debug_dump_skb(sc, skb, "BC ", 1);
- mutex_lock(&sc->lock);
-
if (sc->opmode != IEEE80211_IF_TYPE_IBSS) {
ret = -EIO;
goto end;
@@ -3044,11 +3070,12 @@ ath5k_beacon_update(struct ieee80211_hw *hw, struct sk_buff *skb)
ret = ath5k_beacon_setup(sc, sc->bbuf);
if (ret)
sc->bbuf->skb = NULL;
- else
+ else {
ath5k_beacon_config(sc);
+ mmiowb();
+ }
end:
- mutex_unlock(&sc->lock);
return ret;
}
diff --git a/drivers/net/wireless/ath5k/base.h b/drivers/net/wireless/ath5k/base.h
index 47f414b09e6..d7e03e6b827 100644
--- a/drivers/net/wireless/ath5k/base.h
+++ b/drivers/net/wireless/ath5k/base.h
@@ -56,7 +56,7 @@
struct ath5k_buf {
struct list_head list;
- unsigned int flags; /* tx descriptor flags */
+ unsigned int flags; /* rx descriptor flags */
struct ath5k_desc *desc; /* virtual addr of desc */
dma_addr_t daddr; /* physical addr of desc */
struct sk_buff *skb; /* skbuff for buf */
diff --git a/drivers/net/wireless/ath5k/hw.c b/drivers/net/wireless/ath5k/hw.c
index c6d12c53bda..7ca87a55731 100644
--- a/drivers/net/wireless/ath5k/hw.c
+++ b/drivers/net/wireless/ath5k/hw.c
@@ -1440,6 +1440,7 @@ int ath5k_hw_stop_tx_dma(struct ath5k_hw *ah, unsigned int queue)
/* Stop queue */
ath5k_hw_reg_write(ah, tx_queue, AR5K_CR);
+ ath5k_hw_reg_read(ah, AR5K_CR);
} else {
/*
* Schedule TX disable and wait until queue is empty
@@ -1456,6 +1457,8 @@ int ath5k_hw_stop_tx_dma(struct ath5k_hw *ah, unsigned int queue)
/* Clear register */
ath5k_hw_reg_write(ah, 0, AR5K_QCU_TXD);
+ if (pending)
+ return -EBUSY;
}
/* TODO: Check for success else return error */
@@ -1716,6 +1719,7 @@ enum ath5k_int ath5k_hw_set_intr(struct ath5k_hw *ah, enum ath5k_int new_mask)
/* ..re-enable interrupts */
ath5k_hw_reg_write(ah, AR5K_IER_ENABLE, AR5K_IER);
+ ath5k_hw_reg_read(ah, AR5K_IER);
return old_mask;
}
diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c
index e78319aa47c..3bf3a869361 100644
--- a/drivers/net/wireless/b43/main.c
+++ b/drivers/net/wireless/b43/main.c
@@ -4645,8 +4645,7 @@ static int b43_wireless_init(struct ssb_device *dev)
}
/* fill hw info */
- hw->flags = IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE |
- IEEE80211_HW_RX_INCLUDES_FCS |
+ hw->flags = IEEE80211_HW_RX_INCLUDES_FCS |
IEEE80211_HW_SIGNAL_DBM |
IEEE80211_HW_NOISE_DBM;
diff --git a/drivers/net/wireless/b43/xmit.c b/drivers/net/wireless/b43/xmit.c
index 8d54502222a..9dda8169f7c 100644
--- a/drivers/net/wireless/b43/xmit.c
+++ b/drivers/net/wireless/b43/xmit.c
@@ -192,7 +192,7 @@ int b43_generate_txhdr(struct b43_wldev *dev,
const struct b43_phy *phy = &dev->phy;
const struct ieee80211_hdr *wlhdr =
(const struct ieee80211_hdr *)fragment_data;
- int use_encryption = (!(info->flags & IEEE80211_TX_CTL_DO_NOT_ENCRYPT));
+ int use_encryption = !!info->control.hw_key;
__le16 fctl = wlhdr->frame_control;
struct ieee80211_rate *fbrate;
u8 rate, rate_fb;
diff --git a/drivers/net/wireless/b43legacy/main.c b/drivers/net/wireless/b43legacy/main.c
index a1b8bf3ee73..2541c81932f 100644
--- a/drivers/net/wireless/b43legacy/main.c
+++ b/drivers/net/wireless/b43legacy/main.c
@@ -3702,8 +3702,7 @@ static int b43legacy_wireless_init(struct ssb_device *dev)
}
/* fill hw info */
- hw->flags = IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE |
- IEEE80211_HW_RX_INCLUDES_FCS |
+ hw->flags = IEEE80211_HW_RX_INCLUDES_FCS |
IEEE80211_HW_SIGNAL_DBM |
IEEE80211_HW_NOISE_DBM;
hw->queues = 1; /* FIXME: hardware has more queues */
@@ -3846,10 +3845,10 @@ static int b43legacy_resume(struct ssb_device *dev)
goto out;
}
}
- mutex_unlock(&wl->mutex);
b43legacydbg(wl, "Device resumed.\n");
out:
+ mutex_unlock(&wl->mutex);
return err;
}
diff --git a/drivers/net/wireless/b43legacy/xmit.c b/drivers/net/wireless/b43legacy/xmit.c
index e969ed8d412..68e1f8c7872 100644
--- a/drivers/net/wireless/b43legacy/xmit.c
+++ b/drivers/net/wireless/b43legacy/xmit.c
@@ -192,7 +192,7 @@ static int generate_txhdr_fw3(struct b43legacy_wldev *dev,
u16 cookie)
{
const struct ieee80211_hdr *wlhdr;
- int use_encryption = (!(info->flags & IEEE80211_TX_CTL_DO_NOT_ENCRYPT));
+ int use_encryption = !!info->control.hw_key;
u16 fctl;
u8 rate;
struct ieee80211_rate *rate_fb;
diff --git a/drivers/net/wireless/ipw2100.c b/drivers/net/wireless/ipw2100.c
index 5bf9e00b070..c6f886ec08a 100644
--- a/drivers/net/wireless/ipw2100.c
+++ b/drivers/net/wireless/ipw2100.c
@@ -6442,6 +6442,7 @@ static int ipw2100_resume(struct pci_dev *pci_dev)
if (err) {
printk(KERN_ERR "%s: pci_enable_device failed on resume\n",
dev->name);
+ mutex_unlock(&priv->action_mutex);
return err;
}
pci_restore_state(pci_dev);
@@ -7146,7 +7147,7 @@ static int ipw2100_wx_get_rate(struct net_device *dev,
err = ipw2100_get_ordinal(priv, IPW_ORD_CURRENT_TX_RATE, &val, &len);
if (err) {
IPW_DEBUG_WX("failed querying ordinals.\n");
- return err;
+ goto done;
}
switch (val & TX_RATE_MASK) {
diff --git a/drivers/net/wireless/iwlwifi/iwl-3945.c b/drivers/net/wireless/iwlwifi/iwl-3945.c
index c2a76785b66..a51e0eaa133 100644
--- a/drivers/net/wireless/iwlwifi/iwl-3945.c
+++ b/drivers/net/wireless/iwlwifi/iwl-3945.c
@@ -630,7 +630,9 @@ static void iwl3945_pass_packet_to_mac80211(struct iwl3945_priv *priv,
struct ieee80211_rx_status *stats)
{
struct iwl3945_rx_packet *pkt = (struct iwl3945_rx_packet *)rxb->skb->data;
+#ifdef CONFIG_IWL3945_LEDS
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)IWL_RX_DATA(pkt);
+#endif
struct iwl3945_rx_frame_hdr *rx_hdr = IWL_RX_HDR(pkt);
struct iwl3945_rx_frame_end *rx_end = IWL_RX_END(pkt);
short len = le16_to_cpu(rx_hdr->len);
diff --git a/drivers/net/wireless/iwlwifi/iwl-core.c b/drivers/net/wireless/iwlwifi/iwl-core.c
index a44188bf445..e3427c205cc 100644
--- a/drivers/net/wireless/iwlwifi/iwl-core.c
+++ b/drivers/net/wireless/iwlwifi/iwl-core.c
@@ -818,8 +818,7 @@ int iwl_setup_mac(struct iwl_priv *priv)
hw->rate_control_algorithm = "iwl-4965-rs";
/* Tell mac80211 our characteristics */
- hw->flags = IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE |
- IEEE80211_HW_SIGNAL_DBM |
+ hw->flags = IEEE80211_HW_SIGNAL_DBM |
IEEE80211_HW_NOISE_DBM;
/* Default value; 4 EDCA QOS priorities */
hw->queues = 4;
diff --git a/drivers/net/wireless/iwlwifi/iwl-debug.h b/drivers/net/wireless/iwlwifi/iwl-debug.h
index 58384805a49..d6d729e86bd 100644
--- a/drivers/net/wireless/iwlwifi/iwl-debug.h
+++ b/drivers/net/wireless/iwlwifi/iwl-debug.h
@@ -68,12 +68,8 @@ void iwl_dbgfs_unregister(struct iwl_priv *priv);
#endif
#else
-static inline void IWL_DEBUG(int level, const char *fmt, ...)
-{
-}
-static inline void IWL_DEBUG_LIMIT(int level, const char *fmt, ...)
-{
-}
+#define IWL_DEBUG(level, fmt, args...)
+#define IWL_DEBUG_LIMIT(level, fmt, args...)
#endif /* CONFIG_IWLWIFI_DEBUG */
diff --git a/drivers/net/wireless/iwlwifi/iwl-led.c b/drivers/net/wireless/iwlwifi/iwl-led.c
index 899d7a2567a..61250e6a7d1 100644
--- a/drivers/net/wireless/iwlwifi/iwl-led.c
+++ b/drivers/net/wireless/iwlwifi/iwl-led.c
@@ -268,7 +268,9 @@ static int iwl_get_blink_rate(struct iwl_priv *priv)
if (tpt < 0) /* wrapparound */
tpt = -tpt;
- IWL_DEBUG_LED("tpt %lld current_tpt %lld\n", tpt, current_tpt);
+ IWL_DEBUG_LED("tpt %lld current_tpt %llu\n",
+ (long long)tpt,
+ (unsigned long long)current_tpt);
priv->led_tpt = current_tpt;
if (!priv->allow_blinking)
diff --git a/drivers/net/wireless/iwlwifi/iwl-scan.c b/drivers/net/wireless/iwlwifi/iwl-scan.c
index efc750d2fc5..5a00ac23e2d 100644
--- a/drivers/net/wireless/iwlwifi/iwl-scan.c
+++ b/drivers/net/wireless/iwlwifi/iwl-scan.c
@@ -270,6 +270,7 @@ static void iwl_rx_scan_results_notif(struct iwl_priv *priv,
static void iwl_rx_scan_complete_notif(struct iwl_priv *priv,
struct iwl_rx_mem_buffer *rxb)
{
+#ifdef CONFIG_IWLWIFI_DEBUG
struct iwl_rx_packet *pkt = (struct iwl_rx_packet *)rxb->skb->data;
struct iwl_scancomplete_notification *scan_notif = (void *)pkt->u.raw;
@@ -277,6 +278,7 @@ static void iwl_rx_scan_complete_notif(struct iwl_priv *priv,
scan_notif->scanned_channels,
scan_notif->tsf_low,
scan_notif->tsf_high, scan_notif->status);
+#endif
/* The HW is no longer scanning */
clear_bit(STATUS_SCAN_HW, &priv->status);
diff --git a/drivers/net/wireless/iwlwifi/iwl-tx.c b/drivers/net/wireless/iwlwifi/iwl-tx.c
index 9b50b1052b0..f72cd0bf6aa 100644
--- a/drivers/net/wireless/iwlwifi/iwl-tx.c
+++ b/drivers/net/wireless/iwlwifi/iwl-tx.c
@@ -906,7 +906,7 @@ int iwl_tx_skb(struct iwl_priv *priv, struct sk_buff *skb)
* first entry */
iwl_hw_txq_attach_buf_to_tfd(priv, tfd, txcmd_phys, len);
- if (!(info->flags & IEEE80211_TX_CTL_DO_NOT_ENCRYPT))
+ if (info->control.hw_key)
iwl_tx_cmd_build_hwcrypto(priv, info, tx_cmd, skb, sta_id);
/* Set up TFD's 2nd entry to point directly to remainder of skb,
diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c
index 4a22d3fba75..7c82ecfa30a 100644
--- a/drivers/net/wireless/iwlwifi/iwl3945-base.c
+++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c
@@ -2667,7 +2667,7 @@ static int iwl3945_tx_skb(struct iwl3945_priv *priv, struct sk_buff *skb)
* first entry */
iwl3945_hw_txq_attach_buf_to_tfd(priv, tfd, txcmd_phys, len);
- if (!(info->flags & IEEE80211_TX_CTL_DO_NOT_ENCRYPT))
+ if (info->control.hw_key)
iwl3945_build_tx_cmd_hwcrypto(priv, info, out_cmd, skb, 0);
/* Set up TFD's 2nd entry to point directly to remainder of skb,
@@ -7899,8 +7899,7 @@ static int iwl3945_pci_probe(struct pci_dev *pdev, const struct pci_device_id *e
priv->ibss_beacon = NULL;
/* Tell mac80211 our characteristics */
- hw->flags = IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE |
- IEEE80211_HW_SIGNAL_DBM |
+ hw->flags = IEEE80211_HW_SIGNAL_DBM |
IEEE80211_HW_NOISE_DBM;
/* 4 EDCA QOS priorities */
diff --git a/drivers/net/wireless/libertas/persistcfg.c b/drivers/net/wireless/libertas/persistcfg.c
index 6d0ff8decaf..3309a9c3cfe 100644
--- a/drivers/net/wireless/libertas/persistcfg.c
+++ b/drivers/net/wireless/libertas/persistcfg.c
@@ -48,7 +48,7 @@ static ssize_t bootflag_get(struct device *dev,
if (ret)
return ret;
- return snprintf(buf, 12, "0x%x\n", le32_to_cpu(defs.bootflag));
+ return snprintf(buf, 12, "%d\n", le32_to_cpu(defs.bootflag));
}
/**
@@ -63,8 +63,8 @@ static ssize_t bootflag_set(struct device *dev, struct device_attribute *attr,
int ret;
memset(&cmd, 0, sizeof(cmd));
- ret = sscanf(buf, "%x", &datum);
- if (ret != 1)
+ ret = sscanf(buf, "%d", &datum);
+ if ((ret != 1) || (datum > 1))
return -EINVAL;
*((__le32 *)&cmd.data[0]) = cpu_to_le32(!!datum);
@@ -91,7 +91,7 @@ static ssize_t boottime_get(struct device *dev,
if (ret)
return ret;
- return snprintf(buf, 12, "0x%x\n", defs.boottime);
+ return snprintf(buf, 12, "%d\n", defs.boottime);
}
/**
@@ -106,8 +106,8 @@ static ssize_t boottime_set(struct device *dev,
int ret;
memset(&cmd, 0, sizeof(cmd));
- ret = sscanf(buf, "%x", &datum);
- if (ret != 1)
+ ret = sscanf(buf, "%d", &datum);
+ if ((ret != 1) || (datum > 255))
return -EINVAL;
/* A too small boot time will result in the device booting into
@@ -143,7 +143,7 @@ static ssize_t channel_get(struct device *dev,
if (ret)
return ret;
- return snprintf(buf, 12, "0x%x\n", le16_to_cpu(defs.channel));
+ return snprintf(buf, 12, "%d\n", le16_to_cpu(defs.channel));
}
/**
@@ -154,11 +154,11 @@ static ssize_t channel_set(struct device *dev, struct device_attribute *attr,
{
struct lbs_private *priv = to_net_dev(dev)->priv;
struct cmd_ds_mesh_config cmd;
- uint16_t datum;
+ uint32_t datum;
int ret;
memset(&cmd, 0, sizeof(cmd));
- ret = sscanf(buf, "%hx", &datum);
+ ret = sscanf(buf, "%d", &datum);
if (ret != 1 || datum < 1 || datum > 11)
return -EINVAL;
@@ -274,8 +274,8 @@ static ssize_t protocol_id_set(struct device *dev,
int ret;
memset(&cmd, 0, sizeof(cmd));
- ret = sscanf(buf, "%x", &datum);
- if (ret != 1)
+ ret = sscanf(buf, "%d", &datum);
+ if ((ret != 1) || (datum > 255))
return -EINVAL;
/* fetch all other Information Element parameters */
@@ -328,8 +328,8 @@ static ssize_t metric_id_set(struct device *dev, struct device_attribute *attr,
int ret;
memset(&cmd, 0, sizeof(cmd));
- ret = sscanf(buf, "%x", &datum);
- if (ret != 1)
+ ret = sscanf(buf, "%d", &datum);
+ if ((ret != 1) || (datum > 255))
return -EINVAL;
/* fetch all other Information Element parameters */
@@ -382,8 +382,8 @@ static ssize_t capability_set(struct device *dev, struct device_attribute *attr,
int ret;
memset(&cmd, 0, sizeof(cmd));
- ret = sscanf(buf, "%x", &datum);
- if (ret != 1)
+ ret = sscanf(buf, "%d", &datum);
+ if ((ret != 1) || (datum > 255))
return -EINVAL;
/* fetch all other Information Element parameters */
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 5816230d58f..248d31a7aa3 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -500,7 +500,7 @@ failed_hw:
device_unregister(data->dev);
failed_drvdata:
ieee80211_free_hw(hw);
- hwsim_radios[i] = 0;
+ hwsim_radios[i] = NULL;
failed:
mac80211_hwsim_free();
return err;
diff --git a/drivers/net/wireless/rt2x00/rt2500usb.c b/drivers/net/wireless/rt2x00/rt2500usb.c
index 3558cb21074..3078417b326 100644
--- a/drivers/net/wireless/rt2x00/rt2500usb.c
+++ b/drivers/net/wireless/rt2x00/rt2500usb.c
@@ -1121,6 +1121,7 @@ static void rt2500usb_write_beacon(struct queue_entry *entry)
int pipe = usb_sndbulkpipe(usb_dev, 1);
int length;
u16 reg;
+ u32 word, len;
/*
* Add the descriptor in front of the skb.
@@ -1130,6 +1131,17 @@ static void rt2500usb_write_beacon(struct queue_entry *entry)
skbdesc->desc = entry->skb->data;
/*
+ * Adjust the beacon databyte count. The current number is
+ * calculated before this function gets called, but falsely
+ * assumes that the descriptor was already present in the SKB.
+ */
+ rt2x00_desc_read(skbdesc->desc, 0, &word);
+ len = rt2x00_get_field32(word, TXD_W0_DATABYTE_COUNT);
+ len += skbdesc->desc_len;
+ rt2x00_set_field32(&word, TXD_W0_DATABYTE_COUNT, len);
+ rt2x00_desc_write(skbdesc->desc, 0, word);
+
+ /*
* Disable beaconing while we are reloading the beacon data,
* otherwise we might be sending out invalid data.
*/
@@ -1650,7 +1662,6 @@ static void rt2500usb_probe_hw_mode(struct rt2x00_dev *rt2x00dev)
* Initialize all hw fields.
*/
rt2x00dev->hw->flags =
- IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE |
IEEE80211_HW_RX_INCLUDES_FCS |
IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING |
IEEE80211_HW_SIGNAL_DBM;
diff --git a/drivers/net/wireless/rt2x00/rt2x00.h b/drivers/net/wireless/rt2x00/rt2x00.h
index 07b03b3c7ef..db2dc976d83 100644
--- a/drivers/net/wireless/rt2x00/rt2x00.h
+++ b/drivers/net/wireless/rt2x00/rt2x00.h
@@ -108,7 +108,10 @@
#define SHORT_PIFS ( SIFS + SHORT_SLOT_TIME )
#define DIFS ( PIFS + SLOT_TIME )
#define SHORT_DIFS ( SHORT_PIFS + SHORT_SLOT_TIME )
-#define EIFS ( SIFS + (8 * (IEEE80211_HEADER + ACK_SIZE)) )
+#define EIFS ( SIFS + DIFS + \
+ (8 * (IEEE80211_HEADER + ACK_SIZE)) )
+#define SHORT_EIFS ( SIFS + SHORT_DIFS + \
+ (8 * (IEEE80211_HEADER + ACK_SIZE)) )
/*
* Chipset identification
@@ -597,6 +600,7 @@ enum rt2x00_flags {
DEVICE_STARTED_SUSPEND,
DEVICE_ENABLED_RADIO,
DEVICE_DISABLED_RADIO_HW,
+ DEVICE_DIRTY_CONFIG,
/*
* Driver features
diff --git a/drivers/net/wireless/rt2x00/rt2x00config.c b/drivers/net/wireless/rt2x00/rt2x00config.c
index f20ca712504..3f89516e833 100644
--- a/drivers/net/wireless/rt2x00/rt2x00config.c
+++ b/drivers/net/wireless/rt2x00/rt2x00config.c
@@ -271,7 +271,7 @@ config:
libconf.sifs = SIFS;
libconf.pifs = short_slot_time ? SHORT_PIFS : PIFS;
libconf.difs = short_slot_time ? SHORT_DIFS : DIFS;
- libconf.eifs = EIFS;
+ libconf.eifs = short_slot_time ? SHORT_EIFS : EIFS;
}
libconf.conf = conf;
diff --git a/drivers/net/wireless/rt2x00/rt2x00dev.c b/drivers/net/wireless/rt2x00/rt2x00dev.c
index 8c93eb8353b..f42283ad7b0 100644
--- a/drivers/net/wireless/rt2x00/rt2x00dev.c
+++ b/drivers/net/wireless/rt2x00/rt2x00dev.c
@@ -1013,6 +1013,7 @@ int rt2x00lib_start(struct rt2x00_dev *rt2x00dev)
rt2x00dev->intf_associated = 0;
__set_bit(DEVICE_STARTED, &rt2x00dev->flags);
+ __set_bit(DEVICE_DIRTY_CONFIG, &rt2x00dev->flags);
return 0;
}
@@ -1237,9 +1238,9 @@ int rt2x00lib_resume(struct rt2x00_dev *rt2x00dev)
/*
* Reconfigure device.
*/
- rt2x00lib_config(rt2x00dev, &rt2x00dev->hw->conf, 1);
- if (!rt2x00dev->hw->conf.radio_enabled)
- rt2x00lib_disable_radio(rt2x00dev);
+ retval = rt2x00mac_config(rt2x00dev->hw, &rt2x00dev->hw->conf);
+ if (retval)
+ goto exit;
/*
* Iterator over each active interface to
diff --git a/drivers/net/wireless/rt2x00/rt2x00lib.h b/drivers/net/wireless/rt2x00/rt2x00lib.h
index f2c9b0e79b5..c5fb3a72cf3 100644
--- a/drivers/net/wireless/rt2x00/rt2x00lib.h
+++ b/drivers/net/wireless/rt2x00/rt2x00lib.h
@@ -125,13 +125,6 @@ void rt2x00queue_unmap_skb(struct rt2x00_dev *rt2x00dev, struct sk_buff *skb);
void rt2x00queue_free_skb(struct rt2x00_dev *rt2x00dev, struct sk_buff *skb);
/**
- * rt2x00queue_free_skb - free a skb
- * @rt2x00dev: Pointer to &struct rt2x00_dev.
- * @skb: The skb to free.
- */
-void rt2x00queue_free_skb(struct rt2x00_dev *rt2x00dev, struct sk_buff *skb);
-
-/**
* rt2x00queue_write_tx_frame - Write TX frame to hardware
* @queue: Queue over which the frame should be send
* @skb: The skb to send
diff --git a/drivers/net/wireless/rt2x00/rt2x00mac.c b/drivers/net/wireless/rt2x00/rt2x00mac.c
index f1dcbaa80c3..c3ee4ecba79 100644
--- a/drivers/net/wireless/rt2x00/rt2x00mac.c
+++ b/drivers/net/wireless/rt2x00/rt2x00mac.c
@@ -63,7 +63,7 @@ static int rt2x00mac_tx_rts_cts(struct rt2x00_dev *rt2x00dev,
*/
memcpy(skb->cb, frag_skb->cb, sizeof(skb->cb));
rts_info = IEEE80211_SKB_CB(skb);
- rts_info->flags |= IEEE80211_TX_CTL_DO_NOT_ENCRYPT;
+ rts_info->control.hw_key = NULL;
rts_info->flags &= ~IEEE80211_TX_CTL_USE_RTS_CTS;
rts_info->flags &= ~IEEE80211_TX_CTL_USE_CTS_PROTECT;
rts_info->flags &= ~IEEE80211_TX_CTL_REQ_TX_STATUS;
@@ -83,6 +83,7 @@ static int rt2x00mac_tx_rts_cts(struct rt2x00_dev *rt2x00dev,
(struct ieee80211_rts *)(skb->data));
if (rt2x00queue_write_tx_frame(queue, skb)) {
+ dev_kfree_skb_any(skb);
WARNING(rt2x00dev, "Failed to send RTS/CTS frame.\n");
return NETDEV_TX_BUSY;
}
@@ -96,7 +97,6 @@ int rt2x00mac_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb);
struct ieee80211_hdr *ieee80211hdr = (struct ieee80211_hdr *)skb->data;
enum data_queue_qid qid = skb_get_queue_mapping(skb);
- struct rt2x00_intf *intf = vif_to_intf(tx_info->control.vif);
struct data_queue *queue;
u16 frame_control;
@@ -152,18 +152,6 @@ int rt2x00mac_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
}
}
- /*
- * XXX: This is as wrong as the old mac80211 code was,
- * due to beacons not getting sequence numbers assigned
- * properly.
- */
- if (tx_info->flags & IEEE80211_TX_CTL_ASSIGN_SEQ) {
- if (tx_info->flags & IEEE80211_TX_CTL_FIRST_FRAGMENT)
- intf->seqno += 0x10;
- ieee80211hdr->seq_ctrl &= cpu_to_le16(IEEE80211_SCTL_FRAG);
- ieee80211hdr->seq_ctrl |= cpu_to_le16(intf->seqno);
- }
-
if (rt2x00queue_write_tx_frame(queue, skb)) {
ieee80211_stop_queue(rt2x00dev->hw, qid);
return NETDEV_TX_BUSY;
@@ -322,6 +310,7 @@ EXPORT_SYMBOL_GPL(rt2x00mac_remove_interface);
int rt2x00mac_config(struct ieee80211_hw *hw, struct ieee80211_conf *conf)
{
struct rt2x00_dev *rt2x00dev = hw->priv;
+ int force_reconfig;
/*
* Mac80211 might be calling this function while we are trying
@@ -341,7 +330,17 @@ int rt2x00mac_config(struct ieee80211_hw *hw, struct ieee80211_conf *conf)
rt2x00lib_toggle_rx(rt2x00dev, STATE_RADIO_RX_OFF);
}
- rt2x00lib_config(rt2x00dev, conf, 0);
+ /*
+ * When the DEVICE_DIRTY_CONFIG flag is set, the device has recently
+ * been started and the configuration must be forced upon the hardware.
+ * Otherwise registers will not be intialized correctly and could
+ * result in non-working hardware because essential registers aren't
+ * initialized.
+ */
+ force_reconfig =
+ __test_and_clear_bit(DEVICE_DIRTY_CONFIG, &rt2x00dev->flags);
+
+ rt2x00lib_config(rt2x00dev, conf, force_reconfig);
/*
* Reenable RX only if the radio should be on.
diff --git a/drivers/net/wireless/rt2x00/rt2x00queue.c b/drivers/net/wireless/rt2x00/rt2x00queue.c
index 7f442030f5a..3b27f6aa860 100644
--- a/drivers/net/wireless/rt2x00/rt2x00queue.c
+++ b/drivers/net/wireless/rt2x00/rt2x00queue.c
@@ -120,6 +120,7 @@ static void rt2x00queue_create_tx_descriptor(struct queue_entry *entry,
{
struct rt2x00_dev *rt2x00dev = entry->queue->rt2x00dev;
struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(entry->skb);
+ struct rt2x00_intf *intf = vif_to_intf(tx_info->control.vif);
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)entry->skb->data;
struct ieee80211_rate *rate =
ieee80211_get_tx_rate(rt2x00dev->hw, tx_info);
@@ -200,6 +201,31 @@ static void rt2x00queue_create_tx_descriptor(struct queue_entry *entry,
}
/*
+ * Hardware should insert sequence counter.
+ * FIXME: We insert a software sequence counter first for
+ * hardware that doesn't support hardware sequence counting.
+ *
+ * This is wrong because beacons are not getting sequence
+ * numbers assigned properly.
+ *
+ * A secondary problem exists for drivers that cannot toggle
+ * sequence counting per-frame, since those will override the
+ * sequence counter given by mac80211.
+ */
+ if (tx_info->flags & IEEE80211_TX_CTL_ASSIGN_SEQ) {
+ spin_lock(&intf->lock);
+
+ if (test_bit(ENTRY_TXD_FIRST_FRAGMENT, &txdesc->flags))
+ intf->seqno += 0x10;
+ hdr->seq_ctrl &= cpu_to_le16(IEEE80211_SCTL_FRAG);
+ hdr->seq_ctrl |= cpu_to_le16(intf->seqno);
+
+ spin_unlock(&intf->lock);
+
+ __set_bit(ENTRY_TXD_GENERATE_SEQ, &txdesc->flags);
+ }
+
+ /*
* PLCP setup
* Length calculation depends on OFDM/CCK rate.
*/
@@ -466,9 +492,12 @@ void rt2x00queue_init_rx(struct rt2x00_dev *rt2x00dev)
if (!rt2x00dev->ops->lib->init_rxentry)
return;
- for (i = 0; i < queue->limit; i++)
+ for (i = 0; i < queue->limit; i++) {
+ queue->entries[i].flags = 0;
+
rt2x00dev->ops->lib->init_rxentry(rt2x00dev,
&queue->entries[i]);
+ }
}
void rt2x00queue_init_tx(struct rt2x00_dev *rt2x00dev)
@@ -482,9 +511,12 @@ void rt2x00queue_init_tx(struct rt2x00_dev *rt2x00dev)
if (!rt2x00dev->ops->lib->init_txentry)
continue;
- for (i = 0; i < queue->limit; i++)
+ for (i = 0; i < queue->limit; i++) {
+ queue->entries[i].flags = 0;
+
rt2x00dev->ops->lib->init_txentry(rt2x00dev,
&queue->entries[i]);
+ }
}
}
diff --git a/drivers/net/wireless/rt2x00/rt2x00queue.h b/drivers/net/wireless/rt2x00/rt2x00queue.h
index 8945945c892..a4a8c57004d 100644
--- a/drivers/net/wireless/rt2x00/rt2x00queue.h
+++ b/drivers/net/wireless/rt2x00/rt2x00queue.h
@@ -199,6 +199,7 @@ struct txdone_entry_desc {
* @ENTRY_TXD_RTS_FRAME: This frame is a RTS frame.
* @ENTRY_TXD_CTS_FRAME: This frame is a CTS-to-self frame.
* @ENTRY_TXD_OFDM_RATE: This frame is send out with an OFDM rate.
+ * @ENTRY_TXD_GENERATE_SEQ: This frame requires sequence counter.
* @ENTRY_TXD_FIRST_FRAGMENT: This is the first frame.
* @ENTRY_TXD_MORE_FRAG: This frame is followed by another fragment.
* @ENTRY_TXD_REQ_TIMESTAMP: Require timestamp to be inserted.
@@ -210,6 +211,7 @@ enum txentry_desc_flags {
ENTRY_TXD_RTS_FRAME,
ENTRY_TXD_CTS_FRAME,
ENTRY_TXD_OFDM_RATE,
+ ENTRY_TXD_GENERATE_SEQ,
ENTRY_TXD_FIRST_FRAGMENT,
ENTRY_TXD_MORE_FRAG,
ENTRY_TXD_REQ_TIMESTAMP,
diff --git a/drivers/net/wireless/rt2x00/rt2x00usb.c b/drivers/net/wireless/rt2x00/rt2x00usb.c
index 83862e7f7ae..933e6cc9359 100644
--- a/drivers/net/wireless/rt2x00/rt2x00usb.c
+++ b/drivers/net/wireless/rt2x00/rt2x00usb.c
@@ -122,6 +122,38 @@ int rt2x00usb_vendor_request_buff(struct rt2x00_dev *rt2x00dev,
}
EXPORT_SYMBOL_GPL(rt2x00usb_vendor_request_buff);
+int rt2x00usb_vendor_request_large_buff(struct rt2x00_dev *rt2x00dev,
+ const u8 request, const u8 requesttype,
+ const u16 offset, void *buffer,
+ const u16 buffer_length,
+ const int timeout)
+{
+ int status = 0;
+ unsigned char *tb;
+ u16 off, len, bsize;
+
+ mutex_lock(&rt2x00dev->usb_cache_mutex);
+
+ tb = buffer;
+ off = offset;
+ len = buffer_length;
+ while (len && !status) {
+ bsize = min_t(u16, CSR_CACHE_SIZE, len);
+ status = rt2x00usb_vendor_req_buff_lock(rt2x00dev, request,
+ requesttype, off, tb,
+ bsize, timeout);
+
+ tb += bsize;
+ len -= bsize;
+ off += bsize;
+ }
+
+ mutex_unlock(&rt2x00dev->usb_cache_mutex);
+
+ return status;
+}
+EXPORT_SYMBOL_GPL(rt2x00usb_vendor_request_large_buff);
+
/*
* TX data handlers.
*/
diff --git a/drivers/net/wireless/rt2x00/rt2x00usb.h b/drivers/net/wireless/rt2x00/rt2x00usb.h
index aad794adf52..ee3875f894a 100644
--- a/drivers/net/wireless/rt2x00/rt2x00usb.h
+++ b/drivers/net/wireless/rt2x00/rt2x00usb.h
@@ -70,8 +70,7 @@
/*
* Cache size
*/
-#define CSR_CACHE_SIZE 8
-#define CSR_CACHE_SIZE_FIRMWARE 64
+#define CSR_CACHE_SIZE 64
/*
* USB request types.
@@ -172,6 +171,25 @@ int rt2x00usb_vendor_req_buff_lock(struct rt2x00_dev *rt2x00dev,
const u16 buffer_length, const int timeout);
/**
+ * rt2x00usb_vendor_request_large_buff - Send register command to device (buffered)
+ * @rt2x00dev: Pointer to &struct rt2x00_dev
+ * @request: USB vendor command (See &enum rt2x00usb_vendor_request)
+ * @requesttype: Request type &USB_VENDOR_REQUEST_*
+ * @offset: Register start offset to perform action on
+ * @buffer: Buffer where information will be read/written to by device
+ * @buffer_length: Size of &buffer
+ * @timeout: Operation timeout
+ *
+ * This function is used to transfer register data in blocks larger
+ * then CSR_CACHE_SIZE. Use for firmware upload, keys and beacons.
+ */
+int rt2x00usb_vendor_request_large_buff(struct rt2x00_dev *rt2x00dev,
+ const u8 request, const u8 requesttype,
+ const u16 offset, void *buffer,
+ const u16 buffer_length,
+ const int timeout);
+
+/**
* rt2x00usb_vendor_request_sw - Send single register command to device
* @rt2x00dev: Pointer to &struct rt2x00_dev
* @request: USB vendor command (See &enum rt2x00usb_vendor_request)
diff --git a/drivers/net/wireless/rt2x00/rt61pci.c b/drivers/net/wireless/rt2x00/rt61pci.c
index f7c1f92c144..fbe2a652e01 100644
--- a/drivers/net/wireless/rt2x00/rt61pci.c
+++ b/drivers/net/wireless/rt2x00/rt61pci.c
@@ -1544,7 +1544,8 @@ static void rt61pci_write_tx_desc(struct rt2x00_dev *rt2x00dev,
rt2x00_set_field32(&word, TXD_W1_CWMIN, txdesc->cw_min);
rt2x00_set_field32(&word, TXD_W1_CWMAX, txdesc->cw_max);
rt2x00_set_field32(&word, TXD_W1_IV_OFFSET, IEEE80211_HEADER);
- rt2x00_set_field32(&word, TXD_W1_HW_SEQUENCE, 1);
+ rt2x00_set_field32(&word, TXD_W1_HW_SEQUENCE,
+ test_bit(ENTRY_TXD_GENERATE_SEQ, &txdesc->flags));
rt2x00_set_field32(&word, TXD_W1_BUFFER_COUNT, 1);
rt2x00_desc_write(txd, 1, word);
@@ -2278,7 +2279,6 @@ static void rt61pci_probe_hw_mode(struct rt2x00_dev *rt2x00dev)
* Initialize all hw fields.
*/
rt2x00dev->hw->flags =
- IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE |
IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING |
IEEE80211_HW_SIGNAL_DBM;
rt2x00dev->hw->extra_tx_headroom = 0;
diff --git a/drivers/net/wireless/rt2x00/rt73usb.c b/drivers/net/wireless/rt2x00/rt73usb.c
index d383735ab8f..9761eaaa08b 100644
--- a/drivers/net/wireless/rt2x00/rt73usb.c
+++ b/drivers/net/wireless/rt2x00/rt73usb.c
@@ -890,9 +890,6 @@ static int rt73usb_load_firmware(struct rt2x00_dev *rt2x00dev, const void *data,
unsigned int i;
int status;
u32 reg;
- const char *ptr = data;
- char *cache;
- int buflen;
/*
* Wait for stable hardware.
@@ -911,31 +908,12 @@ static int rt73usb_load_firmware(struct rt2x00_dev *rt2x00dev, const void *data,
/*
* Write firmware to device.
- * We setup a seperate cache for this action,
- * since we are going to write larger chunks of data
- * then normally used cache size.
*/
- cache = kmalloc(CSR_CACHE_SIZE_FIRMWARE, GFP_KERNEL);
- if (!cache) {
- ERROR(rt2x00dev, "Failed to allocate firmware cache.\n");
- return -ENOMEM;
- }
-
- for (i = 0; i < len; i += CSR_CACHE_SIZE_FIRMWARE) {
- buflen = min_t(int, len - i, CSR_CACHE_SIZE_FIRMWARE);
-
- memcpy(cache, ptr, buflen);
-
- rt2x00usb_vendor_request(rt2x00dev, USB_MULTI_WRITE,
- USB_VENDOR_REQUEST_OUT,
- FIRMWARE_IMAGE_BASE + i, 0,
- cache, buflen,
- REGISTER_TIMEOUT32(buflen));
-
- ptr += buflen;
- }
-
- kfree(cache);
+ rt2x00usb_vendor_request_large_buff(rt2x00dev, USB_MULTI_WRITE,
+ USB_VENDOR_REQUEST_OUT,
+ FIRMWARE_IMAGE_BASE,
+ data, len,
+ REGISTER_TIMEOUT32(len));
/*
* Send firmware request to device to load firmware,
@@ -1303,7 +1281,8 @@ static void rt73usb_write_tx_desc(struct rt2x00_dev *rt2x00dev,
rt2x00_set_field32(&word, TXD_W1_CWMIN, txdesc->cw_min);
rt2x00_set_field32(&word, TXD_W1_CWMAX, txdesc->cw_max);
rt2x00_set_field32(&word, TXD_W1_IV_OFFSET, IEEE80211_HEADER);
- rt2x00_set_field32(&word, TXD_W1_HW_SEQUENCE, 1);
+ rt2x00_set_field32(&word, TXD_W1_HW_SEQUENCE,
+ test_bit(ENTRY_TXD_GENERATE_SEQ, &txdesc->flags));
rt2x00_desc_write(txd, 1, word);
rt2x00_desc_read(txd, 2, &word);
@@ -1352,6 +1331,7 @@ static void rt73usb_write_beacon(struct queue_entry *entry)
struct skb_frame_desc *skbdesc = get_skb_frame_desc(entry->skb);
unsigned int beacon_base;
u32 reg;
+ u32 word, len;
/*
* Add the descriptor in front of the skb.
@@ -1361,6 +1341,17 @@ static void rt73usb_write_beacon(struct queue_entry *entry)
skbdesc->desc = entry->skb->data;
/*
+ * Adjust the beacon databyte count. The current number is
+ * calculated before this function gets called, but falsely
+ * assumes that the descriptor was already present in the SKB.
+ */
+ rt2x00_desc_read(skbdesc->desc, 0, &word);
+ len = rt2x00_get_field32(word, TXD_W0_DATABYTE_COUNT);
+ len += skbdesc->desc_len;
+ rt2x00_set_field32(&word, TXD_W0_DATABYTE_COUNT, len);
+ rt2x00_desc_write(skbdesc->desc, 0, word);
+
+ /*
* Disable beaconing while we are reloading the beacon data,
* otherwise we might be sending out invalid data.
*/
@@ -1374,10 +1365,10 @@ static void rt73usb_write_beacon(struct queue_entry *entry)
* Write entire beacon with descriptor to register.
*/
beacon_base = HW_BEACON_OFFSET(entry->entry_idx);
- rt2x00usb_vendor_request(rt2x00dev, USB_MULTI_WRITE,
- USB_VENDOR_REQUEST_OUT, beacon_base, 0,
- entry->skb->data, entry->skb->len,
- REGISTER_TIMEOUT32(entry->skb->len));
+ rt2x00usb_vendor_request_large_buff(rt2x00dev, USB_MULTI_WRITE,
+ USB_VENDOR_REQUEST_OUT, beacon_base,
+ entry->skb->data, entry->skb->len,
+ REGISTER_TIMEOUT32(entry->skb->len));
/*
* Clean up the beacon skb.
@@ -1871,7 +1862,6 @@ static void rt73usb_probe_hw_mode(struct rt2x00_dev *rt2x00dev)
* Initialize all hw fields.
*/
rt2x00dev->hw->flags =
- IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE |
IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING |
IEEE80211_HW_SIGNAL_DBM;
rt2x00dev->hw->extra_tx_headroom = TXD_DESC_SIZE;
diff --git a/drivers/net/wireless/rtl8187.h b/drivers/net/wireless/rtl8187.h
index 3afb49f8866..1b0d750f662 100644
--- a/drivers/net/wireless/rtl8187.h
+++ b/drivers/net/wireless/rtl8187.h
@@ -47,11 +47,13 @@ struct rtl8187_rx_hdr {
struct rtl8187b_rx_hdr {
__le32 flags;
__le64 mac_time;
- u8 noise;
- u8 signal;
+ u8 sq;
+ u8 rssi;
u8 agc;
- u8 reserved;
- __le32 unused;
+ u8 flags2;
+ __le16 snr_long2end;
+ s8 pwdb_g12;
+ u8 fot;
} __attribute__((packed));
/* {rtl8187,rtl8187b}_tx_info is in skb */
@@ -100,6 +102,7 @@ struct rtl8187_priv {
struct usb_device *udev;
u32 rx_conf;
u16 txpwr_base;
+ u16 seqno;
u8 asic_rev;
u8 is_rtl8187b;
enum {
diff --git a/drivers/net/wireless/rtl8187_dev.c b/drivers/net/wireless/rtl8187_dev.c
index d3067b1216c..177988efd66 100644
--- a/drivers/net/wireless/rtl8187_dev.c
+++ b/drivers/net/wireless/rtl8187_dev.c
@@ -169,6 +169,7 @@ static int rtl8187_tx(struct ieee80211_hw *dev, struct sk_buff *skb)
{
struct rtl8187_priv *priv = dev->priv;
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+ struct ieee80211_hdr *ieee80211hdr = (struct ieee80211_hdr *)skb->data;
unsigned int ep;
void *buf;
struct urb *urb;
@@ -234,6 +235,20 @@ static int rtl8187_tx(struct ieee80211_hw *dev, struct sk_buff *skb)
ep = epmap[skb_get_queue_mapping(skb)];
}
+ /* FIXME: The sequence that follows is needed for this driver to
+ * work with mac80211 since "mac80211: fix TX sequence numbers".
+ * As with the temporary code in rt2x00, changes will be needed
+ * to get proper sequence numbers on beacons. In addition, this
+ * patch places the sequence number in the hardware state, which
+ * limits us to a single virtual state.
+ */
+ if (info->flags & IEEE80211_TX_CTL_ASSIGN_SEQ) {
+ if (info->flags & IEEE80211_TX_CTL_FIRST_FRAGMENT)
+ priv->seqno += 0x10;
+ ieee80211hdr->seq_ctrl &= cpu_to_le16(IEEE80211_SCTL_FRAG);
+ ieee80211hdr->seq_ctrl |= cpu_to_le16(priv->seqno);
+ }
+
info->driver_data[0] = dev;
info->driver_data[1] = urb;
@@ -257,6 +272,7 @@ static void rtl8187_rx_cb(struct urb *urb)
struct ieee80211_rx_status rx_status = { 0 };
int rate, signal;
u32 flags;
+ u32 quality;
spin_lock(&priv->rx_queue.lock);
if (skb->next)
@@ -280,44 +296,57 @@ static void rtl8187_rx_cb(struct urb *urb)
flags = le32_to_cpu(hdr->flags);
signal = hdr->signal & 0x7f;
rx_status.antenna = (hdr->signal >> 7) & 1;
- rx_status.signal = signal;
rx_status.noise = hdr->noise;
rx_status.mactime = le64_to_cpu(hdr->mac_time);
- priv->signal = signal;
priv->quality = signal;
+ rx_status.qual = priv->quality;
priv->noise = hdr->noise;
+ rate = (flags >> 20) & 0xF;
+ if (rate > 3) { /* OFDM rate */
+ if (signal > 90)
+ signal = 90;
+ else if (signal < 25)
+ signal = 25;
+ signal = 90 - signal;
+ } else { /* CCK rate */
+ if (signal > 95)
+ signal = 95;
+ else if (signal < 30)
+ signal = 30;
+ signal = 95 - signal;
+ }
+ rx_status.signal = signal;
+ priv->signal = signal;
} else {
struct rtl8187b_rx_hdr *hdr =
(typeof(hdr))(skb_tail_pointer(skb) - sizeof(*hdr));
+ /* The Realtek datasheet for the RTL8187B shows that the RX
+ * header contains the following quantities: signal quality,
+ * RSSI, AGC, the received power in dB, and the measured SNR.
+ * In testing, none of these quantities show qualitative
+ * agreement with AP signal strength, except for the AGC,
+ * which is inversely proportional to the strength of the
+ * signal. In the following, the quality and signal strength
+ * are derived from the AGC. The arbitrary scaling constants
+ * are chosen to make the results close to the values obtained
+ * for a BCM4312 using b43 as the driver. The noise is ignored
+ * for now.
+ */
flags = le32_to_cpu(hdr->flags);
- signal = hdr->agc >> 1;
- rx_status.antenna = (hdr->signal >> 7) & 1;
- rx_status.signal = 64 - min(hdr->noise, (u8)64);
- rx_status.noise = hdr->noise;
+ quality = 170 - hdr->agc;
+ if (quality > 100)
+ quality = 100;
+ signal = 14 - hdr->agc / 2;
+ rx_status.qual = quality;
+ priv->quality = quality;
+ rx_status.signal = signal;
+ priv->signal = signal;
+ rx_status.antenna = (hdr->rssi >> 7) & 1;
rx_status.mactime = le64_to_cpu(hdr->mac_time);
- priv->signal = hdr->signal;
- priv->quality = hdr->agc >> 1;
- priv->noise = hdr->noise;
+ rate = (flags >> 20) & 0xF;
}
skb_trim(skb, flags & 0x0FFF);
- rate = (flags >> 20) & 0xF;
- if (rate > 3) { /* OFDM rate */
- if (signal > 90)
- signal = 90;
- else if (signal < 25)
- signal = 25;
- signal = 90 - signal;
- } else { /* CCK rate */
- if (signal > 95)
- signal = 95;
- else if (signal < 30)
- signal = 30;
- signal = 95 - signal;
- }
-
- rx_status.qual = priv->quality;
- rx_status.signal = signal;
rx_status.rate_idx = rate;
rx_status.freq = dev->conf.channel->center_freq;
rx_status.band = dev->conf.channel->band;
@@ -1015,9 +1044,7 @@ static int __devinit rtl8187_probe(struct usb_interface *intf,
priv->mode = IEEE80211_IF_TYPE_MNTR;
dev->flags = IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING |
- IEEE80211_HW_RX_INCLUDES_FCS |
- IEEE80211_HW_SIGNAL_UNSPEC;
- dev->max_signal = 65;
+ IEEE80211_HW_RX_INCLUDES_FCS;
eeprom.data = dev;
eeprom.register_read = rtl8187_eeprom_register_read;
@@ -1132,10 +1159,16 @@ static int __devinit rtl8187_probe(struct usb_interface *intf,
(*channel++).hw_value = txpwr >> 8;
}
- if (priv->is_rtl8187b)
+ if (priv->is_rtl8187b) {
printk(KERN_WARNING "rtl8187: 8187B chip detected. Support "
"is EXPERIMENTAL, and could damage your\n"
" hardware, use at your own risk\n");
+ dev->flags |= IEEE80211_HW_SIGNAL_DBM;
+ } else {
+ dev->flags |= IEEE80211_HW_SIGNAL_UNSPEC;
+ dev->max_signal = 65;
+ }
+
if ((id->driver_info == DEVICE_RTL8187) && priv->is_rtl8187b)
printk(KERN_INFO "rtl8187: inconsistency between id with OEM"
" info!\n");
diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c
index fcc532bb6a7..4d7b98b0503 100644
--- a/drivers/net/wireless/zd1211rw/zd_mac.c
+++ b/drivers/net/wireless/zd1211rw/zd_mac.c
@@ -935,7 +935,6 @@ struct ieee80211_hw *zd_mac_alloc_hw(struct usb_interface *intf)
hw->wiphy->bands[IEEE80211_BAND_2GHZ] = &mac->band;
hw->flags = IEEE80211_HW_RX_INCLUDES_FCS |
- IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE |
IEEE80211_HW_SIGNAL_DB;
hw->max_signal = 100;
diff --git a/drivers/power/Kconfig b/drivers/power/Kconfig
index 4d17d384578..9ce55850271 100644
--- a/drivers/power/Kconfig
+++ b/drivers/power/Kconfig
@@ -49,6 +49,13 @@ config BATTERY_OLPC
help
Say Y to enable support for the battery on the OLPC laptop.
+config BATTERY_TOSA
+ tristate "Sharp SL-6000 (tosa) battery"
+ depends on MACH_TOSA && MFD_TC6393XB
+ help
+ Say Y to enable support for the battery on the Sharp Zaurus
+ SL-6000 (tosa) models.
+
config BATTERY_PALMTX
tristate "Palm T|X battery"
depends on MACH_PALMTX
diff --git a/drivers/power/Makefile b/drivers/power/Makefile
index 6f43a54ee42..4706bf8ff45 100644
--- a/drivers/power/Makefile
+++ b/drivers/power/Makefile
@@ -20,4 +20,5 @@ obj-$(CONFIG_APM_POWER) += apm_power.o
obj-$(CONFIG_BATTERY_DS2760) += ds2760_battery.o
obj-$(CONFIG_BATTERY_PMU) += pmu_battery.o
obj-$(CONFIG_BATTERY_OLPC) += olpc_battery.o
+obj-$(CONFIG_BATTERY_TOSA) += tosa_battery.o
obj-$(CONFIG_BATTERY_PALMTX) += palmtx_battery.o
diff --git a/drivers/power/olpc_battery.c b/drivers/power/olpc_battery.c
index ab1e8289f07..32570af3c5c 100644
--- a/drivers/power/olpc_battery.c
+++ b/drivers/power/olpc_battery.c
@@ -19,7 +19,7 @@
#define EC_BAT_VOLTAGE 0x10 /* uint16_t, *9.76/32, mV */
#define EC_BAT_CURRENT 0x11 /* int16_t, *15.625/120, mA */
-#define EC_BAT_ACR 0x12
+#define EC_BAT_ACR 0x12 /* int16_t, *6250/15, µAh */
#define EC_BAT_TEMP 0x13 /* uint16_t, *100/256, °C */
#define EC_AMB_TEMP 0x14 /* uint16_t, *100/256, °C */
#define EC_BAT_STATUS 0x15 /* uint8_t, bitmask */
@@ -84,6 +84,119 @@ static struct power_supply olpc_ac = {
.get_property = olpc_ac_get_prop,
};
+static char bat_serial[17]; /* Ick */
+
+static int olpc_bat_get_status(union power_supply_propval *val, uint8_t ec_byte)
+{
+ if (olpc_platform_info.ecver > 0x44) {
+ if (ec_byte & BAT_STAT_CHARGING)
+ val->intval = POWER_SUPPLY_STATUS_CHARGING;
+ else if (ec_byte & BAT_STAT_DISCHARGING)
+ val->intval = POWER_SUPPLY_STATUS_DISCHARGING;
+ else if (ec_byte & BAT_STAT_FULL)
+ val->intval = POWER_SUPPLY_STATUS_FULL;
+ else /* er,... */
+ val->intval = POWER_SUPPLY_STATUS_NOT_CHARGING;
+ } else {
+ /* Older EC didn't report charge/discharge bits */
+ if (!(ec_byte & BAT_STAT_AC)) /* No AC means discharging */
+ val->intval = POWER_SUPPLY_STATUS_DISCHARGING;
+ else if (ec_byte & BAT_STAT_FULL)
+ val->intval = POWER_SUPPLY_STATUS_FULL;
+ else /* Not _necessarily_ true but EC doesn't tell all yet */
+ val->intval = POWER_SUPPLY_STATUS_CHARGING;
+ }
+
+ return 0;
+}
+
+static int olpc_bat_get_health(union power_supply_propval *val)
+{
+ uint8_t ec_byte;
+ int ret;
+
+ ret = olpc_ec_cmd(EC_BAT_ERRCODE, NULL, 0, &ec_byte, 1);
+ if (ret)
+ return ret;
+
+ switch (ec_byte) {
+ case 0:
+ val->intval = POWER_SUPPLY_HEALTH_GOOD;
+ break;
+
+ case BAT_ERR_OVERTEMP:
+ val->intval = POWER_SUPPLY_HEALTH_OVERHEAT;
+ break;
+
+ case BAT_ERR_OVERVOLTAGE:
+ val->intval = POWER_SUPPLY_HEALTH_OVERVOLTAGE;
+ break;
+
+ case BAT_ERR_INFOFAIL:
+ case BAT_ERR_OUT_OF_CONTROL:
+ case BAT_ERR_ID_FAIL:
+ case BAT_ERR_ACR_FAIL:
+ val->intval = POWER_SUPPLY_HEALTH_UNSPEC_FAILURE;
+ break;
+
+ default:
+ /* Eep. We don't know this failure code */
+ ret = -EIO;
+ }
+
+ return ret;
+}
+
+static int olpc_bat_get_mfr(union power_supply_propval *val)
+{
+ uint8_t ec_byte;
+ int ret;
+
+ ec_byte = BAT_ADDR_MFR_TYPE;
+ ret = olpc_ec_cmd(EC_BAT_EEPROM, &ec_byte, 1, &ec_byte, 1);
+ if (ret)
+ return ret;
+
+ switch (ec_byte >> 4) {
+ case 1:
+ val->strval = "Gold Peak";
+ break;
+ case 2:
+ val->strval = "BYD";
+ break;
+ default:
+ val->strval = "Unknown";
+ break;
+ }
+
+ return ret;
+}
+
+static int olpc_bat_get_tech(union power_supply_propval *val)
+{
+ uint8_t ec_byte;
+ int ret;
+
+ ec_byte = BAT_ADDR_MFR_TYPE;
+ ret = olpc_ec_cmd(EC_BAT_EEPROM, &ec_byte, 1, &ec_byte, 1);
+ if (ret)
+ return ret;
+
+ switch (ec_byte & 0xf) {
+ case 1:
+ val->intval = POWER_SUPPLY_TECHNOLOGY_NiMH;
+ break;
+ case 2:
+ val->intval = POWER_SUPPLY_TECHNOLOGY_LiFe;
+ break;
+ default:
+ val->intval = POWER_SUPPLY_TECHNOLOGY_UNKNOWN;
+ break;
+ }
+
+ return ret;
+}
+
/*********************************************************************
* Battery properties
*********************************************************************/
@@ -94,6 +207,7 @@ static int olpc_bat_get_property(struct power_supply *psy,
int ret = 0;
int16_t ec_word;
uint8_t ec_byte;
+ uint64_t ser_buf;
ret = olpc_ec_cmd(EC_BAT_STATUS, NULL, 0, &ec_byte, 1);
if (ret)
@@ -110,25 +224,10 @@ static int olpc_bat_get_property(struct power_supply *psy,
switch (psp) {
case POWER_SUPPLY_PROP_STATUS:
- if (olpc_platform_info.ecver > 0x44) {
- if (ec_byte & BAT_STAT_CHARGING)
- val->intval = POWER_SUPPLY_STATUS_CHARGING;
- else if (ec_byte & BAT_STAT_DISCHARGING)
- val->intval = POWER_SUPPLY_STATUS_DISCHARGING;
- else if (ec_byte & BAT_STAT_FULL)
- val->intval = POWER_SUPPLY_STATUS_FULL;
- else /* er,... */
- val->intval = POWER_SUPPLY_STATUS_NOT_CHARGING;
- } else {
- /* Older EC didn't report charge/discharge bits */
- if (!(ec_byte & BAT_STAT_AC)) /* No AC means discharging */
- val->intval = POWER_SUPPLY_STATUS_DISCHARGING;
- else if (ec_byte & BAT_STAT_FULL)
- val->intval = POWER_SUPPLY_STATUS_FULL;
- else /* Not _necessarily_ true but EC doesn't tell all yet */
- val->intval = POWER_SUPPLY_STATUS_CHARGING;
- break;
- }
+ ret = olpc_bat_get_status(val, ec_byte);
+ if (ret)
+ return ret;
+ break;
case POWER_SUPPLY_PROP_PRESENT:
val->intval = !!(ec_byte & BAT_STAT_PRESENT);
break;
@@ -137,72 +236,21 @@ static int olpc_bat_get_property(struct power_supply *psy,
if (ec_byte & BAT_STAT_DESTROY)
val->intval = POWER_SUPPLY_HEALTH_DEAD;
else {
- ret = olpc_ec_cmd(EC_BAT_ERRCODE, NULL, 0, &ec_byte, 1);
+ ret = olpc_bat_get_health(val);
if (ret)
return ret;
-
- switch (ec_byte) {
- case 0:
- val->intval = POWER_SUPPLY_HEALTH_GOOD;
- break;
-
- case BAT_ERR_OVERTEMP:
- val->intval = POWER_SUPPLY_HEALTH_OVERHEAT;
- break;
-
- case BAT_ERR_OVERVOLTAGE:
- val->intval = POWER_SUPPLY_HEALTH_OVERVOLTAGE;
- break;
-
- case BAT_ERR_INFOFAIL:
- case BAT_ERR_OUT_OF_CONTROL:
- case BAT_ERR_ID_FAIL:
- case BAT_ERR_ACR_FAIL:
- val->intval = POWER_SUPPLY_HEALTH_UNSPEC_FAILURE;
- break;
-
- default:
- /* Eep. We don't know this failure code */
- return -EIO;
- }
}
break;
case POWER_SUPPLY_PROP_MANUFACTURER:
- ec_byte = BAT_ADDR_MFR_TYPE;
- ret = olpc_ec_cmd(EC_BAT_EEPROM, &ec_byte, 1, &ec_byte, 1);
+ ret = olpc_bat_get_mfr(val);
if (ret)
return ret;
-
- switch (ec_byte >> 4) {
- case 1:
- val->strval = "Gold Peak";
- break;
- case 2:
- val->strval = "BYD";
- break;
- default:
- val->strval = "Unknown";
- break;
- }
break;
case POWER_SUPPLY_PROP_TECHNOLOGY:
- ec_byte = BAT_ADDR_MFR_TYPE;
- ret = olpc_ec_cmd(EC_BAT_EEPROM, &ec_byte, 1, &ec_byte, 1);
+ ret = olpc_bat_get_tech(val);
if (ret)
return ret;
-
- switch (ec_byte & 0xf) {
- case 1:
- val->intval = POWER_SUPPLY_TECHNOLOGY_NiMH;
- break;
- case 2:
- val->intval = POWER_SUPPLY_TECHNOLOGY_LiFe;
- break;
- default:
- val->intval = POWER_SUPPLY_TECHNOLOGY_UNKNOWN;
- break;
- }
break;
case POWER_SUPPLY_PROP_VOLTAGE_AVG:
ret = olpc_ec_cmd(EC_BAT_VOLTAGE, NULL, 0, (void *)&ec_word, 2);
@@ -241,6 +289,22 @@ static int olpc_bat_get_property(struct power_supply *psy,
ec_word = be16_to_cpu(ec_word);
val->intval = ec_word * 100 / 256;
break;
+ case POWER_SUPPLY_PROP_CHARGE_COUNTER:
+ ret = olpc_ec_cmd(EC_BAT_ACR, NULL, 0, (void *)&ec_word, 2);
+ if (ret)
+ return ret;
+
+ ec_word = be16_to_cpu(ec_word);
+ val->intval = ec_word * 6250 / 15;
+ break;
+ case POWER_SUPPLY_PROP_SERIAL_NUMBER:
+ ret = olpc_ec_cmd(EC_BAT_SERIAL, NULL, 0, (void *)&ser_buf, 8);
+ if (ret)
+ return ret;
+
+ sprintf(bat_serial, "%016llx", (long long)be64_to_cpu(ser_buf));
+ val->strval = bat_serial;
+ break;
default:
ret = -EINVAL;
break;
@@ -260,6 +324,50 @@ static enum power_supply_property olpc_bat_props[] = {
POWER_SUPPLY_PROP_TEMP,
POWER_SUPPLY_PROP_TEMP_AMBIENT,
POWER_SUPPLY_PROP_MANUFACTURER,
+ POWER_SUPPLY_PROP_SERIAL_NUMBER,
+ POWER_SUPPLY_PROP_CHARGE_COUNTER,
+};
+
+/* EEPROM reading goes completely around the power_supply API, sadly */
+
+#define EEPROM_START 0x20
+#define EEPROM_END 0x80
+#define EEPROM_SIZE (EEPROM_END - EEPROM_START)
+
+static ssize_t olpc_bat_eeprom_read(struct kobject *kobj,
+ struct bin_attribute *attr, char *buf, loff_t off, size_t count)
+{
+ uint8_t ec_byte;
+ int ret, end;
+
+ if (off >= EEPROM_SIZE)
+ return 0;
+ if (off + count > EEPROM_SIZE)
+ count = EEPROM_SIZE - off;
+
+ end = EEPROM_START + off + count;
+ for (ec_byte = EEPROM_START + off; ec_byte < end; ec_byte++) {
+ ret = olpc_ec_cmd(EC_BAT_EEPROM, &ec_byte, 1,
+ &buf[ec_byte - EEPROM_START], 1);
+ if (ret) {
+ printk(KERN_ERR "olpc-battery: EC command "
+ "EC_BAT_EEPROM @ 0x%x failed -"
+ " %d!\n", ec_byte, ret);
+ return -EIO;
+ }
+ }
+
+ return count;
+}
+
+static struct bin_attribute olpc_bat_eeprom = {
+ .attr = {
+ .name = "eeprom",
+ .mode = S_IRUGO,
+ .owner = THIS_MODULE,
+ },
+ .size = 0,
+ .read = olpc_bat_eeprom_read,
};
/*********************************************************************
@@ -290,8 +398,14 @@ static int __init olpc_bat_init(void)
if (!olpc_platform_info.ecver)
return -ENXIO;
- if (olpc_platform_info.ecver < 0x43) {
- printk(KERN_NOTICE "OLPC EC version 0x%02x too old for battery driver.\n", olpc_platform_info.ecver);
+
+ /*
+ * We've seen a number of EC protocol changes; this driver requires
+ * the latest EC protocol, supported by 0x44 and above.
+ */
+ if (olpc_platform_info.ecver < 0x44) {
+ printk(KERN_NOTICE "OLPC EC version 0x%02x too old for "
+ "battery driver.\n", olpc_platform_info.ecver);
return -ENXIO;
}
@@ -315,8 +429,14 @@ static int __init olpc_bat_init(void)
if (ret)
goto battery_failed;
+ ret = device_create_bin_file(olpc_bat.dev, &olpc_bat_eeprom);
+ if (ret)
+ goto eeprom_failed;
+
goto success;
+eeprom_failed:
+ power_supply_unregister(&olpc_bat);
battery_failed:
power_supply_unregister(&olpc_ac);
ac_failed:
@@ -327,6 +447,7 @@ success:
static void __exit olpc_bat_exit(void)
{
+ device_remove_bin_file(olpc_bat.dev, &olpc_bat_eeprom);
power_supply_unregister(&olpc_bat);
power_supply_unregister(&olpc_ac);
platform_device_unregister(bat_pdev);
diff --git a/drivers/power/power_supply_sysfs.c b/drivers/power/power_supply_sysfs.c
index 49215da5249..fe2aeb11939 100644
--- a/drivers/power/power_supply_sysfs.c
+++ b/drivers/power/power_supply_sysfs.c
@@ -99,6 +99,7 @@ static struct device_attribute power_supply_attrs[] = {
POWER_SUPPLY_ATTR(charge_empty),
POWER_SUPPLY_ATTR(charge_now),
POWER_SUPPLY_ATTR(charge_avg),
+ POWER_SUPPLY_ATTR(charge_counter),
POWER_SUPPLY_ATTR(energy_full_design),
POWER_SUPPLY_ATTR(energy_empty_design),
POWER_SUPPLY_ATTR(energy_full),
diff --git a/drivers/power/tosa_battery.c b/drivers/power/tosa_battery.c
new file mode 100644
index 00000000000..bf664fbd661
--- /dev/null
+++ b/drivers/power/tosa_battery.c
@@ -0,0 +1,486 @@
+/*
+ * Battery and Power Management code for the Sharp SL-6000x
+ *
+ * Copyright (c) 2005 Dirk Opfer
+ * Copyright (c) 2008 Dmitry Baryshkov
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/power_supply.h>
+#include <linux/wm97xx.h>
+#include <linux/delay.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/gpio.h>
+
+#include <asm/mach-types.h>
+#include <asm/arch/tosa.h>
+
+static DEFINE_MUTEX(bat_lock); /* protects gpio pins */
+static struct work_struct bat_work;
+
+struct tosa_bat {
+ int status;
+ struct power_supply psy;
+ int full_chrg;
+
+ struct mutex work_lock; /* protects data */
+
+ bool (*is_present)(struct tosa_bat *bat);
+ int gpio_full;
+ int gpio_charge_off;
+
+ int technology;
+
+ int gpio_bat;
+ int adc_bat;
+ int adc_bat_divider;
+ int bat_max;
+ int bat_min;
+
+ int gpio_temp;
+ int adc_temp;
+ int adc_temp_divider;
+};
+
+static struct tosa_bat tosa_bat_main;
+static struct tosa_bat tosa_bat_jacket;
+
+static unsigned long tosa_read_bat(struct tosa_bat *bat)
+{
+ unsigned long value = 0;
+
+ if (bat->gpio_bat < 0 || bat->adc_bat < 0)
+ return 0;
+
+ mutex_lock(&bat_lock);
+ gpio_set_value(bat->gpio_bat, 1);
+ msleep(5);
+ value = wm97xx_read_aux_adc(bat->psy.dev->parent->driver_data,
+ bat->adc_bat);
+ gpio_set_value(bat->gpio_bat, 0);
+ mutex_unlock(&bat_lock);
+
+ value = value * 1000000 / bat->adc_bat_divider;
+
+ return value;
+}
+
+static unsigned long tosa_read_temp(struct tosa_bat *bat)
+{
+ unsigned long value = 0;
+
+ if (bat->gpio_temp < 0 || bat->adc_temp < 0)
+ return 0;
+
+ mutex_lock(&bat_lock);
+ gpio_set_value(bat->gpio_temp, 1);
+ msleep(5);
+ value = wm97xx_read_aux_adc(bat->psy.dev->parent->driver_data,
+ bat->adc_temp);
+ gpio_set_value(bat->gpio_temp, 0);
+ mutex_unlock(&bat_lock);
+
+ value = value * 10000 / bat->adc_temp_divider;
+
+ return value;
+}
+
+static int tosa_bat_get_property(struct power_supply *psy,
+ enum power_supply_property psp,
+ union power_supply_propval *val)
+{
+ int ret = 0;
+ struct tosa_bat *bat = container_of(psy, struct tosa_bat, psy);
+
+ if (bat->is_present && !bat->is_present(bat)
+ && psp != POWER_SUPPLY_PROP_PRESENT) {
+ return -ENODEV;
+ }
+
+ switch (psp) {
+ case POWER_SUPPLY_PROP_STATUS:
+ val->intval = bat->status;
+ break;
+ case POWER_SUPPLY_PROP_TECHNOLOGY:
+ val->intval = bat->technology;
+ break;
+ case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+ val->intval = tosa_read_bat(bat);
+ break;
+ case POWER_SUPPLY_PROP_VOLTAGE_MAX:
+ if (bat->full_chrg == -1)
+ val->intval = bat->bat_max;
+ else
+ val->intval = bat->full_chrg;
+ break;
+ case POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN:
+ val->intval = bat->bat_max;
+ break;
+ case POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN:
+ val->intval = bat->bat_min;
+ break;
+ case POWER_SUPPLY_PROP_TEMP:
+ val->intval = tosa_read_temp(bat);
+ break;
+ case POWER_SUPPLY_PROP_PRESENT:
+ val->intval = bat->is_present ? bat->is_present(bat) : 1;
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+ return ret;
+}
+
+static bool tosa_jacket_bat_is_present(struct tosa_bat *bat)
+{
+ return gpio_get_value(TOSA_GPIO_JACKET_DETECT) == 0;
+}
+
+static void tosa_bat_external_power_changed(struct power_supply *psy)
+{
+ schedule_work(&bat_work);
+}
+
+static irqreturn_t tosa_bat_gpio_isr(int irq, void *data)
+{
+ pr_info("tosa_bat_gpio irq: %d\n", gpio_get_value(irq_to_gpio(irq)));
+ schedule_work(&bat_work);
+ return IRQ_HANDLED;
+}
+
+static void tosa_bat_update(struct tosa_bat *bat)
+{
+ int old;
+ struct power_supply *psy = &bat->psy;
+
+ mutex_lock(&bat->work_lock);
+
+ old = bat->status;
+
+ if (bat->is_present && !bat->is_present(bat)) {
+ printk(KERN_NOTICE "%s not present\n", psy->name);
+ bat->status = POWER_SUPPLY_STATUS_UNKNOWN;
+ bat->full_chrg = -1;
+ } else if (power_supply_am_i_supplied(psy)) {
+ if (bat->status == POWER_SUPPLY_STATUS_DISCHARGING) {
+ gpio_set_value(bat->gpio_charge_off, 0);
+ mdelay(15);
+ }
+
+ if (gpio_get_value(bat->gpio_full)) {
+ if (old == POWER_SUPPLY_STATUS_CHARGING ||
+ bat->full_chrg == -1)
+ bat->full_chrg = tosa_read_bat(bat);
+
+ gpio_set_value(bat->gpio_charge_off, 1);
+ bat->status = POWER_SUPPLY_STATUS_FULL;
+ } else {
+ gpio_set_value(bat->gpio_charge_off, 0);
+ bat->status = POWER_SUPPLY_STATUS_CHARGING;
+ }
+ } else {
+ gpio_set_value(bat->gpio_charge_off, 1);
+ bat->status = POWER_SUPPLY_STATUS_DISCHARGING;
+ }
+
+ if (old != bat->status)
+ power_supply_changed(psy);
+
+ mutex_unlock(&bat->work_lock);
+}
+
+static void tosa_bat_work(struct work_struct *work)
+{
+ tosa_bat_update(&tosa_bat_main);
+ tosa_bat_update(&tosa_bat_jacket);
+}
+
+
+static enum power_supply_property tosa_bat_main_props[] = {
+ POWER_SUPPLY_PROP_STATUS,
+ POWER_SUPPLY_PROP_TECHNOLOGY,
+ POWER_SUPPLY_PROP_VOLTAGE_NOW,
+ POWER_SUPPLY_PROP_VOLTAGE_MAX,
+ POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN,
+ POWER_SUPPLY_PROP_TEMP,
+ POWER_SUPPLY_PROP_PRESENT,
+};
+
+static enum power_supply_property tosa_bat_bu_props[] = {
+ POWER_SUPPLY_PROP_STATUS,
+ POWER_SUPPLY_PROP_TECHNOLOGY,
+ POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN,
+ POWER_SUPPLY_PROP_VOLTAGE_NOW,
+ POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN,
+ POWER_SUPPLY_PROP_PRESENT,
+};
+
+static struct tosa_bat tosa_bat_main = {
+ .status = POWER_SUPPLY_STATUS_DISCHARGING,
+ .full_chrg = -1,
+ .psy = {
+ .name = "main-battery",
+ .type = POWER_SUPPLY_TYPE_BATTERY,
+ .properties = tosa_bat_main_props,
+ .num_properties = ARRAY_SIZE(tosa_bat_main_props),
+ .get_property = tosa_bat_get_property,
+ .external_power_changed = tosa_bat_external_power_changed,
+ .use_for_apm = 1,
+ },
+
+ .gpio_full = TOSA_GPIO_BAT0_CRG,
+ .gpio_charge_off = TOSA_GPIO_CHARGE_OFF,
+
+ .technology = POWER_SUPPLY_TECHNOLOGY_LIPO,
+
+ .gpio_bat = TOSA_GPIO_BAT0_V_ON,
+ .adc_bat = WM97XX_AUX_ID3,
+ .adc_bat_divider = 414,
+ .bat_max = 4310000,
+ .bat_min = 1551 * 1000000 / 414,
+
+ .gpio_temp = TOSA_GPIO_BAT1_TH_ON,
+ .adc_temp = WM97XX_AUX_ID2,
+ .adc_temp_divider = 10000,
+};
+
+static struct tosa_bat tosa_bat_jacket = {
+ .status = POWER_SUPPLY_STATUS_DISCHARGING,
+ .full_chrg = -1,
+ .psy = {
+ .name = "jacket-battery",
+ .type = POWER_SUPPLY_TYPE_BATTERY,
+ .properties = tosa_bat_main_props,
+ .num_properties = ARRAY_SIZE(tosa_bat_main_props),
+ .get_property = tosa_bat_get_property,
+ .external_power_changed = tosa_bat_external_power_changed,
+ },
+
+ .is_present = tosa_jacket_bat_is_present,
+ .gpio_full = TOSA_GPIO_BAT1_CRG,
+ .gpio_charge_off = TOSA_GPIO_CHARGE_OFF_JC,
+
+ .technology = POWER_SUPPLY_TECHNOLOGY_LIPO,
+
+ .gpio_bat = TOSA_GPIO_BAT1_V_ON,
+ .adc_bat = WM97XX_AUX_ID3,
+ .adc_bat_divider = 414,
+ .bat_max = 4310000,
+ .bat_min = 1551 * 1000000 / 414,
+
+ .gpio_temp = TOSA_GPIO_BAT0_TH_ON,
+ .adc_temp = WM97XX_AUX_ID2,
+ .adc_temp_divider = 10000,
+};
+
+static struct tosa_bat tosa_bat_bu = {
+ .status = POWER_SUPPLY_STATUS_UNKNOWN,
+ .full_chrg = -1,
+
+ .psy = {
+ .name = "backup-battery",
+ .type = POWER_SUPPLY_TYPE_BATTERY,
+ .properties = tosa_bat_bu_props,
+ .num_properties = ARRAY_SIZE(tosa_bat_bu_props),
+ .get_property = tosa_bat_get_property,
+ .external_power_changed = tosa_bat_external_power_changed,
+ },
+
+ .gpio_full = -1,
+ .gpio_charge_off = -1,
+
+ .technology = POWER_SUPPLY_TECHNOLOGY_LiMn,
+
+ .gpio_bat = TOSA_GPIO_BU_CHRG_ON,
+ .adc_bat = WM97XX_AUX_ID4,
+ .adc_bat_divider = 1266,
+
+ .gpio_temp = -1,
+ .adc_temp = -1,
+ .adc_temp_divider = -1,
+};
+
+static struct {
+ int gpio;
+ char *name;
+ bool output;
+ int value;
+} gpios[] = {
+ { TOSA_GPIO_CHARGE_OFF, "main charge off", 1, 1 },
+ { TOSA_GPIO_CHARGE_OFF_JC, "jacket charge off", 1, 1 },
+ { TOSA_GPIO_BAT_SW_ON, "battery switch", 1, 0 },
+ { TOSA_GPIO_BAT0_V_ON, "main battery", 1, 0 },
+ { TOSA_GPIO_BAT1_V_ON, "jacket battery", 1, 0 },
+ { TOSA_GPIO_BAT1_TH_ON, "main battery temp", 1, 0 },
+ { TOSA_GPIO_BAT0_TH_ON, "jacket battery temp", 1, 0 },
+ { TOSA_GPIO_BU_CHRG_ON, "backup battery", 1, 0 },
+ { TOSA_GPIO_BAT0_CRG, "main battery full", 0, 0 },
+ { TOSA_GPIO_BAT1_CRG, "jacket battery full", 0, 0 },
+ { TOSA_GPIO_BAT0_LOW, "main battery low", 0, 0 },
+ { TOSA_GPIO_BAT1_LOW, "jacket battery low", 0, 0 },
+ { TOSA_GPIO_JACKET_DETECT, "jacket detect", 0, 0 },
+};
+
+#ifdef CONFIG_PM
+static int tosa_bat_suspend(struct platform_device *dev, pm_message_t state)
+{
+ /* flush all pending status updates */
+ flush_scheduled_work();
+ return 0;
+}
+
+static int tosa_bat_resume(struct platform_device *dev)
+{
+ /* things may have changed while we were away */
+ schedule_work(&bat_work);
+ return 0;
+}
+#else
+#define tosa_bat_suspend NULL
+#define tosa_bat_resume NULL
+#endif
+
+static int __devinit tosa_bat_probe(struct platform_device *dev)
+{
+ int ret;
+ int i;
+
+ if (!machine_is_tosa())
+ return -ENODEV;
+
+ for (i = 0; i < ARRAY_SIZE(gpios); i++) {
+ ret = gpio_request(gpios[i].gpio, gpios[i].name);
+ if (ret) {
+ i--;
+ goto err_gpio;
+ }
+
+ if (gpios[i].output)
+ ret = gpio_direction_output(gpios[i].gpio,
+ gpios[i].value);
+ else
+ ret = gpio_direction_input(gpios[i].gpio);
+
+ if (ret)
+ goto err_gpio;
+ }
+
+ mutex_init(&tosa_bat_main.work_lock);
+ mutex_init(&tosa_bat_jacket.work_lock);
+
+ INIT_WORK(&bat_work, tosa_bat_work);
+
+ ret = power_supply_register(&dev->dev, &tosa_bat_main.psy);
+ if (ret)
+ goto err_psy_reg_main;
+ ret = power_supply_register(&dev->dev, &tosa_bat_jacket.psy);
+ if (ret)
+ goto err_psy_reg_jacket;
+ ret = power_supply_register(&dev->dev, &tosa_bat_bu.psy);
+ if (ret)
+ goto err_psy_reg_bu;
+
+ ret = request_irq(gpio_to_irq(TOSA_GPIO_BAT0_CRG),
+ tosa_bat_gpio_isr,
+ IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
+ "main full", &tosa_bat_main);
+ if (ret)
+ goto err_req_main;
+
+ ret = request_irq(gpio_to_irq(TOSA_GPIO_BAT1_CRG),
+ tosa_bat_gpio_isr,
+ IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
+ "jacket full", &tosa_bat_jacket);
+ if (ret)
+ goto err_req_jacket;
+
+ ret = request_irq(gpio_to_irq(TOSA_GPIO_JACKET_DETECT),
+ tosa_bat_gpio_isr,
+ IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
+ "jacket detect", &tosa_bat_jacket);
+ if (!ret) {
+ schedule_work(&bat_work);
+ return 0;
+ }
+
+ free_irq(gpio_to_irq(TOSA_GPIO_BAT1_CRG), &tosa_bat_jacket);
+err_req_jacket:
+ free_irq(gpio_to_irq(TOSA_GPIO_BAT0_CRG), &tosa_bat_main);
+err_req_main:
+ power_supply_unregister(&tosa_bat_bu.psy);
+err_psy_reg_bu:
+ power_supply_unregister(&tosa_bat_jacket.psy);
+err_psy_reg_jacket:
+ power_supply_unregister(&tosa_bat_main.psy);
+err_psy_reg_main:
+
+ /* see comment in tosa_bat_remove */
+ flush_scheduled_work();
+
+ i--;
+err_gpio:
+ for (; i >= 0; i--)
+ gpio_free(gpios[i].gpio);
+
+ return ret;
+}
+
+static int __devexit tosa_bat_remove(struct platform_device *dev)
+{
+ int i;
+
+ free_irq(gpio_to_irq(TOSA_GPIO_JACKET_DETECT), &tosa_bat_jacket);
+ free_irq(gpio_to_irq(TOSA_GPIO_BAT1_CRG), &tosa_bat_jacket);
+ free_irq(gpio_to_irq(TOSA_GPIO_BAT0_CRG), &tosa_bat_main);
+
+ power_supply_unregister(&tosa_bat_bu.psy);
+ power_supply_unregister(&tosa_bat_jacket.psy);
+ power_supply_unregister(&tosa_bat_main.psy);
+
+ /*
+ * now flush all pending work.
+ * we won't get any more schedules, since all
+ * sources (isr and external_power_changed)
+ * are unregistered now.
+ */
+ flush_scheduled_work();
+
+ for (i = ARRAY_SIZE(gpios) - 1; i >= 0; i--)
+ gpio_free(gpios[i].gpio);
+
+ return 0;
+}
+
+static struct platform_driver tosa_bat_driver = {
+ .driver.name = "wm97xx-battery",
+ .driver.owner = THIS_MODULE,
+ .probe = tosa_bat_probe,
+ .remove = __devexit_p(tosa_bat_remove),
+ .suspend = tosa_bat_suspend,
+ .resume = tosa_bat_resume,
+};
+
+static int __init tosa_bat_init(void)
+{
+ return platform_driver_register(&tosa_bat_driver);
+}
+
+static void __exit tosa_bat_exit(void)
+{
+ platform_driver_unregister(&tosa_bat_driver);
+}
+
+module_init(tosa_bat_init);
+module_exit(tosa_bat_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Dmitry Baryshkov");
+MODULE_DESCRIPTION("Tosa battery driver");
+MODULE_ALIAS("platform:wm97xx-battery");
diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
new file mode 100644
index 00000000000..a656128f1fd
--- /dev/null
+++ b/drivers/regulator/Kconfig
@@ -0,0 +1,59 @@
+menu "Voltage and Current regulators"
+
+config REGULATOR
+ bool "Voltage and Current Regulator Support"
+ default n
+ help
+ Generic Voltage and Current Regulator support.
+
+ This framework is designed to provide a generic interface to voltage
+ and current regulators within the Linux kernel. It's intended to
+ provide voltage and current control to client or consumer drivers and
+ also provide status information to user space applications through a
+ sysfs interface.
+
+ The intention is to allow systems to dynamically control regulator
+ output in order to save power and prolong battery life. This applies
+ to both voltage regulators (where voltage output is controllable) and
+ current sinks (where current output is controllable).
+
+ This framework safely compiles out if not selected so that client
+ drivers can still be used in systems with no software controllable
+ regulators.
+
+ If unsure, say no.
+
+config REGULATOR_DEBUG
+ bool "Regulator debug support"
+ depends on REGULATOR
+ help
+ Say yes here to enable debugging support.
+
+config REGULATOR_FIXED_VOLTAGE
+ tristate
+ default n
+ select REGULATOR
+
+config REGULATOR_VIRTUAL_CONSUMER
+ tristate "Virtual regulator consumer support"
+ default n
+ select REGULATOR
+ help
+ This driver provides a virtual consumer for the voltage and
+ current regulator API which provides sysfs controls for
+ configuring the supplies requested. This is mainly useful
+ for test purposes.
+
+ If unsure, say no.
+
+config REGULATOR_BQ24022
+ tristate "TI bq24022 Dual Input 1-Cell Li-Ion Charger IC"
+ default n
+ select REGULATOR
+ help
+ This driver controls a TI bq24022 Charger attached via
+ GPIOs. The provided current regulator can enable/disable
+ charging select between 100 mA and 500 mA charging current
+ limit.
+
+endmenu
diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile
new file mode 100644
index 00000000000..ac2c64efe65
--- /dev/null
+++ b/drivers/regulator/Makefile
@@ -0,0 +1,12 @@
+#
+# Makefile for regulator drivers.
+#
+
+
+obj-$(CONFIG_REGULATOR) += core.o
+obj-$(CONFIG_REGULATOR_FIXED_VOLTAGE) += fixed.o
+obj-$(CONFIG_REGULATOR_VIRTUAL_CONSUMER) += virtual.o
+
+obj-$(CONFIG_REGULATOR_BQ24022) += bq24022.o
+
+ccflags-$(CONFIG_REGULATOR_DEBUG) += -DDEBUG
diff --git a/drivers/regulator/bq24022.c b/drivers/regulator/bq24022.c
new file mode 100644
index 00000000000..263699d6152
--- /dev/null
+++ b/drivers/regulator/bq24022.c
@@ -0,0 +1,167 @@
+/*
+ * Support for TI bq24022 (bqTINY-II) Dual Input (USB/AC Adpater)
+ * 1-Cell Li-Ion Charger connected via GPIOs.
+ *
+ * Copyright (c) 2008 Philipp Zabel
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/err.h>
+#include <linux/gpio.h>
+#include <linux/regulator/bq24022.h>
+#include <linux/regulator/driver.h>
+
+static int bq24022_set_current_limit(struct regulator_dev *rdev,
+ int min_uA, int max_uA)
+{
+ struct platform_device *pdev = rdev_get_drvdata(rdev);
+ struct bq24022_mach_info *pdata = pdev->dev.platform_data;
+
+ dev_dbg(&pdev->dev, "setting current limit to %s mA\n",
+ max_uA >= 500000 ? "500" : "100");
+
+ /* REVISIT: maybe return error if min_uA != 0 ? */
+ gpio_set_value(pdata->gpio_iset2, max_uA >= 500000);
+ return 0;
+}
+
+static int bq24022_get_current_limit(struct regulator_dev *rdev)
+{
+ struct platform_device *pdev = rdev_get_drvdata(rdev);
+ struct bq24022_mach_info *pdata = pdev->dev.platform_data;
+
+ return gpio_get_value(pdata->gpio_iset2) ? 500000 : 100000;
+}
+
+static int bq24022_enable(struct regulator_dev *rdev)
+{
+ struct platform_device *pdev = rdev_get_drvdata(rdev);
+ struct bq24022_mach_info *pdata = pdev->dev.platform_data;
+
+ dev_dbg(&pdev->dev, "enabling charger\n");
+
+ gpio_set_value(pdata->gpio_nce, 0);
+ return 0;
+}
+
+static int bq24022_disable(struct regulator_dev *rdev)
+{
+ struct platform_device *pdev = rdev_get_drvdata(rdev);
+ struct bq24022_mach_info *pdata = pdev->dev.platform_data;
+
+ dev_dbg(&pdev->dev, "disabling charger\n");
+
+ gpio_set_value(pdata->gpio_nce, 1);
+ return 0;
+}
+
+static int bq24022_is_enabled(struct regulator_dev *rdev)
+{
+ struct platform_device *pdev = rdev_get_drvdata(rdev);
+ struct bq24022_mach_info *pdata = pdev->dev.platform_data;
+
+ return !gpio_get_value(pdata->gpio_nce);
+}
+
+static struct regulator_ops bq24022_ops = {
+ .set_current_limit = bq24022_set_current_limit,
+ .get_current_limit = bq24022_get_current_limit,
+ .enable = bq24022_enable,
+ .disable = bq24022_disable,
+ .is_enabled = bq24022_is_enabled,
+};
+
+static struct regulator_desc bq24022_desc = {
+ .name = "bq24022",
+ .ops = &bq24022_ops,
+ .type = REGULATOR_CURRENT,
+};
+
+static int __init bq24022_probe(struct platform_device *pdev)
+{
+ struct bq24022_mach_info *pdata = pdev->dev.platform_data;
+ struct regulator_dev *bq24022;
+ int ret;
+
+ if (!pdata || !pdata->gpio_nce || !pdata->gpio_iset2)
+ return -EINVAL;
+
+ ret = gpio_request(pdata->gpio_nce, "ncharge_en");
+ if (ret) {
+ dev_dbg(&pdev->dev, "couldn't request nCE GPIO: %d\n",
+ pdata->gpio_nce);
+ goto err_ce;
+ }
+ ret = gpio_request(pdata->gpio_iset2, "charge_mode");
+ if (ret) {
+ dev_dbg(&pdev->dev, "couldn't request ISET2 GPIO: %d\n",
+ pdata->gpio_iset2);
+ goto err_iset2;
+ }
+ ret = gpio_direction_output(pdata->gpio_iset2, 0);
+ ret = gpio_direction_output(pdata->gpio_nce, 1);
+
+ bq24022 = regulator_register(&bq24022_desc, pdev);
+ if (IS_ERR(bq24022)) {
+ dev_dbg(&pdev->dev, "couldn't register regulator\n");
+ ret = PTR_ERR(bq24022);
+ goto err_reg;
+ }
+ platform_set_drvdata(pdev, bq24022);
+ dev_dbg(&pdev->dev, "registered regulator\n");
+
+ return 0;
+err_reg:
+ gpio_free(pdata->gpio_iset2);
+err_iset2:
+ gpio_free(pdata->gpio_nce);
+err_ce:
+ return ret;
+}
+
+static int __devexit bq24022_remove(struct platform_device *pdev)
+{
+ struct bq24022_mach_info *pdata = pdev->dev.platform_data;
+ struct regulator_dev *bq24022 = platform_get_drvdata(pdev);
+
+ regulator_unregister(bq24022);
+ gpio_free(pdata->gpio_iset2);
+ gpio_free(pdata->gpio_nce);
+
+ return 0;
+}
+
+static struct platform_driver bq24022_driver = {
+ .driver = {
+ .name = "bq24022",
+ },
+ .remove = __devexit_p(bq24022_remove),
+};
+
+static int __init bq24022_init(void)
+{
+ return platform_driver_probe(&bq24022_driver, bq24022_probe);
+}
+
+static void __exit bq24022_exit(void)
+{
+ platform_driver_unregister(&bq24022_driver);
+}
+
+/*
+ * make sure this is probed before gpio_vbus and pda_power,
+ * but after asic3 or other GPIO expander drivers.
+ */
+subsys_initcall(bq24022_init);
+module_exit(bq24022_exit);
+
+MODULE_AUTHOR("Philipp Zabel");
+MODULE_DESCRIPTION("TI bq24022 Li-Ion Charger driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
new file mode 100644
index 00000000000..9c798626156
--- /dev/null
+++ b/drivers/regulator/core.c
@@ -0,0 +1,1903 @@
+/*
+ * core.c -- Voltage/Current Regulator framework.
+ *
+ * Copyright 2007, 2008 Wolfson Microelectronics PLC.
+ *
+ * Author: Liam Girdwood <liam.girdwood@wolfsonmicro.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/mutex.h>
+#include <linux/suspend.h>
+#include <linux/regulator/consumer.h>
+#include <linux/regulator/driver.h>
+#include <linux/regulator/machine.h>
+
+#define REGULATOR_VERSION "0.5"
+
+static DEFINE_MUTEX(regulator_list_mutex);
+static LIST_HEAD(regulator_list);
+static LIST_HEAD(regulator_map_list);
+
+/**
+ * struct regulator_dev
+ *
+ * Voltage / Current regulator class device. One for each regulator.
+ */
+struct regulator_dev {
+ struct regulator_desc *desc;
+ int use_count;
+
+ /* lists we belong to */
+ struct list_head list; /* list of all regulators */
+ struct list_head slist; /* list of supplied regulators */
+
+ /* lists we own */
+ struct list_head consumer_list; /* consumers we supply */
+ struct list_head supply_list; /* regulators we supply */
+
+ struct blocking_notifier_head notifier;
+ struct mutex mutex; /* consumer lock */
+ struct module *owner;
+ struct device dev;
+ struct regulation_constraints *constraints;
+ struct regulator_dev *supply; /* for tree */
+
+ void *reg_data; /* regulator_dev data */
+};
+
+/**
+ * struct regulator_map
+ *
+ * Used to provide symbolic supply names to devices.
+ */
+struct regulator_map {
+ struct list_head list;
+ struct device *dev;
+ const char *supply;
+ const char *regulator;
+};
+
+static inline struct regulator_dev *to_rdev(struct device *d)
+{
+ return container_of(d, struct regulator_dev, dev);
+}
+
+/*
+ * struct regulator
+ *
+ * One for each consumer device.
+ */
+struct regulator {
+ struct device *dev;
+ struct list_head list;
+ int uA_load;
+ int min_uV;
+ int max_uV;
+ int enabled; /* client has called enabled */
+ char *supply_name;
+ struct device_attribute dev_attr;
+ struct regulator_dev *rdev;
+};
+
+static int _regulator_is_enabled(struct regulator_dev *rdev);
+static int _regulator_disable(struct regulator_dev *rdev);
+static int _regulator_get_voltage(struct regulator_dev *rdev);
+static int _regulator_get_current_limit(struct regulator_dev *rdev);
+static unsigned int _regulator_get_mode(struct regulator_dev *rdev);
+static void _notifier_call_chain(struct regulator_dev *rdev,
+ unsigned long event, void *data);
+
+/* gets the regulator for a given consumer device */
+static struct regulator *get_device_regulator(struct device *dev)
+{
+ struct regulator *regulator = NULL;
+ struct regulator_dev *rdev;
+
+ mutex_lock(&regulator_list_mutex);
+ list_for_each_entry(rdev, &regulator_list, list) {
+ mutex_lock(&rdev->mutex);
+ list_for_each_entry(regulator, &rdev->consumer_list, list) {
+ if (regulator->dev == dev) {
+ mutex_unlock(&rdev->mutex);
+ mutex_unlock(&regulator_list_mutex);
+ return regulator;
+ }
+ }
+ mutex_unlock(&rdev->mutex);
+ }
+ mutex_unlock(&regulator_list_mutex);
+ return NULL;
+}
+
+/* Platform voltage constraint check */
+static int regulator_check_voltage(struct regulator_dev *rdev,
+ int *min_uV, int *max_uV)
+{
+ BUG_ON(*min_uV > *max_uV);
+
+ if (!rdev->constraints) {
+ printk(KERN_ERR "%s: no constraints for %s\n", __func__,
+ rdev->desc->name);
+ return -ENODEV;
+ }
+ if (!(rdev->constraints->valid_ops_mask & REGULATOR_CHANGE_VOLTAGE)) {
+ printk(KERN_ERR "%s: operation not allowed for %s\n",
+ __func__, rdev->desc->name);
+ return -EPERM;
+ }
+
+ if (*max_uV > rdev->constraints->max_uV)
+ *max_uV = rdev->constraints->max_uV;
+ if (*min_uV < rdev->constraints->min_uV)
+ *min_uV = rdev->constraints->min_uV;
+
+ if (*min_uV > *max_uV)
+ return -EINVAL;
+
+ return 0;
+}
+
+/* current constraint check */
+static int regulator_check_current_limit(struct regulator_dev *rdev,
+ int *min_uA, int *max_uA)
+{
+ BUG_ON(*min_uA > *max_uA);
+
+ if (!rdev->constraints) {
+ printk(KERN_ERR "%s: no constraints for %s\n", __func__,
+ rdev->desc->name);
+ return -ENODEV;
+ }
+ if (!(rdev->constraints->valid_ops_mask & REGULATOR_CHANGE_CURRENT)) {
+ printk(KERN_ERR "%s: operation not allowed for %s\n",
+ __func__, rdev->desc->name);
+ return -EPERM;
+ }
+
+ if (*max_uA > rdev->constraints->max_uA)
+ *max_uA = rdev->constraints->max_uA;
+ if (*min_uA < rdev->constraints->min_uA)
+ *min_uA = rdev->constraints->min_uA;
+
+ if (*min_uA > *max_uA)
+ return -EINVAL;
+
+ return 0;
+}
+
+/* operating mode constraint check */
+static int regulator_check_mode(struct regulator_dev *rdev, int mode)
+{
+ if (!rdev->constraints) {
+ printk(KERN_ERR "%s: no constraints for %s\n", __func__,
+ rdev->desc->name);
+ return -ENODEV;
+ }
+ if (!(rdev->constraints->valid_ops_mask & REGULATOR_CHANGE_MODE)) {
+ printk(KERN_ERR "%s: operation not allowed for %s\n",
+ __func__, rdev->desc->name);
+ return -EPERM;
+ }
+ if (!(rdev->constraints->valid_modes_mask & mode)) {
+ printk(KERN_ERR "%s: invalid mode %x for %s\n",
+ __func__, mode, rdev->desc->name);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/* dynamic regulator mode switching constraint check */
+static int regulator_check_drms(struct regulator_dev *rdev)
+{
+ if (!rdev->constraints) {
+ printk(KERN_ERR "%s: no constraints for %s\n", __func__,
+ rdev->desc->name);
+ return -ENODEV;
+ }
+ if (!(rdev->constraints->valid_ops_mask & REGULATOR_CHANGE_DRMS)) {
+ printk(KERN_ERR "%s: operation not allowed for %s\n",
+ __func__, rdev->desc->name);
+ return -EPERM;
+ }
+ return 0;
+}
+
+static ssize_t device_requested_uA_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct regulator *regulator;
+
+ regulator = get_device_regulator(dev);
+ if (regulator == NULL)
+ return 0;
+
+ return sprintf(buf, "%d\n", regulator->uA_load);
+}
+
+static ssize_t regulator_uV_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct regulator_dev *rdev = to_rdev(dev);
+ ssize_t ret;
+
+ mutex_lock(&rdev->mutex);
+ ret = sprintf(buf, "%d\n", _regulator_get_voltage(rdev));
+ mutex_unlock(&rdev->mutex);
+
+ return ret;
+}
+
+static ssize_t regulator_uA_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct regulator_dev *rdev = to_rdev(dev);
+
+ return sprintf(buf, "%d\n", _regulator_get_current_limit(rdev));
+}
+
+static ssize_t regulator_opmode_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct regulator_dev *rdev = to_rdev(dev);
+ int mode = _regulator_get_mode(rdev);
+
+ switch (mode) {
+ case REGULATOR_MODE_FAST:
+ return sprintf(buf, "fast\n");
+ case REGULATOR_MODE_NORMAL:
+ return sprintf(buf, "normal\n");
+ case REGULATOR_MODE_IDLE:
+ return sprintf(buf, "idle\n");
+ case REGULATOR_MODE_STANDBY:
+ return sprintf(buf, "standby\n");
+ }
+ return sprintf(buf, "unknown\n");
+}
+
+static ssize_t regulator_state_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct regulator_dev *rdev = to_rdev(dev);
+ int state = _regulator_is_enabled(rdev);
+
+ if (state > 0)
+ return sprintf(buf, "enabled\n");
+ else if (state == 0)
+ return sprintf(buf, "disabled\n");
+ else
+ return sprintf(buf, "unknown\n");
+}
+
+static ssize_t regulator_min_uA_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct regulator_dev *rdev = to_rdev(dev);
+
+ if (!rdev->constraints)
+ return sprintf(buf, "constraint not defined\n");
+
+ return sprintf(buf, "%d\n", rdev->constraints->min_uA);
+}
+
+static ssize_t regulator_max_uA_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct regulator_dev *rdev = to_rdev(dev);
+
+ if (!rdev->constraints)
+ return sprintf(buf, "constraint not defined\n");
+
+ return sprintf(buf, "%d\n", rdev->constraints->max_uA);
+}
+
+static ssize_t regulator_min_uV_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct regulator_dev *rdev = to_rdev(dev);
+
+ if (!rdev->constraints)
+ return sprintf(buf, "constraint not defined\n");
+
+ return sprintf(buf, "%d\n", rdev->constraints->min_uV);
+}
+
+static ssize_t regulator_max_uV_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct regulator_dev *rdev = to_rdev(dev);
+
+ if (!rdev->constraints)
+ return sprintf(buf, "constraint not defined\n");
+
+ return sprintf(buf, "%d\n", rdev->constraints->max_uV);
+}
+
+static ssize_t regulator_total_uA_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct regulator_dev *rdev = to_rdev(dev);
+ struct regulator *regulator;
+ int uA = 0;
+
+ mutex_lock(&rdev->mutex);
+ list_for_each_entry(regulator, &rdev->consumer_list, list)
+ uA += regulator->uA_load;
+ mutex_unlock(&rdev->mutex);
+ return sprintf(buf, "%d\n", uA);
+}
+
+static ssize_t regulator_num_users_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct regulator_dev *rdev = to_rdev(dev);
+ return sprintf(buf, "%d\n", rdev->use_count);
+}
+
+static ssize_t regulator_type_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct regulator_dev *rdev = to_rdev(dev);
+
+ switch (rdev->desc->type) {
+ case REGULATOR_VOLTAGE:
+ return sprintf(buf, "voltage\n");
+ case REGULATOR_CURRENT:
+ return sprintf(buf, "current\n");
+ }
+ return sprintf(buf, "unknown\n");
+}
+
+static ssize_t regulator_suspend_mem_uV_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct regulator_dev *rdev = to_rdev(dev);
+
+ if (!rdev->constraints)
+ return sprintf(buf, "not defined\n");
+ return sprintf(buf, "%d\n", rdev->constraints->state_mem.uV);
+}
+
+static ssize_t regulator_suspend_disk_uV_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct regulator_dev *rdev = to_rdev(dev);
+
+ if (!rdev->constraints)
+ return sprintf(buf, "not defined\n");
+ return sprintf(buf, "%d\n", rdev->constraints->state_disk.uV);
+}
+
+static ssize_t regulator_suspend_standby_uV_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct regulator_dev *rdev = to_rdev(dev);
+
+ if (!rdev->constraints)
+ return sprintf(buf, "not defined\n");
+ return sprintf(buf, "%d\n", rdev->constraints->state_standby.uV);
+}
+
+static ssize_t suspend_opmode_show(struct regulator_dev *rdev,
+ unsigned int mode, char *buf)
+{
+ switch (mode) {
+ case REGULATOR_MODE_FAST:
+ return sprintf(buf, "fast\n");
+ case REGULATOR_MODE_NORMAL:
+ return sprintf(buf, "normal\n");
+ case REGULATOR_MODE_IDLE:
+ return sprintf(buf, "idle\n");
+ case REGULATOR_MODE_STANDBY:
+ return sprintf(buf, "standby\n");
+ }
+ return sprintf(buf, "unknown\n");
+}
+
+static ssize_t regulator_suspend_mem_mode_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct regulator_dev *rdev = to_rdev(dev);
+
+ if (!rdev->constraints)
+ return sprintf(buf, "not defined\n");
+ return suspend_opmode_show(rdev,
+ rdev->constraints->state_mem.mode, buf);
+}
+
+static ssize_t regulator_suspend_disk_mode_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct regulator_dev *rdev = to_rdev(dev);
+
+ if (!rdev->constraints)
+ return sprintf(buf, "not defined\n");
+ return suspend_opmode_show(rdev,
+ rdev->constraints->state_disk.mode, buf);
+}
+
+static ssize_t regulator_suspend_standby_mode_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct regulator_dev *rdev = to_rdev(dev);
+
+ if (!rdev->constraints)
+ return sprintf(buf, "not defined\n");
+ return suspend_opmode_show(rdev,
+ rdev->constraints->state_standby.mode, buf);
+}
+
+static ssize_t regulator_suspend_mem_state_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct regulator_dev *rdev = to_rdev(dev);
+
+ if (!rdev->constraints)
+ return sprintf(buf, "not defined\n");
+
+ if (rdev->constraints->state_mem.enabled)
+ return sprintf(buf, "enabled\n");
+ else
+ return sprintf(buf, "disabled\n");
+}
+
+static ssize_t regulator_suspend_disk_state_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct regulator_dev *rdev = to_rdev(dev);
+
+ if (!rdev->constraints)
+ return sprintf(buf, "not defined\n");
+
+ if (rdev->constraints->state_disk.enabled)
+ return sprintf(buf, "enabled\n");
+ else
+ return sprintf(buf, "disabled\n");
+}
+
+static ssize_t regulator_suspend_standby_state_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct regulator_dev *rdev = to_rdev(dev);
+
+ if (!rdev->constraints)
+ return sprintf(buf, "not defined\n");
+
+ if (rdev->constraints->state_standby.enabled)
+ return sprintf(buf, "enabled\n");
+ else
+ return sprintf(buf, "disabled\n");
+}
+static struct device_attribute regulator_dev_attrs[] = {
+ __ATTR(microvolts, 0444, regulator_uV_show, NULL),
+ __ATTR(microamps, 0444, regulator_uA_show, NULL),
+ __ATTR(opmode, 0444, regulator_opmode_show, NULL),
+ __ATTR(state, 0444, regulator_state_show, NULL),
+ __ATTR(min_microvolts, 0444, regulator_min_uV_show, NULL),
+ __ATTR(min_microamps, 0444, regulator_min_uA_show, NULL),
+ __ATTR(max_microvolts, 0444, regulator_max_uV_show, NULL),
+ __ATTR(max_microamps, 0444, regulator_max_uA_show, NULL),
+ __ATTR(requested_microamps, 0444, regulator_total_uA_show, NULL),
+ __ATTR(num_users, 0444, regulator_num_users_show, NULL),
+ __ATTR(type, 0444, regulator_type_show, NULL),
+ __ATTR(suspend_mem_microvolts, 0444,
+ regulator_suspend_mem_uV_show, NULL),
+ __ATTR(suspend_disk_microvolts, 0444,
+ regulator_suspend_disk_uV_show, NULL),
+ __ATTR(suspend_standby_microvolts, 0444,
+ regulator_suspend_standby_uV_show, NULL),
+ __ATTR(suspend_mem_mode, 0444,
+ regulator_suspend_mem_mode_show, NULL),
+ __ATTR(suspend_disk_mode, 0444,
+ regulator_suspend_disk_mode_show, NULL),
+ __ATTR(suspend_standby_mode, 0444,
+ regulator_suspend_standby_mode_show, NULL),
+ __ATTR(suspend_mem_state, 0444,
+ regulator_suspend_mem_state_show, NULL),
+ __ATTR(suspend_disk_state, 0444,
+ regulator_suspend_disk_state_show, NULL),
+ __ATTR(suspend_standby_state, 0444,
+ regulator_suspend_standby_state_show, NULL),
+ __ATTR_NULL,
+};
+
+static void regulator_dev_release(struct device *dev)
+{
+ struct regulator_dev *rdev = to_rdev(dev);
+ kfree(rdev);
+}
+
+static struct class regulator_class = {
+ .name = "regulator",
+ .dev_release = regulator_dev_release,
+ .dev_attrs = regulator_dev_attrs,
+};
+
+/* Calculate the new optimum regulator operating mode based on the new total
+ * consumer load. All locks held by caller */
+static void drms_uA_update(struct regulator_dev *rdev)
+{
+ struct regulator *sibling;
+ int current_uA = 0, output_uV, input_uV, err;
+ unsigned int mode;
+
+ err = regulator_check_drms(rdev);
+ if (err < 0 || !rdev->desc->ops->get_optimum_mode ||
+ !rdev->desc->ops->get_voltage || !rdev->desc->ops->set_mode);
+ return;
+
+ /* get output voltage */
+ output_uV = rdev->desc->ops->get_voltage(rdev);
+ if (output_uV <= 0)
+ return;
+
+ /* get input voltage */
+ if (rdev->supply && rdev->supply->desc->ops->get_voltage)
+ input_uV = rdev->supply->desc->ops->get_voltage(rdev->supply);
+ else
+ input_uV = rdev->constraints->input_uV;
+ if (input_uV <= 0)
+ return;
+
+ /* calc total requested load */
+ list_for_each_entry(sibling, &rdev->consumer_list, list)
+ current_uA += sibling->uA_load;
+
+ /* now get the optimum mode for our new total regulator load */
+ mode = rdev->desc->ops->get_optimum_mode(rdev, input_uV,
+ output_uV, current_uA);
+
+ /* check the new mode is allowed */
+ err = regulator_check_mode(rdev, mode);
+ if (err == 0)
+ rdev->desc->ops->set_mode(rdev, mode);
+}
+
+static int suspend_set_state(struct regulator_dev *rdev,
+ struct regulator_state *rstate)
+{
+ int ret = 0;
+
+ /* enable & disable are mandatory for suspend control */
+ if (!rdev->desc->ops->set_suspend_enable ||
+ !rdev->desc->ops->set_suspend_disable)
+ return -EINVAL;
+
+ if (rstate->enabled)
+ ret = rdev->desc->ops->set_suspend_enable(rdev);
+ else
+ ret = rdev->desc->ops->set_suspend_disable(rdev);
+ if (ret < 0) {
+ printk(KERN_ERR "%s: failed to enabled/disable\n", __func__);
+ return ret;
+ }
+
+ if (rdev->desc->ops->set_suspend_voltage && rstate->uV > 0) {
+ ret = rdev->desc->ops->set_suspend_voltage(rdev, rstate->uV);
+ if (ret < 0) {
+ printk(KERN_ERR "%s: failed to set voltage\n",
+ __func__);
+ return ret;
+ }
+ }
+
+ if (rdev->desc->ops->set_suspend_mode && rstate->mode > 0) {
+ ret = rdev->desc->ops->set_suspend_mode(rdev, rstate->mode);
+ if (ret < 0) {
+ printk(KERN_ERR "%s: failed to set mode\n", __func__);
+ return ret;
+ }
+ }
+ return ret;
+}
+
+/* locks held by caller */
+static int suspend_prepare(struct regulator_dev *rdev, suspend_state_t state)
+{
+ if (!rdev->constraints)
+ return -EINVAL;
+
+ switch (state) {
+ case PM_SUSPEND_STANDBY:
+ return suspend_set_state(rdev,
+ &rdev->constraints->state_standby);
+ case PM_SUSPEND_MEM:
+ return suspend_set_state(rdev,
+ &rdev->constraints->state_mem);
+ case PM_SUSPEND_MAX:
+ return suspend_set_state(rdev,
+ &rdev->constraints->state_disk);
+ default:
+ return -EINVAL;
+ }
+}
+
+static void print_constraints(struct regulator_dev *rdev)
+{
+ struct regulation_constraints *constraints = rdev->constraints;
+ char buf[80];
+ int count;
+
+ if (rdev->desc->type == REGULATOR_VOLTAGE) {
+ if (constraints->min_uV == constraints->max_uV)
+ count = sprintf(buf, "%d mV ",
+ constraints->min_uV / 1000);
+ else
+ count = sprintf(buf, "%d <--> %d mV ",
+ constraints->min_uV / 1000,
+ constraints->max_uV / 1000);
+ } else {
+ if (constraints->min_uA == constraints->max_uA)
+ count = sprintf(buf, "%d mA ",
+ constraints->min_uA / 1000);
+ else
+ count = sprintf(buf, "%d <--> %d mA ",
+ constraints->min_uA / 1000,
+ constraints->max_uA / 1000);
+ }
+ if (constraints->valid_modes_mask & REGULATOR_MODE_FAST)
+ count += sprintf(buf + count, "fast ");
+ if (constraints->valid_modes_mask & REGULATOR_MODE_NORMAL)
+ count += sprintf(buf + count, "normal ");
+ if (constraints->valid_modes_mask & REGULATOR_MODE_IDLE)
+ count += sprintf(buf + count, "idle ");
+ if (constraints->valid_modes_mask & REGULATOR_MODE_STANDBY)
+ count += sprintf(buf + count, "standby");
+
+ printk(KERN_INFO "regulator: %s: %s\n", rdev->desc->name, buf);
+}
+
+#define REG_STR_SIZE 32
+
+static struct regulator *create_regulator(struct regulator_dev *rdev,
+ struct device *dev,
+ const char *supply_name)
+{
+ struct regulator *regulator;
+ char buf[REG_STR_SIZE];
+ int err, size;
+
+ regulator = kzalloc(sizeof(*regulator), GFP_KERNEL);
+ if (regulator == NULL)
+ return NULL;
+
+ mutex_lock(&rdev->mutex);
+ regulator->rdev = rdev;
+ list_add(&regulator->list, &rdev->consumer_list);
+
+ if (dev) {
+ /* create a 'requested_microamps_name' sysfs entry */
+ size = scnprintf(buf, REG_STR_SIZE, "microamps_requested_%s",
+ supply_name);
+ if (size >= REG_STR_SIZE)
+ goto overflow_err;
+
+ regulator->dev = dev;
+ regulator->dev_attr.attr.name = kstrdup(buf, GFP_KERNEL);
+ if (regulator->dev_attr.attr.name == NULL)
+ goto attr_name_err;
+
+ regulator->dev_attr.attr.owner = THIS_MODULE;
+ regulator->dev_attr.attr.mode = 0444;
+ regulator->dev_attr.show = device_requested_uA_show;
+ err = device_create_file(dev, &regulator->dev_attr);
+ if (err < 0) {
+ printk(KERN_WARNING "%s: could not add regulator_dev"
+ " load sysfs\n", __func__);
+ goto attr_name_err;
+ }
+
+ /* also add a link to the device sysfs entry */
+ size = scnprintf(buf, REG_STR_SIZE, "%s-%s",
+ dev->kobj.name, supply_name);
+ if (size >= REG_STR_SIZE)
+ goto attr_err;
+
+ regulator->supply_name = kstrdup(buf, GFP_KERNEL);
+ if (regulator->supply_name == NULL)
+ goto attr_err;
+
+ err = sysfs_create_link(&rdev->dev.kobj, &dev->kobj,
+ buf);
+ if (err) {
+ printk(KERN_WARNING
+ "%s: could not add device link %s err %d\n",
+ __func__, dev->kobj.name, err);
+ device_remove_file(dev, &regulator->dev_attr);
+ goto link_name_err;
+ }
+ }
+ mutex_unlock(&rdev->mutex);
+ return regulator;
+link_name_err:
+ kfree(regulator->supply_name);
+attr_err:
+ device_remove_file(regulator->dev, &regulator->dev_attr);
+attr_name_err:
+ kfree(regulator->dev_attr.attr.name);
+overflow_err:
+ list_del(&regulator->list);
+ kfree(regulator);
+ mutex_unlock(&rdev->mutex);
+ return NULL;
+}
+
+/**
+ * regulator_get - lookup and obtain a reference to a regulator.
+ * @dev: device for regulator "consumer"
+ * @id: Supply name or regulator ID.
+ *
+ * Returns a struct regulator corresponding to the regulator producer,
+ * or IS_ERR() condition containing errno. Use of supply names
+ * configured via regulator_set_device_supply() is strongly
+ * encouraged.
+ */
+struct regulator *regulator_get(struct device *dev, const char *id)
+{
+ struct regulator_dev *rdev;
+ struct regulator_map *map;
+ struct regulator *regulator = ERR_PTR(-ENODEV);
+ const char *supply = id;
+
+ if (id == NULL) {
+ printk(KERN_ERR "regulator: get() with no identifier\n");
+ return regulator;
+ }
+
+ mutex_lock(&regulator_list_mutex);
+
+ list_for_each_entry(map, &regulator_map_list, list) {
+ if (dev == map->dev &&
+ strcmp(map->supply, id) == 0) {
+ supply = map->regulator;
+ break;
+ }
+ }
+
+ list_for_each_entry(rdev, &regulator_list, list) {
+ if (strcmp(supply, rdev->desc->name) == 0 &&
+ try_module_get(rdev->owner))
+ goto found;
+ }
+ printk(KERN_ERR "regulator: Unable to get requested regulator: %s\n",
+ id);
+ mutex_unlock(&regulator_list_mutex);
+ return regulator;
+
+found:
+ regulator = create_regulator(rdev, dev, id);
+ if (regulator == NULL) {
+ regulator = ERR_PTR(-ENOMEM);
+ module_put(rdev->owner);
+ }
+
+ mutex_unlock(&regulator_list_mutex);
+ return regulator;
+}
+EXPORT_SYMBOL_GPL(regulator_get);
+
+/**
+ * regulator_put - "free" the regulator source
+ * @regulator: regulator source
+ *
+ * Note: drivers must ensure that all regulator_enable calls made on this
+ * regulator source are balanced by regulator_disable calls prior to calling
+ * this function.
+ */
+void regulator_put(struct regulator *regulator)
+{
+ struct regulator_dev *rdev;
+
+ if (regulator == NULL || IS_ERR(regulator))
+ return;
+
+ if (regulator->enabled) {
+ printk(KERN_WARNING "Releasing supply %s while enabled\n",
+ regulator->supply_name);
+ WARN_ON(regulator->enabled);
+ regulator_disable(regulator);
+ }
+
+ mutex_lock(&regulator_list_mutex);
+ rdev = regulator->rdev;
+
+ /* remove any sysfs entries */
+ if (regulator->dev) {
+ sysfs_remove_link(&rdev->dev.kobj, regulator->supply_name);
+ kfree(regulator->supply_name);
+ device_remove_file(regulator->dev, &regulator->dev_attr);
+ kfree(regulator->dev_attr.attr.name);
+ }
+ list_del(&regulator->list);
+ kfree(regulator);
+
+ module_put(rdev->owner);
+ mutex_unlock(&regulator_list_mutex);
+}
+EXPORT_SYMBOL_GPL(regulator_put);
+
+/* locks held by regulator_enable() */
+static int _regulator_enable(struct regulator_dev *rdev)
+{
+ int ret = -EINVAL;
+
+ if (!rdev->constraints) {
+ printk(KERN_ERR "%s: %s has no constraints\n",
+ __func__, rdev->desc->name);
+ return ret;
+ }
+
+ /* do we need to enable the supply regulator first */
+ if (rdev->supply) {
+ ret = _regulator_enable(rdev->supply);
+ if (ret < 0) {
+ printk(KERN_ERR "%s: failed to enable %s: %d\n",
+ __func__, rdev->desc->name, ret);
+ return ret;
+ }
+ }
+
+ /* check voltage and requested load before enabling */
+ if (rdev->desc->ops->enable) {
+
+ if (rdev->constraints &&
+ (rdev->constraints->valid_ops_mask &
+ REGULATOR_CHANGE_DRMS))
+ drms_uA_update(rdev);
+
+ ret = rdev->desc->ops->enable(rdev);
+ if (ret < 0) {
+ printk(KERN_ERR "%s: failed to enable %s: %d\n",
+ __func__, rdev->desc->name, ret);
+ return ret;
+ }
+ rdev->use_count++;
+ return ret;
+ }
+
+ return ret;
+}
+
+/**
+ * regulator_enable - enable regulator output
+ * @regulator: regulator source
+ *
+ * Enable the regulator output at the predefined voltage or current value.
+ * NOTE: the output value can be set by other drivers, boot loader or may be
+ * hardwired in the regulator.
+ * NOTE: calls to regulator_enable() must be balanced with calls to
+ * regulator_disable().
+ */
+int regulator_enable(struct regulator *regulator)
+{
+ int ret;
+
+ if (regulator->enabled) {
+ printk(KERN_CRIT "Regulator %s already enabled\n",
+ regulator->supply_name);
+ WARN_ON(regulator->enabled);
+ return 0;
+ }
+
+ mutex_lock(&regulator->rdev->mutex);
+ regulator->enabled = 1;
+ ret = _regulator_enable(regulator->rdev);
+ if (ret != 0)
+ regulator->enabled = 0;
+ mutex_unlock(&regulator->rdev->mutex);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(regulator_enable);
+
+/* locks held by regulator_disable() */
+static int _regulator_disable(struct regulator_dev *rdev)
+{
+ int ret = 0;
+
+ /* are we the last user and permitted to disable ? */
+ if (rdev->use_count == 1 && !rdev->constraints->always_on) {
+
+ /* we are last user */
+ if (rdev->desc->ops->disable) {
+ ret = rdev->desc->ops->disable(rdev);
+ if (ret < 0) {
+ printk(KERN_ERR "%s: failed to disable %s\n",
+ __func__, rdev->desc->name);
+ return ret;
+ }
+ }
+
+ /* decrease our supplies ref count and disable if required */
+ if (rdev->supply)
+ _regulator_disable(rdev->supply);
+
+ rdev->use_count = 0;
+ } else if (rdev->use_count > 1) {
+
+ if (rdev->constraints &&
+ (rdev->constraints->valid_ops_mask &
+ REGULATOR_CHANGE_DRMS))
+ drms_uA_update(rdev);
+
+ rdev->use_count--;
+ }
+ return ret;
+}
+
+/**
+ * regulator_disable - disable regulator output
+ * @regulator: regulator source
+ *
+ * Disable the regulator output voltage or current.
+ * NOTE: this will only disable the regulator output if no other consumer
+ * devices have it enabled.
+ * NOTE: calls to regulator_enable() must be balanced with calls to
+ * regulator_disable().
+ */
+int regulator_disable(struct regulator *regulator)
+{
+ int ret;
+
+ if (!regulator->enabled) {
+ printk(KERN_ERR "%s: not in use by this consumer\n",
+ __func__);
+ return 0;
+ }
+
+ mutex_lock(&regulator->rdev->mutex);
+ regulator->enabled = 0;
+ regulator->uA_load = 0;
+ ret = _regulator_disable(regulator->rdev);
+ mutex_unlock(&regulator->rdev->mutex);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(regulator_disable);
+
+/* locks held by regulator_force_disable() */
+static int _regulator_force_disable(struct regulator_dev *rdev)
+{
+ int ret = 0;
+
+ /* force disable */
+ if (rdev->desc->ops->disable) {
+ /* ah well, who wants to live forever... */
+ ret = rdev->desc->ops->disable(rdev);
+ if (ret < 0) {
+ printk(KERN_ERR "%s: failed to force disable %s\n",
+ __func__, rdev->desc->name);
+ return ret;
+ }
+ /* notify other consumers that power has been forced off */
+ _notifier_call_chain(rdev, REGULATOR_EVENT_FORCE_DISABLE,
+ NULL);
+ }
+
+ /* decrease our supplies ref count and disable if required */
+ if (rdev->supply)
+ _regulator_disable(rdev->supply);
+
+ rdev->use_count = 0;
+ return ret;
+}
+
+/**
+ * regulator_force_disable - force disable regulator output
+ * @regulator: regulator source
+ *
+ * Forcibly disable the regulator output voltage or current.
+ * NOTE: this *will* disable the regulator output even if other consumer
+ * devices have it enabled. This should be used for situations when device
+ * damage will likely occur if the regulator is not disabled (e.g. over temp).
+ */
+int regulator_force_disable(struct regulator *regulator)
+{
+ int ret;
+
+ mutex_lock(&regulator->rdev->mutex);
+ regulator->enabled = 0;
+ regulator->uA_load = 0;
+ ret = _regulator_force_disable(regulator->rdev);
+ mutex_unlock(&regulator->rdev->mutex);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(regulator_force_disable);
+
+static int _regulator_is_enabled(struct regulator_dev *rdev)
+{
+ int ret;
+
+ mutex_lock(&rdev->mutex);
+
+ /* sanity check */
+ if (!rdev->desc->ops->is_enabled) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = rdev->desc->ops->is_enabled(rdev);
+out:
+ mutex_unlock(&rdev->mutex);
+ return ret;
+}
+
+/**
+ * regulator_is_enabled - is the regulator output enabled
+ * @regulator: regulator source
+ *
+ * Returns zero for disabled otherwise return number of enable requests.
+ */
+int regulator_is_enabled(struct regulator *regulator)
+{
+ return _regulator_is_enabled(regulator->rdev);
+}
+EXPORT_SYMBOL_GPL(regulator_is_enabled);
+
+/**
+ * regulator_set_voltage - set regulator output voltage
+ * @regulator: regulator source
+ * @min_uV: Minimum required voltage in uV
+ * @max_uV: Maximum acceptable voltage in uV
+ *
+ * Sets a voltage regulator to the desired output voltage. This can be set
+ * during any regulator state. IOW, regulator can be disabled or enabled.
+ *
+ * If the regulator is enabled then the voltage will change to the new value
+ * immediately otherwise if the regulator is disabled the regulator will
+ * output at the new voltage when enabled.
+ *
+ * NOTE: If the regulator is shared between several devices then the lowest
+ * request voltage that meets the system constraints will be used.
+ * NOTE: Regulator system constraints must be set for this regulator before
+ * calling this function otherwise this call will fail.
+ */
+int regulator_set_voltage(struct regulator *regulator, int min_uV, int max_uV)
+{
+ struct regulator_dev *rdev = regulator->rdev;
+ int ret;
+
+ mutex_lock(&rdev->mutex);
+
+ /* sanity check */
+ if (!rdev->desc->ops->set_voltage) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* constraints check */
+ ret = regulator_check_voltage(rdev, &min_uV, &max_uV);
+ if (ret < 0)
+ goto out;
+ regulator->min_uV = min_uV;
+ regulator->max_uV = max_uV;
+ ret = rdev->desc->ops->set_voltage(rdev, min_uV, max_uV);
+
+out:
+ mutex_unlock(&rdev->mutex);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(regulator_set_voltage);
+
+static int _regulator_get_voltage(struct regulator_dev *rdev)
+{
+ /* sanity check */
+ if (rdev->desc->ops->get_voltage)
+ return rdev->desc->ops->get_voltage(rdev);
+ else
+ return -EINVAL;
+}
+
+/**
+ * regulator_get_voltage - get regulator output voltage
+ * @regulator: regulator source
+ *
+ * This returns the current regulator voltage in uV.
+ *
+ * NOTE: If the regulator is disabled it will return the voltage value. This
+ * function should not be used to determine regulator state.
+ */
+int regulator_get_voltage(struct regulator *regulator)
+{
+ int ret;
+
+ mutex_lock(&regulator->rdev->mutex);
+
+ ret = _regulator_get_voltage(regulator->rdev);
+
+ mutex_unlock(&regulator->rdev->mutex);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(regulator_get_voltage);
+
+/**
+ * regulator_set_current_limit - set regulator output current limit
+ * @regulator: regulator source
+ * @min_uA: Minimuum supported current in uA
+ * @max_uA: Maximum supported current in uA
+ *
+ * Sets current sink to the desired output current. This can be set during
+ * any regulator state. IOW, regulator can be disabled or enabled.
+ *
+ * If the regulator is enabled then the current will change to the new value
+ * immediately otherwise if the regulator is disabled the regulator will
+ * output at the new current when enabled.
+ *
+ * NOTE: Regulator system constraints must be set for this regulator before
+ * calling this function otherwise this call will fail.
+ */
+int regulator_set_current_limit(struct regulator *regulator,
+ int min_uA, int max_uA)
+{
+ struct regulator_dev *rdev = regulator->rdev;
+ int ret;
+
+ mutex_lock(&rdev->mutex);
+
+ /* sanity check */
+ if (!rdev->desc->ops->set_current_limit) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* constraints check */
+ ret = regulator_check_current_limit(rdev, &min_uA, &max_uA);
+ if (ret < 0)
+ goto out;
+
+ ret = rdev->desc->ops->set_current_limit(rdev, min_uA, max_uA);
+out:
+ mutex_unlock(&rdev->mutex);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(regulator_set_current_limit);
+
+static int _regulator_get_current_limit(struct regulator_dev *rdev)
+{
+ int ret;
+
+ mutex_lock(&rdev->mutex);
+
+ /* sanity check */
+ if (!rdev->desc->ops->get_current_limit) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = rdev->desc->ops->get_current_limit(rdev);
+out:
+ mutex_unlock(&rdev->mutex);
+ return ret;
+}
+
+/**
+ * regulator_get_current_limit - get regulator output current
+ * @regulator: regulator source
+ *
+ * This returns the current supplied by the specified current sink in uA.
+ *
+ * NOTE: If the regulator is disabled it will return the current value. This
+ * function should not be used to determine regulator state.
+ */
+int regulator_get_current_limit(struct regulator *regulator)
+{
+ return _regulator_get_current_limit(regulator->rdev);
+}
+EXPORT_SYMBOL_GPL(regulator_get_current_limit);
+
+/**
+ * regulator_set_mode - set regulator operating mode
+ * @regulator: regulator source
+ * @mode: operating mode - one of the REGULATOR_MODE constants
+ *
+ * Set regulator operating mode to increase regulator efficiency or improve
+ * regulation performance.
+ *
+ * NOTE: Regulator system constraints must be set for this regulator before
+ * calling this function otherwise this call will fail.
+ */
+int regulator_set_mode(struct regulator *regulator, unsigned int mode)
+{
+ struct regulator_dev *rdev = regulator->rdev;
+ int ret;
+
+ mutex_lock(&rdev->mutex);
+
+ /* sanity check */
+ if (!rdev->desc->ops->set_mode) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* constraints check */
+ ret = regulator_check_mode(rdev, mode);
+ if (ret < 0)
+ goto out;
+
+ ret = rdev->desc->ops->set_mode(rdev, mode);
+out:
+ mutex_unlock(&rdev->mutex);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(regulator_set_mode);
+
+static unsigned int _regulator_get_mode(struct regulator_dev *rdev)
+{
+ int ret;
+
+ mutex_lock(&rdev->mutex);
+
+ /* sanity check */
+ if (!rdev->desc->ops->get_mode) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = rdev->desc->ops->get_mode(rdev);
+out:
+ mutex_unlock(&rdev->mutex);
+ return ret;
+}
+
+/**
+ * regulator_get_mode - get regulator operating mode
+ * @regulator: regulator source
+ *
+ * Get the current regulator operating mode.
+ */
+unsigned int regulator_get_mode(struct regulator *regulator)
+{
+ return _regulator_get_mode(regulator->rdev);
+}
+EXPORT_SYMBOL_GPL(regulator_get_mode);
+
+/**
+ * regulator_set_optimum_mode - set regulator optimum operating mode
+ * @regulator: regulator source
+ * @uA_load: load current
+ *
+ * Notifies the regulator core of a new device load. This is then used by
+ * DRMS (if enabled by constraints) to set the most efficient regulator
+ * operating mode for the new regulator loading.
+ *
+ * Consumer devices notify their supply regulator of the maximum power
+ * they will require (can be taken from device datasheet in the power
+ * consumption tables) when they change operational status and hence power
+ * state. Examples of operational state changes that can affect power
+ * consumption are :-
+ *
+ * o Device is opened / closed.
+ * o Device I/O is about to begin or has just finished.
+ * o Device is idling in between work.
+ *
+ * This information is also exported via sysfs to userspace.
+ *
+ * DRMS will sum the total requested load on the regulator and change
+ * to the most efficient operating mode if platform constraints allow.
+ *
+ * Returns the new regulator mode or error.
+ */
+int regulator_set_optimum_mode(struct regulator *regulator, int uA_load)
+{
+ struct regulator_dev *rdev = regulator->rdev;
+ struct regulator *consumer;
+ int ret, output_uV, input_uV, total_uA_load = 0;
+ unsigned int mode;
+
+ mutex_lock(&rdev->mutex);
+
+ regulator->uA_load = uA_load;
+ ret = regulator_check_drms(rdev);
+ if (ret < 0)
+ goto out;
+ ret = -EINVAL;
+
+ /* sanity check */
+ if (!rdev->desc->ops->get_optimum_mode)
+ goto out;
+
+ /* get output voltage */
+ output_uV = rdev->desc->ops->get_voltage(rdev);
+ if (output_uV <= 0) {
+ printk(KERN_ERR "%s: invalid output voltage found for %s\n",
+ __func__, rdev->desc->name);
+ goto out;
+ }
+
+ /* get input voltage */
+ if (rdev->supply && rdev->supply->desc->ops->get_voltage)
+ input_uV = rdev->supply->desc->ops->get_voltage(rdev->supply);
+ else
+ input_uV = rdev->constraints->input_uV;
+ if (input_uV <= 0) {
+ printk(KERN_ERR "%s: invalid input voltage found for %s\n",
+ __func__, rdev->desc->name);
+ goto out;
+ }
+
+ /* calc total requested load for this regulator */
+ list_for_each_entry(consumer, &rdev->consumer_list, list)
+ total_uA_load += consumer->uA_load;
+
+ mode = rdev->desc->ops->get_optimum_mode(rdev,
+ input_uV, output_uV,
+ total_uA_load);
+ if (ret <= 0) {
+ printk(KERN_ERR "%s: failed to get optimum mode for %s @"
+ " %d uA %d -> %d uV\n", __func__, rdev->desc->name,
+ total_uA_load, input_uV, output_uV);
+ goto out;
+ }
+
+ ret = rdev->desc->ops->set_mode(rdev, mode);
+ if (ret <= 0) {
+ printk(KERN_ERR "%s: failed to set optimum mode %x for %s\n",
+ __func__, mode, rdev->desc->name);
+ goto out;
+ }
+ ret = mode;
+out:
+ mutex_unlock(&rdev->mutex);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(regulator_set_optimum_mode);
+
+/**
+ * regulator_register_notifier - register regulator event notifier
+ * @regulator: regulator source
+ * @notifier_block: notifier block
+ *
+ * Register notifier block to receive regulator events.
+ */
+int regulator_register_notifier(struct regulator *regulator,
+ struct notifier_block *nb)
+{
+ return blocking_notifier_chain_register(&regulator->rdev->notifier,
+ nb);
+}
+EXPORT_SYMBOL_GPL(regulator_register_notifier);
+
+/**
+ * regulator_unregister_notifier - unregister regulator event notifier
+ * @regulator: regulator source
+ * @notifier_block: notifier block
+ *
+ * Unregister regulator event notifier block.
+ */
+int regulator_unregister_notifier(struct regulator *regulator,
+ struct notifier_block *nb)
+{
+ return blocking_notifier_chain_unregister(&regulator->rdev->notifier,
+ nb);
+}
+EXPORT_SYMBOL_GPL(regulator_unregister_notifier);
+
+/* notify regulator consumers and downstream regulator consumers */
+static void _notifier_call_chain(struct regulator_dev *rdev,
+ unsigned long event, void *data)
+{
+ struct regulator_dev *_rdev;
+
+ /* call rdev chain first */
+ mutex_lock(&rdev->mutex);
+ blocking_notifier_call_chain(&rdev->notifier, event, NULL);
+ mutex_unlock(&rdev->mutex);
+
+ /* now notify regulator we supply */
+ list_for_each_entry(_rdev, &rdev->supply_list, slist)
+ _notifier_call_chain(_rdev, event, data);
+}
+
+/**
+ * regulator_bulk_get - get multiple regulator consumers
+ *
+ * @dev: Device to supply
+ * @num_consumers: Number of consumers to register
+ * @consumers: Configuration of consumers; clients are stored here.
+ *
+ * @return 0 on success, an errno on failure.
+ *
+ * This helper function allows drivers to get several regulator
+ * consumers in one operation. If any of the regulators cannot be
+ * acquired then any regulators that were allocated will be freed
+ * before returning to the caller.
+ */
+int regulator_bulk_get(struct device *dev, int num_consumers,
+ struct regulator_bulk_data *consumers)
+{
+ int i;
+ int ret;
+
+ for (i = 0; i < num_consumers; i++)
+ consumers[i].consumer = NULL;
+
+ for (i = 0; i < num_consumers; i++) {
+ consumers[i].consumer = regulator_get(dev,
+ consumers[i].supply);
+ if (IS_ERR(consumers[i].consumer)) {
+ dev_err(dev, "Failed to get supply '%s'\n",
+ consumers[i].supply);
+ ret = PTR_ERR(consumers[i].consumer);
+ consumers[i].consumer = NULL;
+ goto err;
+ }
+ }
+
+ return 0;
+
+err:
+ for (i = 0; i < num_consumers && consumers[i].consumer; i++)
+ regulator_put(consumers[i].consumer);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(regulator_bulk_get);
+
+/**
+ * regulator_bulk_enable - enable multiple regulator consumers
+ *
+ * @num_consumers: Number of consumers
+ * @consumers: Consumer data; clients are stored here.
+ * @return 0 on success, an errno on failure
+ *
+ * This convenience API allows consumers to enable multiple regulator
+ * clients in a single API call. If any consumers cannot be enabled
+ * then any others that were enabled will be disabled again prior to
+ * return.
+ */
+int regulator_bulk_enable(int num_consumers,
+ struct regulator_bulk_data *consumers)
+{
+ int i;
+ int ret;
+
+ for (i = 0; i < num_consumers; i++) {
+ ret = regulator_enable(consumers[i].consumer);
+ if (ret != 0)
+ goto err;
+ }
+
+ return 0;
+
+err:
+ printk(KERN_ERR "Failed to enable %s\n", consumers[i].supply);
+ for (i = 0; i < num_consumers; i++)
+ regulator_disable(consumers[i].consumer);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(regulator_bulk_enable);
+
+/**
+ * regulator_bulk_disable - disable multiple regulator consumers
+ *
+ * @num_consumers: Number of consumers
+ * @consumers: Consumer data; clients are stored here.
+ * @return 0 on success, an errno on failure
+ *
+ * This convenience API allows consumers to disable multiple regulator
+ * clients in a single API call. If any consumers cannot be enabled
+ * then any others that were disabled will be disabled again prior to
+ * return.
+ */
+int regulator_bulk_disable(int num_consumers,
+ struct regulator_bulk_data *consumers)
+{
+ int i;
+ int ret;
+
+ for (i = 0; i < num_consumers; i++) {
+ ret = regulator_disable(consumers[i].consumer);
+ if (ret != 0)
+ goto err;
+ }
+
+ return 0;
+
+err:
+ printk(KERN_ERR "Failed to disable %s\n", consumers[i].supply);
+ for (i = 0; i < num_consumers; i++)
+ regulator_enable(consumers[i].consumer);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(regulator_bulk_disable);
+
+/**
+ * regulator_bulk_free - free multiple regulator consumers
+ *
+ * @num_consumers: Number of consumers
+ * @consumers: Consumer data; clients are stored here.
+ *
+ * This convenience API allows consumers to free multiple regulator
+ * clients in a single API call.
+ */
+void regulator_bulk_free(int num_consumers,
+ struct regulator_bulk_data *consumers)
+{
+ int i;
+
+ for (i = 0; i < num_consumers; i++) {
+ regulator_put(consumers[i].consumer);
+ consumers[i].consumer = NULL;
+ }
+}
+EXPORT_SYMBOL_GPL(regulator_bulk_free);
+
+/**
+ * regulator_notifier_call_chain - call regulator event notifier
+ * @regulator: regulator source
+ * @event: notifier block
+ * @data:
+ *
+ * Called by regulator drivers to notify clients a regulator event has
+ * occurred. We also notify regulator clients downstream.
+ */
+int regulator_notifier_call_chain(struct regulator_dev *rdev,
+ unsigned long event, void *data)
+{
+ _notifier_call_chain(rdev, event, data);
+ return NOTIFY_DONE;
+
+}
+EXPORT_SYMBOL_GPL(regulator_notifier_call_chain);
+
+/**
+ * regulator_register - register regulator
+ * @regulator: regulator source
+ * @reg_data: private regulator data
+ *
+ * Called by regulator drivers to register a regulator.
+ * Returns 0 on success.
+ */
+struct regulator_dev *regulator_register(struct regulator_desc *regulator_desc,
+ void *reg_data)
+{
+ static atomic_t regulator_no = ATOMIC_INIT(0);
+ struct regulator_dev *rdev;
+ int ret;
+
+ if (regulator_desc == NULL)
+ return ERR_PTR(-EINVAL);
+
+ if (regulator_desc->name == NULL || regulator_desc->ops == NULL)
+ return ERR_PTR(-EINVAL);
+
+ if (!regulator_desc->type == REGULATOR_VOLTAGE &&
+ !regulator_desc->type == REGULATOR_CURRENT)
+ return ERR_PTR(-EINVAL);
+
+ rdev = kzalloc(sizeof(struct regulator_dev), GFP_KERNEL);
+ if (rdev == NULL)
+ return ERR_PTR(-ENOMEM);
+
+ mutex_lock(&regulator_list_mutex);
+
+ mutex_init(&rdev->mutex);
+ rdev->reg_data = reg_data;
+ rdev->owner = regulator_desc->owner;
+ rdev->desc = regulator_desc;
+ INIT_LIST_HEAD(&rdev->consumer_list);
+ INIT_LIST_HEAD(&rdev->supply_list);
+ INIT_LIST_HEAD(&rdev->list);
+ INIT_LIST_HEAD(&rdev->slist);
+ BLOCKING_INIT_NOTIFIER_HEAD(&rdev->notifier);
+
+ rdev->dev.class = &regulator_class;
+ device_initialize(&rdev->dev);
+ snprintf(rdev->dev.bus_id, sizeof(rdev->dev.bus_id),
+ "regulator_%ld_%s",
+ (unsigned long)atomic_inc_return(&regulator_no) - 1,
+ regulator_desc->name);
+
+ ret = device_add(&rdev->dev);
+ if (ret == 0)
+ list_add(&rdev->list, &regulator_list);
+ else {
+ kfree(rdev);
+ rdev = ERR_PTR(ret);
+ }
+ mutex_unlock(&regulator_list_mutex);
+ return rdev;
+}
+EXPORT_SYMBOL_GPL(regulator_register);
+
+/**
+ * regulator_unregister - unregister regulator
+ * @regulator: regulator source
+ *
+ * Called by regulator drivers to unregister a regulator.
+ */
+void regulator_unregister(struct regulator_dev *rdev)
+{
+ if (rdev == NULL)
+ return;
+
+ mutex_lock(&regulator_list_mutex);
+ list_del(&rdev->list);
+ if (rdev->supply)
+ sysfs_remove_link(&rdev->dev.kobj, "supply");
+ device_unregister(&rdev->dev);
+ mutex_unlock(&regulator_list_mutex);
+}
+EXPORT_SYMBOL_GPL(regulator_unregister);
+
+/**
+ * regulator_set_supply - set regulator supply regulator
+ * @regulator: regulator name
+ * @supply: supply regulator name
+ *
+ * Called by platform initialisation code to set the supply regulator for this
+ * regulator. This ensures that a regulators supply will also be enabled by the
+ * core if it's child is enabled.
+ */
+int regulator_set_supply(const char *regulator, const char *supply)
+{
+ struct regulator_dev *rdev, *supply_rdev;
+ int err;
+
+ if (regulator == NULL || supply == NULL)
+ return -EINVAL;
+
+ mutex_lock(&regulator_list_mutex);
+
+ list_for_each_entry(rdev, &regulator_list, list) {
+ if (!strcmp(rdev->desc->name, regulator))
+ goto found_regulator;
+ }
+ mutex_unlock(&regulator_list_mutex);
+ return -ENODEV;
+
+found_regulator:
+ list_for_each_entry(supply_rdev, &regulator_list, list) {
+ if (!strcmp(supply_rdev->desc->name, supply))
+ goto found_supply;
+ }
+ mutex_unlock(&regulator_list_mutex);
+ return -ENODEV;
+
+found_supply:
+ err = sysfs_create_link(&rdev->dev.kobj, &supply_rdev->dev.kobj,
+ "supply");
+ if (err) {
+ printk(KERN_ERR
+ "%s: could not add device link %s err %d\n",
+ __func__, supply_rdev->dev.kobj.name, err);
+ goto out;
+ }
+ rdev->supply = supply_rdev;
+ list_add(&rdev->slist, &supply_rdev->supply_list);
+out:
+ mutex_unlock(&regulator_list_mutex);
+ return err;
+}
+EXPORT_SYMBOL_GPL(regulator_set_supply);
+
+/**
+ * regulator_get_supply - get regulator supply regulator
+ * @regulator: regulator name
+ *
+ * Returns the supply supply regulator name or NULL if no supply regulator
+ * exists (i.e the regulator is supplied directly from USB, Line, Battery, etc)
+ */
+const char *regulator_get_supply(const char *regulator)
+{
+ struct regulator_dev *rdev;
+
+ if (regulator == NULL)
+ return NULL;
+
+ mutex_lock(&regulator_list_mutex);
+ list_for_each_entry(rdev, &regulator_list, list) {
+ if (!strcmp(rdev->desc->name, regulator))
+ goto found;
+ }
+ mutex_unlock(&regulator_list_mutex);
+ return NULL;
+
+found:
+ mutex_unlock(&regulator_list_mutex);
+ if (rdev->supply)
+ return rdev->supply->desc->name;
+ else
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(regulator_get_supply);
+
+/**
+ * regulator_set_machine_constraints - sets regulator constraints
+ * @regulator: regulator source
+ *
+ * Allows platform initialisation code to define and constrain
+ * regulator circuits e.g. valid voltage/current ranges, etc. NOTE:
+ * Constraints *must* be set by platform code in order for some
+ * regulator operations to proceed i.e. set_voltage, set_current_limit,
+ * set_mode.
+ */
+int regulator_set_machine_constraints(const char *regulator_name,
+ struct regulation_constraints *constraints)
+{
+ struct regulator_dev *rdev;
+ int ret = 0;
+
+ if (regulator_name == NULL)
+ return -EINVAL;
+
+ mutex_lock(&regulator_list_mutex);
+
+ list_for_each_entry(rdev, &regulator_list, list) {
+ if (!strcmp(regulator_name, rdev->desc->name))
+ goto found;
+ }
+ ret = -ENODEV;
+ goto out;
+
+found:
+ mutex_lock(&rdev->mutex);
+ rdev->constraints = constraints;
+
+ /* do we need to apply the constraint voltage */
+ if (rdev->constraints->apply_uV &&
+ rdev->constraints->min_uV == rdev->constraints->max_uV &&
+ rdev->desc->ops->set_voltage) {
+ ret = rdev->desc->ops->set_voltage(rdev,
+ rdev->constraints->min_uV, rdev->constraints->max_uV);
+ if (ret < 0) {
+ printk(KERN_ERR "%s: failed to apply %duV"
+ " constraint\n", __func__,
+ rdev->constraints->min_uV);
+ rdev->constraints = NULL;
+ goto out;
+ }
+ }
+
+ /* are we enabled at boot time by firmware / bootloader */
+ if (rdev->constraints->boot_on)
+ rdev->use_count = 1;
+
+ /* do we need to setup our suspend state */
+ if (constraints->initial_state)
+ ret = suspend_prepare(rdev, constraints->initial_state);
+
+ print_constraints(rdev);
+ mutex_unlock(&rdev->mutex);
+
+out:
+ mutex_unlock(&regulator_list_mutex);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(regulator_set_machine_constraints);
+
+
+/**
+ * regulator_set_device_supply: Bind a regulator to a symbolic supply
+ * @regulator: regulator source
+ * @dev: device the supply applies to
+ * @supply: symbolic name for supply
+ *
+ * Allows platform initialisation code to map physical regulator
+ * sources to symbolic names for supplies for use by devices. Devices
+ * should use these symbolic names to request regulators, avoiding the
+ * need to provide board-specific regulator names as platform data.
+ */
+int regulator_set_device_supply(const char *regulator, struct device *dev,
+ const char *supply)
+{
+ struct regulator_map *node;
+
+ if (regulator == NULL || supply == NULL)
+ return -EINVAL;
+
+ node = kmalloc(sizeof(struct regulator_map), GFP_KERNEL);
+ if (node == NULL)
+ return -ENOMEM;
+
+ node->regulator = regulator;
+ node->dev = dev;
+ node->supply = supply;
+
+ mutex_lock(&regulator_list_mutex);
+ list_add(&node->list, &regulator_map_list);
+ mutex_unlock(&regulator_list_mutex);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(regulator_set_device_supply);
+
+/**
+ * regulator_suspend_prepare: prepare regulators for system wide suspend
+ * @state: system suspend state
+ *
+ * Configure each regulator with it's suspend operating parameters for state.
+ * This will usually be called by machine suspend code prior to supending.
+ */
+int regulator_suspend_prepare(suspend_state_t state)
+{
+ struct regulator_dev *rdev;
+ int ret = 0;
+
+ /* ON is handled by regulator active state */
+ if (state == PM_SUSPEND_ON)
+ return -EINVAL;
+
+ mutex_lock(&regulator_list_mutex);
+ list_for_each_entry(rdev, &regulator_list, list) {
+
+ mutex_lock(&rdev->mutex);
+ ret = suspend_prepare(rdev, state);
+ mutex_unlock(&rdev->mutex);
+
+ if (ret < 0) {
+ printk(KERN_ERR "%s: failed to prepare %s\n",
+ __func__, rdev->desc->name);
+ goto out;
+ }
+ }
+out:
+ mutex_unlock(&regulator_list_mutex);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(regulator_suspend_prepare);
+
+/**
+ * rdev_get_drvdata - get rdev regulator driver data
+ * @regulator: regulator
+ *
+ * Get rdev regulator driver private data. This call can be used in the
+ * regulator driver context.
+ */
+void *rdev_get_drvdata(struct regulator_dev *rdev)
+{
+ return rdev->reg_data;
+}
+EXPORT_SYMBOL_GPL(rdev_get_drvdata);
+
+/**
+ * regulator_get_drvdata - get regulator driver data
+ * @regulator: regulator
+ *
+ * Get regulator driver private data. This call can be used in the consumer
+ * driver context when non API regulator specific functions need to be called.
+ */
+void *regulator_get_drvdata(struct regulator *regulator)
+{
+ return regulator->rdev->reg_data;
+}
+EXPORT_SYMBOL_GPL(regulator_get_drvdata);
+
+/**
+ * regulator_set_drvdata - set regulator driver data
+ * @regulator: regulator
+ * @data: data
+ */
+void regulator_set_drvdata(struct regulator *regulator, void *data)
+{
+ regulator->rdev->reg_data = data;
+}
+EXPORT_SYMBOL_GPL(regulator_set_drvdata);
+
+/**
+ * regulator_get_id - get regulator ID
+ * @regulator: regulator
+ */
+int rdev_get_id(struct regulator_dev *rdev)
+{
+ return rdev->desc->id;
+}
+EXPORT_SYMBOL_GPL(rdev_get_id);
+
+static int __init regulator_init(void)
+{
+ printk(KERN_INFO "regulator: core version %s\n", REGULATOR_VERSION);
+ return class_register(&regulator_class);
+}
+
+/* init early to allow our consumers to complete system booting */
+core_initcall(regulator_init);
diff --git a/drivers/regulator/fixed.c b/drivers/regulator/fixed.c
new file mode 100644
index 00000000000..d31db3e1491
--- /dev/null
+++ b/drivers/regulator/fixed.c
@@ -0,0 +1,129 @@
+/*
+ * fixed.c
+ *
+ * Copyright 2008 Wolfson Microelectronics PLC.
+ *
+ * Author: Mark Brown <broonie@opensource.wolfsonmicro.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This is useful for systems with mixed controllable and
+ * non-controllable regulators, as well as for allowing testing on
+ * systems with no controllable regulators.
+ */
+
+#include <linux/err.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+#include <linux/regulator/driver.h>
+#include <linux/regulator/fixed.h>
+
+struct fixed_voltage_data {
+ struct regulator_desc desc;
+ struct regulator_dev *dev;
+ int microvolts;
+};
+
+static int fixed_voltage_is_enabled(struct regulator_dev *dev)
+{
+ return 1;
+}
+
+static int fixed_voltage_enable(struct regulator_dev *dev)
+{
+ return 0;
+}
+
+static int fixed_voltage_get_voltage(struct regulator_dev *dev)
+{
+ struct fixed_voltage_data *data = rdev_get_drvdata(dev);
+
+ return data->microvolts;
+}
+
+static struct regulator_ops fixed_voltage_ops = {
+ .is_enabled = fixed_voltage_is_enabled,
+ .enable = fixed_voltage_enable,
+ .get_voltage = fixed_voltage_get_voltage,
+};
+
+static int regulator_fixed_voltage_probe(struct platform_device *pdev)
+{
+ struct fixed_voltage_config *config = pdev->dev.platform_data;
+ struct fixed_voltage_data *drvdata;
+ int ret;
+
+ drvdata = kzalloc(sizeof(struct fixed_voltage_data), GFP_KERNEL);
+ if (drvdata == NULL) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ drvdata->desc.name = kstrdup(config->supply_name, GFP_KERNEL);
+ if (drvdata->desc.name == NULL) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ drvdata->desc.type = REGULATOR_VOLTAGE;
+ drvdata->desc.owner = THIS_MODULE;
+ drvdata->desc.ops = &fixed_voltage_ops,
+
+ drvdata->microvolts = config->microvolts;
+
+ drvdata->dev = regulator_register(&drvdata->desc, drvdata);
+ if (IS_ERR(drvdata->dev)) {
+ ret = PTR_ERR(drvdata->dev);
+ goto err_name;
+ }
+
+ platform_set_drvdata(pdev, drvdata);
+
+ dev_dbg(&pdev->dev, "%s supplying %duV\n", drvdata->desc.name,
+ drvdata->microvolts);
+
+ return 0;
+
+err_name:
+ kfree(drvdata->desc.name);
+err:
+ kfree(drvdata);
+ return ret;
+}
+
+static int regulator_fixed_voltage_remove(struct platform_device *pdev)
+{
+ struct fixed_voltage_data *drvdata = platform_get_drvdata(pdev);
+
+ regulator_unregister(drvdata->dev);
+ kfree(drvdata->desc.name);
+ kfree(drvdata);
+
+ return 0;
+}
+
+static struct platform_driver regulator_fixed_voltage_driver = {
+ .probe = regulator_fixed_voltage_probe,
+ .remove = regulator_fixed_voltage_remove,
+ .driver = {
+ .name = "reg-fixed-voltage",
+ },
+};
+
+static int __init regulator_fixed_voltage_init(void)
+{
+ return platform_driver_register(&regulator_fixed_voltage_driver);
+}
+module_init(regulator_fixed_voltage_init);
+
+static void __exit regulator_fixed_voltage_exit(void)
+{
+ platform_driver_unregister(&regulator_fixed_voltage_driver);
+}
+module_exit(regulator_fixed_voltage_exit);
+
+MODULE_AUTHOR("Mark Brown <broonie@opensource.wolfsonmicro.com>");
+MODULE_DESCRIPTION("Fixed voltage regulator");
+MODULE_LICENSE("GPL");
diff --git a/drivers/regulator/virtual.c b/drivers/regulator/virtual.c
new file mode 100644
index 00000000000..5ddb464b1c3
--- /dev/null
+++ b/drivers/regulator/virtual.c
@@ -0,0 +1,345 @@
+/*
+ * reg-virtual-consumer.c
+ *
+ * Copyright 2008 Wolfson Microelectronics PLC.
+ *
+ * Author: Mark Brown <broonie@opensource.wolfsonmicro.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ */
+
+#include <linux/err.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+#include <linux/regulator/consumer.h>
+
+struct virtual_consumer_data {
+ struct mutex lock;
+ struct regulator *regulator;
+ int enabled;
+ int min_uV;
+ int max_uV;
+ int min_uA;
+ int max_uA;
+ unsigned int mode;
+};
+
+static void update_voltage_constraints(struct virtual_consumer_data *data)
+{
+ int ret;
+
+ if (data->min_uV && data->max_uV
+ && data->min_uV <= data->max_uV) {
+ ret = regulator_set_voltage(data->regulator,
+ data->min_uV, data->max_uV);
+ if (ret != 0) {
+ printk(KERN_ERR "regulator_set_voltage() failed: %d\n",
+ ret);
+ return;
+ }
+ }
+
+ if (data->min_uV && data->max_uV && !data->enabled) {
+ ret = regulator_enable(data->regulator);
+ if (ret == 0)
+ data->enabled = 1;
+ else
+ printk(KERN_ERR "regulator_enable() failed: %d\n",
+ ret);
+ }
+
+ if (!(data->min_uV && data->max_uV) && data->enabled) {
+ ret = regulator_disable(data->regulator);
+ if (ret == 0)
+ data->enabled = 0;
+ else
+ printk(KERN_ERR "regulator_disable() failed: %d\n",
+ ret);
+ }
+}
+
+static void update_current_limit_constraints(struct virtual_consumer_data
+ *data)
+{
+ int ret;
+
+ if (data->max_uA
+ && data->min_uA <= data->max_uA) {
+ ret = regulator_set_current_limit(data->regulator,
+ data->min_uA, data->max_uA);
+ if (ret != 0) {
+ pr_err("regulator_set_current_limit() failed: %d\n",
+ ret);
+ return;
+ }
+ }
+
+ if (data->max_uA && !data->enabled) {
+ ret = regulator_enable(data->regulator);
+ if (ret == 0)
+ data->enabled = 1;
+ else
+ printk(KERN_ERR "regulator_enable() failed: %d\n",
+ ret);
+ }
+
+ if (!(data->min_uA && data->max_uA) && data->enabled) {
+ ret = regulator_disable(data->regulator);
+ if (ret == 0)
+ data->enabled = 0;
+ else
+ printk(KERN_ERR "regulator_disable() failed: %d\n",
+ ret);
+ }
+}
+
+static ssize_t show_min_uV(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct virtual_consumer_data *data = dev_get_drvdata(dev);
+ return sprintf(buf, "%d\n", data->min_uV);
+}
+
+static ssize_t set_min_uV(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct virtual_consumer_data *data = dev_get_drvdata(dev);
+ long val;
+
+ if (strict_strtol(buf, 10, &val) != 0)
+ return count;
+
+ mutex_lock(&data->lock);
+
+ data->min_uV = val;
+ update_voltage_constraints(data);
+
+ mutex_unlock(&data->lock);
+
+ return count;
+}
+
+static ssize_t show_max_uV(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct virtual_consumer_data *data = dev_get_drvdata(dev);
+ return sprintf(buf, "%d\n", data->max_uV);
+}
+
+static ssize_t set_max_uV(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct virtual_consumer_data *data = dev_get_drvdata(dev);
+ long val;
+
+ if (strict_strtol(buf, 10, &val) != 0)
+ return count;
+
+ mutex_lock(&data->lock);
+
+ data->max_uV = val;
+ update_voltage_constraints(data);
+
+ mutex_unlock(&data->lock);
+
+ return count;
+}
+
+static ssize_t show_min_uA(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct virtual_consumer_data *data = dev_get_drvdata(dev);
+ return sprintf(buf, "%d\n", data->min_uA);
+}
+
+static ssize_t set_min_uA(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct virtual_consumer_data *data = dev_get_drvdata(dev);
+ long val;
+
+ if (strict_strtol(buf, 10, &val) != 0)
+ return count;
+
+ mutex_lock(&data->lock);
+
+ data->min_uA = val;
+ update_current_limit_constraints(data);
+
+ mutex_unlock(&data->lock);
+
+ return count;
+}
+
+static ssize_t show_max_uA(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct virtual_consumer_data *data = dev_get_drvdata(dev);
+ return sprintf(buf, "%d\n", data->max_uA);
+}
+
+static ssize_t set_max_uA(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct virtual_consumer_data *data = dev_get_drvdata(dev);
+ long val;
+
+ if (strict_strtol(buf, 10, &val) != 0)
+ return count;
+
+ mutex_lock(&data->lock);
+
+ data->max_uA = val;
+ update_current_limit_constraints(data);
+
+ mutex_unlock(&data->lock);
+
+ return count;
+}
+
+static ssize_t show_mode(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct virtual_consumer_data *data = dev_get_drvdata(dev);
+
+ switch (data->mode) {
+ case REGULATOR_MODE_FAST:
+ return sprintf(buf, "fast\n");
+ case REGULATOR_MODE_NORMAL:
+ return sprintf(buf, "normal\n");
+ case REGULATOR_MODE_IDLE:
+ return sprintf(buf, "idle\n");
+ case REGULATOR_MODE_STANDBY:
+ return sprintf(buf, "standby\n");
+ default:
+ return sprintf(buf, "unknown\n");
+ }
+}
+
+static ssize_t set_mode(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct virtual_consumer_data *data = dev_get_drvdata(dev);
+ unsigned int mode;
+ int ret;
+
+ if (strncmp(buf, "fast", strlen("fast")) == 0)
+ mode = REGULATOR_MODE_FAST;
+ else if (strncmp(buf, "normal", strlen("normal")) == 0)
+ mode = REGULATOR_MODE_NORMAL;
+ else if (strncmp(buf, "idle", strlen("idle")) == 0)
+ mode = REGULATOR_MODE_IDLE;
+ else if (strncmp(buf, "standby", strlen("standby")) == 0)
+ mode = REGULATOR_MODE_STANDBY;
+ else {
+ dev_err(dev, "Configuring invalid mode\n");
+ return count;
+ }
+
+ mutex_lock(&data->lock);
+ ret = regulator_set_mode(data->regulator, mode);
+ if (ret == 0)
+ data->mode = mode;
+ else
+ dev_err(dev, "Failed to configure mode: %d\n", ret);
+ mutex_unlock(&data->lock);
+
+ return count;
+}
+
+static DEVICE_ATTR(min_microvolts, 0666, show_min_uV, set_min_uV);
+static DEVICE_ATTR(max_microvolts, 0666, show_max_uV, set_max_uV);
+static DEVICE_ATTR(min_microamps, 0666, show_min_uA, set_min_uA);
+static DEVICE_ATTR(max_microamps, 0666, show_max_uA, set_max_uA);
+static DEVICE_ATTR(mode, 0666, show_mode, set_mode);
+
+struct device_attribute *attributes[] = {
+ &dev_attr_min_microvolts,
+ &dev_attr_max_microvolts,
+ &dev_attr_min_microamps,
+ &dev_attr_max_microamps,
+ &dev_attr_mode,
+};
+
+static int regulator_virtual_consumer_probe(struct platform_device *pdev)
+{
+ char *reg_id = pdev->dev.platform_data;
+ struct virtual_consumer_data *drvdata;
+ int ret, i;
+
+ drvdata = kzalloc(sizeof(struct virtual_consumer_data), GFP_KERNEL);
+ if (drvdata == NULL) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ mutex_init(&drvdata->lock);
+
+ drvdata->regulator = regulator_get(&pdev->dev, reg_id);
+ if (IS_ERR(drvdata->regulator)) {
+ ret = PTR_ERR(drvdata->regulator);
+ goto err;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(attributes); i++) {
+ ret = device_create_file(&pdev->dev, attributes[i]);
+ if (ret != 0)
+ goto err;
+ }
+
+ drvdata->mode = regulator_get_mode(drvdata->regulator);
+
+ platform_set_drvdata(pdev, drvdata);
+
+ return 0;
+
+err:
+ for (i = 0; i < ARRAY_SIZE(attributes); i++)
+ device_remove_file(&pdev->dev, attributes[i]);
+ kfree(drvdata);
+ return ret;
+}
+
+static int regulator_virtual_consumer_remove(struct platform_device *pdev)
+{
+ struct virtual_consumer_data *drvdata = platform_get_drvdata(pdev);
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(attributes); i++)
+ device_remove_file(&pdev->dev, attributes[i]);
+ if (drvdata->enabled)
+ regulator_disable(drvdata->regulator);
+ regulator_put(drvdata->regulator);
+
+ kfree(drvdata);
+
+ return 0;
+}
+
+static struct platform_driver regulator_virtual_consumer_driver = {
+ .probe = regulator_virtual_consumer_probe,
+ .remove = regulator_virtual_consumer_remove,
+ .driver = {
+ .name = "reg-virt-consumer",
+ },
+};
+
+
+static int __init regulator_virtual_consumer_init(void)
+{
+ return platform_driver_register(&regulator_virtual_consumer_driver);
+}
+module_init(regulator_virtual_consumer_init);
+
+static void __exit regulator_virtual_consumer_exit(void)
+{
+ platform_driver_unregister(&regulator_virtual_consumer_driver);
+}
+module_exit(regulator_virtual_consumer_exit);
+
+MODULE_AUTHOR("Mark Brown <broonie@opensource.wolfsonmicro.com>");
+MODULE_DESCRIPTION("Virtual regulator consumer");
+MODULE_LICENSE("GPL");
diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
index d397fa5f3a9..7af60b98d8a 100644
--- a/drivers/rtc/interface.c
+++ b/drivers/rtc/interface.c
@@ -20,7 +20,7 @@ int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm)
err = mutex_lock_interruptible(&rtc->ops_lock);
if (err)
- return -EBUSY;
+ return err;
if (!rtc->ops)
err = -ENODEV;
@@ -46,7 +46,7 @@ int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm)
err = mutex_lock_interruptible(&rtc->ops_lock);
if (err)
- return -EBUSY;
+ return err;
if (!rtc->ops)
err = -ENODEV;
@@ -66,7 +66,7 @@ int rtc_set_mmss(struct rtc_device *rtc, unsigned long secs)
err = mutex_lock_interruptible(&rtc->ops_lock);
if (err)
- return -EBUSY;
+ return err;
if (!rtc->ops)
err = -ENODEV;
@@ -106,7 +106,7 @@ static int rtc_read_alarm_internal(struct rtc_device *rtc, struct rtc_wkalrm *al
err = mutex_lock_interruptible(&rtc->ops_lock);
if (err)
- return -EBUSY;
+ return err;
if (rtc->ops == NULL)
err = -ENODEV;
@@ -293,7 +293,7 @@ int rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
err = mutex_lock_interruptible(&rtc->ops_lock);
if (err)
- return -EBUSY;
+ return err;
if (!rtc->ops)
err = -ENODEV;
diff --git a/drivers/rtc/rtc-dev.c b/drivers/rtc/rtc-dev.c
index 0a870b7e5c3..856cc1af40d 100644
--- a/drivers/rtc/rtc-dev.c
+++ b/drivers/rtc/rtc-dev.c
@@ -221,7 +221,7 @@ static long rtc_dev_ioctl(struct file *file,
err = mutex_lock_interruptible(&rtc->ops_lock);
if (err)
- return -EBUSY;
+ return err;
/* check that the calling task has appropriate permissions
* for certain ioctls. doing this check here is useful
@@ -432,6 +432,8 @@ static int rtc_dev_release(struct inode *inode, struct file *file)
#ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL
clear_uie(rtc);
#endif
+ rtc_irq_set_state(rtc, NULL, 0);
+
if (rtc->ops->release)
rtc->ops->release(rtc->dev.parent);
diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c
index 2d8df0b3053..20676cdef4a 100644
--- a/drivers/s390/block/dasd_alias.c
+++ b/drivers/s390/block/dasd_alias.c
@@ -91,7 +91,8 @@ static struct alias_pav_group *_find_group(struct alias_lcu *lcu,
else
search_unit_addr = uid->base_unit_addr;
list_for_each_entry(pos, &lcu->grouplist, group) {
- if (pos->uid.base_unit_addr == search_unit_addr)
+ if (pos->uid.base_unit_addr == search_unit_addr &&
+ !strncmp(pos->uid.vduit, uid->vduit, sizeof(uid->vduit)))
return pos;
};
return NULL;
@@ -332,6 +333,7 @@ static int _add_device_to_lcu(struct alias_lcu *lcu,
group->uid.base_unit_addr = uid->real_unit_addr;
else
group->uid.base_unit_addr = uid->base_unit_addr;
+ memcpy(group->uid.vduit, uid->vduit, sizeof(uid->vduit));
INIT_LIST_HEAD(&group->group);
INIT_LIST_HEAD(&group->baselist);
INIT_LIST_HEAD(&group->aliaslist);
diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c
index d774e79476f..cd3335c1c30 100644
--- a/drivers/s390/block/dasd_devmap.c
+++ b/drivers/s390/block/dasd_devmap.c
@@ -913,7 +913,8 @@ dasd_vendor_show(struct device *dev, struct device_attribute *attr, char *buf)
static DEVICE_ATTR(vendor, 0444, dasd_vendor_show, NULL);
#define UID_STRLEN ( /* vendor */ 3 + 1 + /* serial */ 14 + 1 +\
- /* SSID */ 4 + 1 + /* unit addr */ 2 + 1)
+ /* SSID */ 4 + 1 + /* unit addr */ 2 + 1 +\
+ /* vduit */ 32 + 1)
static ssize_t
dasd_uid_show(struct device *dev, struct device_attribute *attr, char *buf)
@@ -945,8 +946,17 @@ dasd_uid_show(struct device *dev, struct device_attribute *attr, char *buf)
sprintf(ua_string, "%02x", uid->real_unit_addr);
break;
}
- snprintf(uid_string, sizeof(uid_string), "%s.%s.%04x.%s",
- uid->vendor, uid->serial, uid->ssid, ua_string);
+ if (strlen(uid->vduit) > 0)
+ snprintf(uid_string, sizeof(uid_string),
+ "%s.%s.%04x.%s.%s",
+ uid->vendor, uid->serial,
+ uid->ssid, ua_string,
+ uid->vduit);
+ else
+ snprintf(uid_string, sizeof(uid_string),
+ "%s.%s.%04x.%s",
+ uid->vendor, uid->serial,
+ uid->ssid, ua_string);
spin_unlock(&dasd_devmap_lock);
return snprintf(buf, PAGE_SIZE, "%s\n", uid_string);
}
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index 3590fdb5b2f..773b3fe275b 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -313,8 +313,8 @@ static int prefix(struct ccw1 *ccw, struct PFX_eckd_data *pfxdata, int trk,
memset(pfxdata, 0, sizeof(*pfxdata));
/* prefix data */
pfxdata->format = 0;
- pfxdata->base_address = basepriv->conf_data.ned1.unit_addr;
- pfxdata->base_lss = basepriv->conf_data.ned1.ID;
+ pfxdata->base_address = basepriv->ned->unit_addr;
+ pfxdata->base_lss = basepriv->ned->ID;
pfxdata->validity.define_extend = 1;
/* private uid is kept up to date, conf_data may be outdated */
@@ -536,36 +536,40 @@ dasd_eckd_cdl_reclen(int recid)
/*
* Generate device unique id that specifies the physical device.
*/
-static int
-dasd_eckd_generate_uid(struct dasd_device *device, struct dasd_uid *uid)
+static int dasd_eckd_generate_uid(struct dasd_device *device,
+ struct dasd_uid *uid)
{
struct dasd_eckd_private *private;
- struct dasd_eckd_confdata *confdata;
+ int count;
private = (struct dasd_eckd_private *) device->private;
if (!private)
return -ENODEV;
- confdata = &private->conf_data;
- if (!confdata)
+ if (!private->ned || !private->gneq)
return -ENODEV;
memset(uid, 0, sizeof(struct dasd_uid));
- memcpy(uid->vendor, confdata->ned1.HDA_manufacturer,
+ memcpy(uid->vendor, private->ned->HDA_manufacturer,
sizeof(uid->vendor) - 1);
EBCASC(uid->vendor, sizeof(uid->vendor) - 1);
- memcpy(uid->serial, confdata->ned1.HDA_location,
+ memcpy(uid->serial, private->ned->HDA_location,
sizeof(uid->serial) - 1);
EBCASC(uid->serial, sizeof(uid->serial) - 1);
- uid->ssid = confdata->neq.subsystemID;
- uid->real_unit_addr = confdata->ned1.unit_addr;
- if (confdata->ned2.sneq.flags == 0x40 &&
- confdata->ned2.sneq.format == 0x0001) {
- uid->type = confdata->ned2.sneq.sua_flags;
+ uid->ssid = private->gneq->subsystemID;
+ uid->real_unit_addr = private->ned->unit_addr;;
+ if (private->sneq) {
+ uid->type = private->sneq->sua_flags;
if (uid->type == UA_BASE_PAV_ALIAS)
- uid->base_unit_addr = confdata->ned2.sneq.base_unit_addr;
+ uid->base_unit_addr = private->sneq->base_unit_addr;
} else {
uid->type = UA_BASE_DEVICE;
}
+ if (private->vdsneq) {
+ for (count = 0; count < 16; count++) {
+ sprintf(uid->vduit+2*count, "%02x",
+ private->vdsneq->uit[count]);
+ }
+ }
return 0;
}
@@ -623,6 +627,15 @@ static int dasd_eckd_read_conf_lpm(struct dasd_device *device,
ret = -ENOMEM;
goto out_error;
}
+
+ /*
+ * buffer has to start with EBCDIC "V1.0" to show
+ * support for virtual device SNEQ
+ */
+ rcd_buf[0] = 0xE5;
+ rcd_buf[1] = 0xF1;
+ rcd_buf[2] = 0x4B;
+ rcd_buf[3] = 0xF0;
cqr = dasd_eckd_build_rcd_lpm(device, rcd_buf, ciw, lpm);
if (IS_ERR(cqr)) {
ret = PTR_ERR(cqr);
@@ -646,8 +659,62 @@ out_error:
return ret;
}
-static int
-dasd_eckd_read_conf(struct dasd_device *device)
+static int dasd_eckd_identify_conf_parts(struct dasd_eckd_private *private)
+{
+
+ struct dasd_sneq *sneq;
+ int i, count;
+
+ private->ned = NULL;
+ private->sneq = NULL;
+ private->vdsneq = NULL;
+ private->gneq = NULL;
+ count = private->conf_len / sizeof(struct dasd_sneq);
+ sneq = (struct dasd_sneq *)private->conf_data;
+ for (i = 0; i < count; ++i) {
+ if (sneq->flags.identifier == 1 && sneq->format == 1)
+ private->sneq = sneq;
+ else if (sneq->flags.identifier == 1 && sneq->format == 4)
+ private->vdsneq = (struct vd_sneq *)sneq;
+ else if (sneq->flags.identifier == 2)
+ private->gneq = (struct dasd_gneq *)sneq;
+ else if (sneq->flags.identifier == 3 && sneq->res1 == 1)
+ private->ned = (struct dasd_ned *)sneq;
+ sneq++;
+ }
+ if (!private->ned || !private->gneq) {
+ private->ned = NULL;
+ private->sneq = NULL;
+ private->vdsneq = NULL;
+ private->gneq = NULL;
+ return -EINVAL;
+ }
+ return 0;
+
+};
+
+static unsigned char dasd_eckd_path_access(void *conf_data, int conf_len)
+{
+ struct dasd_gneq *gneq;
+ int i, count, found;
+
+ count = conf_len / sizeof(*gneq);
+ gneq = (struct dasd_gneq *)conf_data;
+ found = 0;
+ for (i = 0; i < count; ++i) {
+ if (gneq->flags.identifier == 2) {
+ found = 1;
+ break;
+ }
+ gneq++;
+ }
+ if (found)
+ return ((char *)gneq)[18] & 0x07;
+ else
+ return 0;
+}
+
+static int dasd_eckd_read_conf(struct dasd_device *device)
{
void *conf_data;
int conf_len, conf_data_saved;
@@ -661,7 +728,6 @@ dasd_eckd_read_conf(struct dasd_device *device)
path_data->opm = ccw_device_get_path_mask(device->cdev);
lpm = 0x80;
conf_data_saved = 0;
-
/* get configuration data per operational path */
for (lpm = 0x80; lpm; lpm>>= 1) {
if (lpm & path_data->opm){
@@ -678,22 +744,20 @@ dasd_eckd_read_conf(struct dasd_device *device)
"data retrieved");
continue; /* no error */
}
- if (conf_len != sizeof(struct dasd_eckd_confdata)) {
- MESSAGE(KERN_WARNING,
- "sizes of configuration data mismatch"
- "%d (read) vs %ld (expected)",
- conf_len,
- sizeof(struct dasd_eckd_confdata));
- kfree(conf_data);
- continue; /* no error */
- }
/* save first valid configuration data */
- if (!conf_data_saved){
- memcpy(&private->conf_data, conf_data,
- sizeof(struct dasd_eckd_confdata));
+ if (!conf_data_saved) {
+ kfree(private->conf_data);
+ private->conf_data = conf_data;
+ private->conf_len = conf_len;
+ if (dasd_eckd_identify_conf_parts(private)) {
+ private->conf_data = NULL;
+ private->conf_len = 0;
+ kfree(conf_data);
+ continue;
+ }
conf_data_saved++;
}
- switch (((char *)conf_data)[242] & 0x07){
+ switch (dasd_eckd_path_access(conf_data, conf_len)) {
case 0x02:
path_data->npm |= lpm;
break;
@@ -701,7 +765,8 @@ dasd_eckd_read_conf(struct dasd_device *device)
path_data->ppm |= lpm;
break;
}
- kfree(conf_data);
+ if (conf_data != private->conf_data)
+ kfree(conf_data);
}
}
return 0;
@@ -952,6 +1017,7 @@ out_err2:
dasd_free_block(device->block);
device->block = NULL;
out_err1:
+ kfree(private->conf_data);
kfree(device->private);
device->private = NULL;
return rc;
@@ -959,7 +1025,17 @@ out_err1:
static void dasd_eckd_uncheck_device(struct dasd_device *device)
{
+ struct dasd_eckd_private *private;
+
+ private = (struct dasd_eckd_private *) device->private;
dasd_alias_disconnect_device_from_lcu(device);
+ private->ned = NULL;
+ private->sneq = NULL;
+ private->vdsneq = NULL;
+ private->gneq = NULL;
+ private->conf_len = 0;
+ kfree(private->conf_data);
+ private->conf_data = NULL;
}
static struct dasd_ccw_req *
@@ -1746,9 +1822,10 @@ dasd_eckd_fill_info(struct dasd_device * device,
info->characteristics_size = sizeof(struct dasd_eckd_characteristics);
memcpy(info->characteristics, &private->rdc_data,
sizeof(struct dasd_eckd_characteristics));
- info->confdata_size = sizeof(struct dasd_eckd_confdata);
- memcpy(info->configuration_data, &private->conf_data,
- sizeof(struct dasd_eckd_confdata));
+ info->confdata_size = min((unsigned long)private->conf_len,
+ sizeof(info->configuration_data));
+ memcpy(info->configuration_data, private->conf_data,
+ info->confdata_size);
return 0;
}
diff --git a/drivers/s390/block/dasd_eckd.h b/drivers/s390/block/dasd_eckd.h
index fc2509c939b..4bf0aa5112c 100644
--- a/drivers/s390/block/dasd_eckd.h
+++ b/drivers/s390/block/dasd_eckd.h
@@ -231,133 +231,62 @@ struct dasd_eckd_characteristics {
__u8 reserved3[10];
} __attribute__ ((packed));
-struct dasd_eckd_confdata {
+/* elements of the configuration data */
+struct dasd_ned {
struct {
- struct {
- unsigned char identifier:2;
- unsigned char token_id:1;
- unsigned char sno_valid:1;
- unsigned char subst_sno:1;
- unsigned char recNED:1;
- unsigned char emuNED:1;
- unsigned char reserved:1;
- } __attribute__ ((packed)) flags;
- __u8 descriptor;
- __u8 dev_class;
- __u8 reserved;
- unsigned char dev_type[6];
- unsigned char dev_model[3];
- unsigned char HDA_manufacturer[3];
- unsigned char HDA_location[2];
- unsigned char HDA_seqno[12];
- __u8 ID;
- __u8 unit_addr;
- } __attribute__ ((packed)) ned1;
- union {
- struct {
- struct {
- unsigned char identifier:2;
- unsigned char token_id:1;
- unsigned char sno_valid:1;
- unsigned char subst_sno:1;
- unsigned char recNED:1;
- unsigned char emuNED:1;
- unsigned char reserved:1;
- } __attribute__ ((packed)) flags;
- __u8 descriptor;
- __u8 reserved[2];
- unsigned char dev_type[6];
- unsigned char dev_model[3];
- unsigned char DASD_manufacturer[3];
- unsigned char DASD_location[2];
- unsigned char DASD_seqno[12];
- __u16 ID;
- } __attribute__ ((packed)) ned;
- struct {
- unsigned char flags; /* byte 0 */
- unsigned char res1; /* byte 1 */
- __u16 format; /* byte 2-3 */
- unsigned char res2[4]; /* byte 4-7 */
- unsigned char sua_flags; /* byte 8 */
- __u8 base_unit_addr; /* byte 9 */
- unsigned char res3[22]; /* byte 10-31 */
- } __attribute__ ((packed)) sneq;
- } __attribute__ ((packed)) ned2;
+ __u8 identifier:2;
+ __u8 token_id:1;
+ __u8 sno_valid:1;
+ __u8 subst_sno:1;
+ __u8 recNED:1;
+ __u8 emuNED:1;
+ __u8 reserved:1;
+ } __attribute__ ((packed)) flags;
+ __u8 descriptor;
+ __u8 dev_class;
+ __u8 reserved;
+ __u8 dev_type[6];
+ __u8 dev_model[3];
+ __u8 HDA_manufacturer[3];
+ __u8 HDA_location[2];
+ __u8 HDA_seqno[12];
+ __u8 ID;
+ __u8 unit_addr;
+} __attribute__ ((packed));
+
+struct dasd_sneq {
struct {
- struct {
- unsigned char identifier:2;
- unsigned char token_id:1;
- unsigned char sno_valid:1;
- unsigned char subst_sno:1;
- unsigned char recNED:1;
- unsigned char emuNED:1;
- unsigned char reserved:1;
- } __attribute__ ((packed)) flags;
- __u8 descriptor;
- __u8 reserved[2];
- unsigned char cont_type[6];
- unsigned char cont_model[3];
- unsigned char cont_manufacturer[3];
- unsigned char cont_location[2];
- unsigned char cont_seqno[12];
- __u16 ID;
- } __attribute__ ((packed)) ned3;
+ __u8 identifier:2;
+ __u8 reserved:6;
+ } __attribute__ ((packed)) flags;
+ __u8 res1;
+ __u16 format;
+ __u8 res2[4]; /* byte 4- 7 */
+ __u8 sua_flags; /* byte 8 */
+ __u8 base_unit_addr; /* byte 9 */
+ __u8 res3[22]; /* byte 10-31 */
+} __attribute__ ((packed));
+
+struct vd_sneq {
struct {
- struct {
- unsigned char identifier:2;
- unsigned char token_id:1;
- unsigned char sno_valid:1;
- unsigned char subst_sno:1;
- unsigned char recNED:1;
- unsigned char emuNED:1;
- unsigned char reserved:1;
- } __attribute__ ((packed)) flags;
- __u8 descriptor;
- __u8 reserved[2];
- unsigned char cont_type[6];
- unsigned char empty[3];
- unsigned char cont_manufacturer[3];
- unsigned char cont_location[2];
- unsigned char cont_seqno[12];
- __u16 ID;
- } __attribute__ ((packed)) ned4;
- unsigned char ned5[32];
- unsigned char ned6[32];
- unsigned char ned7[32];
+ __u8 identifier:2;
+ __u8 reserved:6;
+ } __attribute__ ((packed)) flags;
+ __u8 res1;
+ __u16 format;
+ __u8 res2[4]; /* byte 4- 7 */
+ __u8 uit[16]; /* byte 8-23 */
+ __u8 res3[8]; /* byte 24-31 */
+} __attribute__ ((packed));
+
+struct dasd_gneq {
struct {
- struct {
- unsigned char identifier:2;
- unsigned char reserved:6;
- } __attribute__ ((packed)) flags;
- __u8 selector;
- __u16 interfaceID;
- __u32 reserved;
- __u16 subsystemID;
- struct {
- unsigned char sp0:1;
- unsigned char sp1:1;
- unsigned char reserved:5;
- unsigned char scluster:1;
- } __attribute__ ((packed)) spathID;
- __u8 unit_address;
- __u8 dev_ID;
- __u8 dev_address;
- __u8 adapterID;
- __u16 link_address;
- struct {
- unsigned char parallel:1;
- unsigned char escon:1;
- unsigned char reserved:1;
- unsigned char ficon:1;
- unsigned char reserved2:4;
- } __attribute__ ((packed)) protocol_type;
- struct {
- unsigned char PID_in_236:1;
- unsigned char reserved:7;
- } __attribute__ ((packed)) format_flags;
- __u8 log_dev_address;
- unsigned char reserved2[12];
- } __attribute__ ((packed)) neq;
+ __u8 identifier:2;
+ __u8 reserved:6;
+ } __attribute__ ((packed)) flags;
+ __u8 reserved[7];
+ __u16 subsystemID;
+ __u8 reserved2[22];
} __attribute__ ((packed));
struct dasd_eckd_path {
@@ -463,7 +392,14 @@ struct alias_pav_group {
struct dasd_eckd_private {
struct dasd_eckd_characteristics rdc_data;
- struct dasd_eckd_confdata conf_data;
+ u8 *conf_data;
+ int conf_len;
+ /* pointers to specific parts in the conf_data */
+ struct dasd_ned *ned;
+ struct dasd_sneq *sneq;
+ struct vd_sneq *vdsneq;
+ struct dasd_gneq *gneq;
+
struct dasd_eckd_path path_data;
struct eckd_count count_area[5];
int init_cqr_status;
diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h
index fb2f931cf84..31ecaa4a40e 100644
--- a/drivers/s390/block/dasd_int.h
+++ b/drivers/s390/block/dasd_int.h
@@ -307,6 +307,7 @@ struct dasd_uid {
__u16 ssid;
__u8 real_unit_addr;
__u8 base_unit_addr;
+ char vduit[33];
};
/*
diff --git a/drivers/s390/char/sclp.c b/drivers/s390/char/sclp.c
index 3c8b25e6c34..1fd8f2193ed 100644
--- a/drivers/s390/char/sclp.c
+++ b/drivers/s390/char/sclp.c
@@ -399,6 +399,7 @@ sclp_tod_from_jiffies(unsigned long jiffies)
void
sclp_sync_wait(void)
{
+ unsigned long long old_tick;
unsigned long flags;
unsigned long cr0, cr0_sync;
u64 timeout;
@@ -419,11 +420,12 @@ sclp_sync_wait(void)
if (!irq_context)
local_bh_disable();
/* Enable service-signal interruption, disable timer interrupts */
+ old_tick = local_tick_disable();
trace_hardirqs_on();
__ctl_store(cr0, 0, 0);
cr0_sync = cr0;
+ cr0_sync &= 0xffff00a0;
cr0_sync |= 0x00000200;
- cr0_sync &= 0xFFFFF3AC;
__ctl_load(cr0_sync, 0, 0);
__raw_local_irq_stosm(0x01);
/* Loop until driver state indicates finished request */
@@ -439,9 +441,9 @@ sclp_sync_wait(void)
__ctl_load(cr0, 0, 0);
if (!irq_context)
_local_bh_enable();
+ local_tick_enable(old_tick);
local_irq_restore(flags);
}
-
EXPORT_SYMBOL(sclp_sync_wait);
/* Dispatch changes in send and receive mask to registered listeners. */
diff --git a/drivers/s390/char/sclp_cmd.c b/drivers/s390/char/sclp_cmd.c
index 0c2b77493db..eb5f1b8bc57 100644
--- a/drivers/s390/char/sclp_cmd.c
+++ b/drivers/s390/char/sclp_cmd.c
@@ -427,6 +427,8 @@ static int sclp_mem_notifier(struct notifier_block *nb,
sclp_attach_storage(id);
switch (action) {
case MEM_ONLINE:
+ case MEM_GOING_OFFLINE:
+ case MEM_CANCEL_OFFLINE:
break;
case MEM_GOING_ONLINE:
rc = sclp_mem_change_state(start, size, 1);
@@ -434,6 +436,9 @@ static int sclp_mem_notifier(struct notifier_block *nb,
case MEM_CANCEL_ONLINE:
sclp_mem_change_state(start, size, 0);
break;
+ case MEM_OFFLINE:
+ sclp_mem_change_state(start, size, 0);
+ break;
default:
rc = -EINVAL;
break;
diff --git a/drivers/s390/char/sclp_config.c b/drivers/s390/char/sclp_config.c
index fff4ff485d9..4cebd6ee6d2 100644
--- a/drivers/s390/char/sclp_config.c
+++ b/drivers/s390/char/sclp_config.c
@@ -8,7 +8,6 @@
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/cpu.h>
-#include <linux/kthread.h>
#include <linux/sysdev.h>
#include <linux/workqueue.h>
#include <asm/smp.h>
@@ -41,19 +40,9 @@ static void sclp_cpu_capability_notify(struct work_struct *work)
put_online_cpus();
}
-static int sclp_cpu_kthread(void *data)
-{
- smp_rescan_cpus();
- return 0;
-}
-
static void __ref sclp_cpu_change_notify(struct work_struct *work)
{
- /* Can't call smp_rescan_cpus() from workqueue context since it may
- * deadlock in case of cpu hotplug. So we have to create a kernel
- * thread in order to call it.
- */
- kthread_run(sclp_cpu_kthread, NULL, "cpu_rescan");
+ smp_rescan_cpus();
}
static void sclp_conf_receiver_fn(struct evbuf_header *evbuf)
diff --git a/drivers/s390/cio/idset.c b/drivers/s390/cio/idset.c
index ef7bc0a125e..cf8f24a4b5e 100644
--- a/drivers/s390/cio/idset.c
+++ b/drivers/s390/cio/idset.c
@@ -5,7 +5,7 @@
* Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
*/
-#include <linux/slab.h>
+#include <linux/vmalloc.h>
#include <linux/bitops.h>
#include "idset.h"
#include "css.h"
@@ -25,18 +25,18 @@ static struct idset *idset_new(int num_ssid, int num_id)
{
struct idset *set;
- set = kzalloc(sizeof(struct idset) + bitmap_size(num_ssid, num_id),
- GFP_KERNEL);
+ set = vmalloc(sizeof(struct idset) + bitmap_size(num_ssid, num_id));
if (set) {
set->num_ssid = num_ssid;
set->num_id = num_id;
+ memset(set->bitmap, 0, bitmap_size(num_ssid, num_id));
}
return set;
}
void idset_free(struct idset *set)
{
- kfree(set);
+ vfree(set);
}
void idset_clear(struct idset *set)
diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c
index d10c73cc168..d15648514a0 100644
--- a/drivers/s390/cio/qdio_main.c
+++ b/drivers/s390/cio/qdio_main.c
@@ -1355,7 +1355,7 @@ int qdio_allocate(struct qdio_initialize *init_data)
goto out_rel;
/* qdr is used in ccw1.cda which is u32 */
- irq_ptr->qdr = kzalloc(sizeof(struct qdr), GFP_KERNEL | GFP_DMA);
+ irq_ptr->qdr = (struct qdr *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
if (!irq_ptr->qdr)
goto out_rel;
WARN_ON((unsigned long)irq_ptr->qdr & 0xfff);
diff --git a/drivers/s390/cio/qdio_perf.c b/drivers/s390/cio/qdio_perf.c
index ea01b85b1cc..ec5c4a41423 100644
--- a/drivers/s390/cio/qdio_perf.c
+++ b/drivers/s390/cio/qdio_perf.c
@@ -142,7 +142,7 @@ int __init qdio_setup_perf_stats(void)
return 0;
}
-void __exit qdio_remove_perf_stats(void)
+void qdio_remove_perf_stats(void)
{
#ifdef CONFIG_PROC_FS
remove_proc_entry("qdio_perf", NULL);
diff --git a/drivers/s390/cio/qdio_setup.c b/drivers/s390/cio/qdio_setup.c
index f0923a8aced..1bd2a208db2 100644
--- a/drivers/s390/cio/qdio_setup.c
+++ b/drivers/s390/cio/qdio_setup.c
@@ -325,7 +325,7 @@ void qdio_release_memory(struct qdio_irq *irq_ptr)
kmem_cache_free(qdio_q_cache, q);
}
}
- kfree(irq_ptr->qdr);
+ free_page((unsigned long) irq_ptr->qdr);
free_page(irq_ptr->chsc_page);
free_page((unsigned long) irq_ptr);
}
@@ -515,7 +515,7 @@ int __init qdio_setup_init(void)
return 0;
}
-void __exit qdio_setup_exit(void)
+void qdio_setup_exit(void)
{
kmem_cache_destroy(qdio_q_cache);
}
diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c
index 79954bd6bfa..292b60da6dc 100644
--- a/drivers/s390/kvm/kvm_virtio.c
+++ b/drivers/s390/kvm/kvm_virtio.c
@@ -352,7 +352,7 @@ static __init int early_put_chars(u32 vtermno, const char *buf, int count)
return len;
}
-void s390_virtio_console_init(void)
+void __init s390_virtio_console_init(void)
{
virtio_cons_early_init(early_put_chars);
}
diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index 1895dbb553c..80971c21ea1 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -419,6 +419,7 @@ struct qeth_qdio_out_buffer {
int next_element_to_fill;
struct sk_buff_head skb_list;
struct list_head ctx_list;
+ int is_header[16];
};
struct qeth_card;
@@ -785,7 +786,7 @@ void qeth_core_remove_osn_attributes(struct device *);
/* exports for qeth discipline device drivers */
extern struct qeth_card_list_struct qeth_core_card_list;
-
+extern struct kmem_cache *qeth_core_header_cache;
extern struct qeth_dbf_info qeth_dbf[QETH_DBF_INFOS];
void qeth_set_allowed_threads(struct qeth_card *, unsigned long , int);
@@ -843,7 +844,7 @@ int qeth_get_priority_queue(struct qeth_card *, struct sk_buff *, int, int);
int qeth_get_elements_no(struct qeth_card *, void *, struct sk_buff *, int);
int qeth_do_send_packet_fast(struct qeth_card *, struct qeth_qdio_out_q *,
struct sk_buff *, struct qeth_hdr *, int,
- struct qeth_eddp_context *);
+ struct qeth_eddp_context *, int, int);
int qeth_do_send_packet(struct qeth_card *, struct qeth_qdio_out_q *,
struct sk_buff *, struct qeth_hdr *,
int, struct qeth_eddp_context *);
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index cebb25e36e8..bd420d1b9a0 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -19,8 +19,8 @@
#include <linux/mii.h>
#include <linux/kthread.h>
-#include <asm-s390/ebcdic.h>
-#include <asm-s390/io.h>
+#include <asm/ebcdic.h>
+#include <asm/io.h>
#include <asm/s390_rdev.h>
#include "qeth_core.h"
@@ -48,6 +48,8 @@ EXPORT_SYMBOL_GPL(qeth_dbf);
struct qeth_card_list_struct qeth_core_card_list;
EXPORT_SYMBOL_GPL(qeth_core_card_list);
+struct kmem_cache *qeth_core_header_cache;
+EXPORT_SYMBOL_GPL(qeth_core_header_cache);
static struct device *qeth_core_root_dev;
static unsigned int known_devices[][10] = QETH_MODELLIST_ARRAY;
@@ -933,6 +935,10 @@ static void qeth_clear_output_buffer(struct qeth_qdio_out_q *queue,
}
qeth_eddp_buf_release_contexts(buf);
for (i = 0; i < QETH_MAX_BUFFER_ELEMENTS(queue->card); ++i) {
+ if (buf->buffer->element[i].addr && buf->is_header[i])
+ kmem_cache_free(qeth_core_header_cache,
+ buf->buffer->element[i].addr);
+ buf->is_header[i] = 0;
buf->buffer->element[i].length = 0;
buf->buffer->element[i].addr = NULL;
buf->buffer->element[i].flags = 0;
@@ -3002,8 +3008,8 @@ int qeth_get_elements_no(struct qeth_card *card, void *hdr,
if (skb_shinfo(skb)->nr_frags > 0)
elements_needed = (skb_shinfo(skb)->nr_frags + 1);
if (elements_needed == 0)
- elements_needed = 1 + (((((unsigned long) hdr) % PAGE_SIZE)
- + skb->len) >> PAGE_SHIFT);
+ elements_needed = 1 + (((((unsigned long) skb->data) %
+ PAGE_SIZE) + skb->len) >> PAGE_SHIFT);
if ((elements_needed + elems) > QETH_MAX_BUFFER_ELEMENTS(card)) {
QETH_DBF_MESSAGE(2, "Invalid size of IP packet "
"(Number=%d / Length=%d). Discarded.\n",
@@ -3015,7 +3021,8 @@ int qeth_get_elements_no(struct qeth_card *card, void *hdr,
EXPORT_SYMBOL_GPL(qeth_get_elements_no);
static inline void __qeth_fill_buffer(struct sk_buff *skb,
- struct qdio_buffer *buffer, int is_tso, int *next_element_to_fill)
+ struct qdio_buffer *buffer, int is_tso, int *next_element_to_fill,
+ int offset)
{
int length = skb->len;
int length_here;
@@ -3027,6 +3034,11 @@ static inline void __qeth_fill_buffer(struct sk_buff *skb,
data = skb->data;
first_lap = (is_tso == 0 ? 1 : 0);
+ if (offset >= 0) {
+ data = skb->data + offset;
+ first_lap = 0;
+ }
+
while (length > 0) {
/* length_here is the remaining amount of data in this page */
length_here = PAGE_SIZE - ((unsigned long) data % PAGE_SIZE);
@@ -3058,22 +3070,22 @@ static inline void __qeth_fill_buffer(struct sk_buff *skb,
}
static inline int qeth_fill_buffer(struct qeth_qdio_out_q *queue,
- struct qeth_qdio_out_buffer *buf, struct sk_buff *skb)
+ struct qeth_qdio_out_buffer *buf, struct sk_buff *skb,
+ struct qeth_hdr *hdr, int offset, int hd_len)
{
struct qdio_buffer *buffer;
- struct qeth_hdr_tso *hdr;
int flush_cnt = 0, hdr_len, large_send = 0;
buffer = buf->buffer;
atomic_inc(&skb->users);
skb_queue_tail(&buf->skb_list, skb);
- hdr = (struct qeth_hdr_tso *) skb->data;
/*check first on TSO ....*/
- if (hdr->hdr.hdr.l3.id == QETH_HEADER_TYPE_TSO) {
+ if (hdr->hdr.l3.id == QETH_HEADER_TYPE_TSO) {
int element = buf->next_element_to_fill;
- hdr_len = sizeof(struct qeth_hdr_tso) + hdr->ext.dg_hdr_len;
+ hdr_len = sizeof(struct qeth_hdr_tso) +
+ ((struct qeth_hdr_tso *)hdr)->ext.dg_hdr_len;
/*fill first buffer entry only with header information */
buffer->element[element].addr = skb->data;
buffer->element[element].length = hdr_len;
@@ -3083,9 +3095,20 @@ static inline int qeth_fill_buffer(struct qeth_qdio_out_q *queue,
skb->len -= hdr_len;
large_send = 1;
}
+
+ if (offset >= 0) {
+ int element = buf->next_element_to_fill;
+ buffer->element[element].addr = hdr;
+ buffer->element[element].length = sizeof(struct qeth_hdr) +
+ hd_len;
+ buffer->element[element].flags = SBAL_FLAGS_FIRST_FRAG;
+ buf->is_header[element] = 1;
+ buf->next_element_to_fill++;
+ }
+
if (skb_shinfo(skb)->nr_frags == 0)
__qeth_fill_buffer(skb, buffer, large_send,
- (int *)&buf->next_element_to_fill);
+ (int *)&buf->next_element_to_fill, offset);
else
__qeth_fill_buffer_frag(skb, buffer, large_send,
(int *)&buf->next_element_to_fill);
@@ -3115,7 +3138,7 @@ static inline int qeth_fill_buffer(struct qeth_qdio_out_q *queue,
int qeth_do_send_packet_fast(struct qeth_card *card,
struct qeth_qdio_out_q *queue, struct sk_buff *skb,
struct qeth_hdr *hdr, int elements_needed,
- struct qeth_eddp_context *ctx)
+ struct qeth_eddp_context *ctx, int offset, int hd_len)
{
struct qeth_qdio_out_buffer *buffer;
int buffers_needed = 0;
@@ -3148,7 +3171,7 @@ int qeth_do_send_packet_fast(struct qeth_card *card,
}
atomic_set(&queue->state, QETH_OUT_Q_UNLOCKED);
if (ctx == NULL) {
- qeth_fill_buffer(queue, buffer, skb);
+ qeth_fill_buffer(queue, buffer, skb, hdr, offset, hd_len);
qeth_flush_buffers(queue, index, 1);
} else {
flush_cnt = qeth_eddp_fill_buffer(queue, ctx, index);
@@ -3224,7 +3247,7 @@ int qeth_do_send_packet(struct qeth_card *card, struct qeth_qdio_out_q *queue,
}
}
if (ctx == NULL)
- tmp = qeth_fill_buffer(queue, buffer, skb);
+ tmp = qeth_fill_buffer(queue, buffer, skb, hdr, -1, 0);
else {
tmp = qeth_eddp_fill_buffer(queue, ctx,
queue->next_buf_to_fill);
@@ -4443,8 +4466,17 @@ static int __init qeth_core_init(void)
rc = IS_ERR(qeth_core_root_dev) ? PTR_ERR(qeth_core_root_dev) : 0;
if (rc)
goto register_err;
- return 0;
+ qeth_core_header_cache = kmem_cache_create("qeth_hdr",
+ sizeof(struct qeth_hdr) + ETH_HLEN, 64, 0, NULL);
+ if (!qeth_core_header_cache) {
+ rc = -ENOMEM;
+ goto slab_err;
+ }
+
+ return 0;
+slab_err:
+ s390_root_dev_unregister(qeth_core_root_dev);
register_err:
driver_remove_file(&qeth_core_ccwgroup_driver.driver,
&driver_attr_group);
@@ -4466,6 +4498,7 @@ static void __exit qeth_core_exit(void)
&driver_attr_group);
ccwgroup_driver_unregister(&qeth_core_ccwgroup_driver);
ccw_driver_unregister(&qeth_ccw_driver);
+ kmem_cache_destroy(qeth_core_header_cache);
qeth_unregister_dbf_views();
PRINT_INFO("core functions removed\n");
}
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index a8b069cd9a4..b3cee032f57 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -243,8 +243,7 @@ static void qeth_l2_get_packet_type(struct qeth_card *card,
static void qeth_l2_fill_header(struct qeth_card *card, struct qeth_hdr *hdr,
struct sk_buff *skb, int ipv, int cast_type)
{
- struct vlan_ethhdr *veth = (struct vlan_ethhdr *)((skb->data) +
- QETH_HEADER_SIZE);
+ struct vlan_ethhdr *veth = (struct vlan_ethhdr *)skb_mac_header(skb);
memset(hdr, 0, sizeof(struct qeth_hdr));
hdr->hdr.l2.id = QETH_HEADER_TYPE_LAYER2;
@@ -621,6 +620,9 @@ static int qeth_l2_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
int tx_bytes = skb->len;
enum qeth_large_send_types large_send = QETH_LARGE_SEND_NO;
struct qeth_eddp_context *ctx = NULL;
+ int data_offset = -1;
+ int elements_needed = 0;
+ int hd_len = 0;
if ((card->state != CARD_STATE_UP) || !card->lan_online) {
card->stats.tx_carrier_errors++;
@@ -643,13 +645,32 @@ static int qeth_l2_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
if (card->info.type == QETH_CARD_TYPE_OSN)
hdr = (struct qeth_hdr *)skb->data;
else {
- /* create a clone with writeable headroom */
- new_skb = skb_realloc_headroom(skb, sizeof(struct qeth_hdr));
- if (!new_skb)
- goto tx_drop;
- hdr = (struct qeth_hdr *)skb_push(new_skb,
+ if ((card->info.type == QETH_CARD_TYPE_IQD) && (!large_send) &&
+ (skb_shinfo(skb)->nr_frags == 0)) {
+ new_skb = skb;
+ data_offset = ETH_HLEN;
+ hd_len = ETH_HLEN;
+ hdr = kmem_cache_alloc(qeth_core_header_cache,
+ GFP_ATOMIC);
+ if (!hdr)
+ goto tx_drop;
+ elements_needed++;
+ skb_reset_mac_header(new_skb);
+ qeth_l2_fill_header(card, hdr, new_skb, ipv, cast_type);
+ hdr->hdr.l2.pkt_length = new_skb->len;
+ memcpy(((char *)hdr) + sizeof(struct qeth_hdr),
+ skb_mac_header(new_skb), ETH_HLEN);
+ } else {
+ /* create a clone with writeable headroom */
+ new_skb = skb_realloc_headroom(skb,
+ sizeof(struct qeth_hdr));
+ if (!new_skb)
+ goto tx_drop;
+ hdr = (struct qeth_hdr *)skb_push(new_skb,
sizeof(struct qeth_hdr));
- qeth_l2_fill_header(card, hdr, new_skb, ipv, cast_type);
+ skb_set_mac_header(new_skb, sizeof(struct qeth_hdr));
+ qeth_l2_fill_header(card, hdr, new_skb, ipv, cast_type);
+ }
}
if (large_send == QETH_LARGE_SEND_EDDP) {
@@ -660,9 +681,13 @@ static int qeth_l2_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
goto tx_drop;
}
} else {
- elements = qeth_get_elements_no(card, (void *)hdr, new_skb, 0);
- if (!elements)
+ elements = qeth_get_elements_no(card, (void *)hdr, new_skb,
+ elements_needed);
+ if (!elements) {
+ if (data_offset >= 0)
+ kmem_cache_free(qeth_core_header_cache, hdr);
goto tx_drop;
+ }
}
if ((large_send == QETH_LARGE_SEND_NO) &&
@@ -674,7 +699,7 @@ static int qeth_l2_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
elements, ctx);
else
rc = qeth_do_send_packet_fast(card, queue, new_skb, hdr,
- elements, ctx);
+ elements, ctx, data_offset, hd_len);
if (!rc) {
card->stats.tx_packets++;
card->stats.tx_bytes += tx_bytes;
@@ -701,6 +726,9 @@ static int qeth_l2_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
if (ctx != NULL)
qeth_eddp_put_context(ctx);
+ if (data_offset >= 0)
+ kmem_cache_free(qeth_core_header_cache, hdr);
+
if (rc == -EBUSY) {
if (new_skb != skb)
dev_kfree_skb_any(new_skb);
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index 3e1d1385735..dd72c3c2016 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -2604,6 +2604,7 @@ static int qeth_l3_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
int tx_bytes = skb->len;
enum qeth_large_send_types large_send = QETH_LARGE_SEND_NO;
struct qeth_eddp_context *ctx = NULL;
+ int data_offset = -1;
if ((card->info.type == QETH_CARD_TYPE_IQD) &&
(skb->protocol != htons(ETH_P_IPV6)) &&
@@ -2624,14 +2625,28 @@ static int qeth_l3_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
card->perf_stats.outbound_start_time = qeth_get_micros();
}
- /* create a clone with writeable headroom */
- new_skb = skb_realloc_headroom(skb, sizeof(struct qeth_hdr_tso) +
- VLAN_HLEN);
- if (!new_skb)
- goto tx_drop;
+ if (skb_is_gso(skb))
+ large_send = card->options.large_send;
+
+ if ((card->info.type == QETH_CARD_TYPE_IQD) && (!large_send) &&
+ (skb_shinfo(skb)->nr_frags == 0)) {
+ new_skb = skb;
+ data_offset = ETH_HLEN;
+ hdr = kmem_cache_alloc(qeth_core_header_cache, GFP_ATOMIC);
+ if (!hdr)
+ goto tx_drop;
+ elements_needed++;
+ } else {
+ /* create a clone with writeable headroom */
+ new_skb = skb_realloc_headroom(skb, sizeof(struct qeth_hdr_tso)
+ + VLAN_HLEN);
+ if (!new_skb)
+ goto tx_drop;
+ }
if (card->info.type == QETH_CARD_TYPE_IQD) {
- skb_pull(new_skb, ETH_HLEN);
+ if (data_offset < 0)
+ skb_pull(new_skb, ETH_HLEN);
} else {
if (new_skb->protocol == htons(ETH_P_IP)) {
if (card->dev->type == ARPHRD_IEEE802_TR)
@@ -2657,9 +2672,6 @@ static int qeth_l3_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
netif_stop_queue(dev);
- if (skb_is_gso(new_skb))
- large_send = card->options.large_send;
-
/* fix hardware limitation: as long as we do not have sbal
* chaining we can not send long frag lists so we temporary
* switch to EDDP
@@ -2677,9 +2689,16 @@ static int qeth_l3_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
qeth_tso_fill_header(card, hdr, new_skb);
elements_needed++;
} else {
- hdr = (struct qeth_hdr *)skb_push(new_skb,
+ if (data_offset < 0) {
+ hdr = (struct qeth_hdr *)skb_push(new_skb,
sizeof(struct qeth_hdr));
- qeth_l3_fill_header(card, hdr, new_skb, ipv, cast_type);
+ qeth_l3_fill_header(card, hdr, new_skb, ipv,
+ cast_type);
+ } else {
+ qeth_l3_fill_header(card, hdr, new_skb, ipv,
+ cast_type);
+ hdr->hdr.l3.length = new_skb->len - data_offset;
+ }
}
if (large_send == QETH_LARGE_SEND_EDDP) {
@@ -2695,8 +2714,11 @@ static int qeth_l3_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
} else {
int elems = qeth_get_elements_no(card, (void *)hdr, new_skb,
elements_needed);
- if (!elems)
+ if (!elems) {
+ if (data_offset >= 0)
+ kmem_cache_free(qeth_core_header_cache, hdr);
goto tx_drop;
+ }
elements_needed += elems;
}
@@ -2709,7 +2731,7 @@ static int qeth_l3_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
elements_needed, ctx);
else
rc = qeth_do_send_packet_fast(card, queue, new_skb, hdr,
- elements_needed, ctx);
+ elements_needed, ctx, data_offset, 0);
if (!rc) {
card->stats.tx_packets++;
@@ -2737,6 +2759,9 @@ static int qeth_l3_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
if (ctx != NULL)
qeth_eddp_put_context(ctx);
+ if (data_offset >= 0)
+ kmem_cache_free(qeth_core_header_cache, hdr);
+
if (rc == -EBUSY) {
if (new_skb != skb)
dev_kfree_skb_any(new_skb);
diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c
index a97f1ae11f7..342e12fb1c2 100644
--- a/drivers/serial/8250.c
+++ b/drivers/serial/8250.c
@@ -1885,7 +1885,7 @@ static int serial8250_startup(struct uart_port *port)
* the interrupt is enabled. Delays are necessary to
* allow register changes to become visible.
*/
- spin_lock(&up->port.lock);
+ spin_lock_irqsave(&up->port.lock, flags);
if (up->port.flags & UPF_SHARE_IRQ)
disable_irq_nosync(up->port.irq);
@@ -1901,7 +1901,7 @@ static int serial8250_startup(struct uart_port *port)
if (up->port.flags & UPF_SHARE_IRQ)
enable_irq(up->port.irq);
- spin_unlock(&up->port.lock);
+ spin_unlock_irqrestore(&up->port.lock, flags);
/*
* If the interrupt is not reasserted, setup a timer to
diff --git a/drivers/serial/Makefile b/drivers/serial/Makefile
index 3a0bbbe17aa..7e7383e890d 100644
--- a/drivers/serial/Makefile
+++ b/drivers/serial/Makefile
@@ -42,7 +42,6 @@ obj-$(CONFIG_SERIAL_68328) += 68328serial.o
obj-$(CONFIG_SERIAL_68360) += 68360serial.o
obj-$(CONFIG_SERIAL_COLDFIRE) += mcfserial.o
obj-$(CONFIG_SERIAL_MCF) += mcf.o
-obj-$(CONFIG_V850E_UART) += v850e_uart.o
obj-$(CONFIG_SERIAL_PMACZILOG) += pmac_zilog.o
obj-$(CONFIG_SERIAL_LH7A40X) += serial_lh7a40x.o
obj-$(CONFIG_SERIAL_DZ) += dz.o
diff --git a/drivers/serial/cpm_uart/cpm_uart.h b/drivers/serial/cpm_uart/cpm_uart.h
index 5c76e0ae058..7274b527a3c 100644
--- a/drivers/serial/cpm_uart/cpm_uart.h
+++ b/drivers/serial/cpm_uart/cpm_uart.h
@@ -50,6 +50,15 @@
#define SCC_WAIT_CLOSING 100
+#define GPIO_CTS 0
+#define GPIO_RTS 1
+#define GPIO_DCD 2
+#define GPIO_DSR 3
+#define GPIO_DTR 4
+#define GPIO_RI 5
+
+#define NUM_GPIOS (GPIO_RI+1)
+
struct uart_cpm_port {
struct uart_port port;
u16 rx_nrfifos;
@@ -68,6 +77,7 @@ struct uart_cpm_port {
unsigned char *rx_buf;
u32 flags;
void (*set_lineif)(struct uart_cpm_port *);
+ struct clk *clk;
u8 brg;
uint dp_addr;
void *mem_addr;
@@ -82,6 +92,7 @@ struct uart_cpm_port {
int wait_closing;
/* value to combine with opcode to form cpm command */
u32 command;
+ int gpios[NUM_GPIOS];
};
extern int cpm_uart_nr;
diff --git a/drivers/serial/cpm_uart/cpm_uart_core.c b/drivers/serial/cpm_uart/cpm_uart_core.c
index a4f86927a74..25efca5a7a1 100644
--- a/drivers/serial/cpm_uart/cpm_uart_core.c
+++ b/drivers/serial/cpm_uart/cpm_uart_core.c
@@ -43,6 +43,9 @@
#include <linux/dma-mapping.h>
#include <linux/fs_uart_pd.h>
#include <linux/of_platform.h>
+#include <linux/gpio.h>
+#include <linux/of_gpio.h>
+#include <linux/clk.h>
#include <asm/io.h>
#include <asm/irq.h>
@@ -96,13 +99,41 @@ static unsigned int cpm_uart_tx_empty(struct uart_port *port)
static void cpm_uart_set_mctrl(struct uart_port *port, unsigned int mctrl)
{
- /* Whee. Do nothing. */
+ struct uart_cpm_port *pinfo = (struct uart_cpm_port *)port;
+
+ if (pinfo->gpios[GPIO_RTS] >= 0)
+ gpio_set_value(pinfo->gpios[GPIO_RTS], !(mctrl & TIOCM_RTS));
+
+ if (pinfo->gpios[GPIO_DTR] >= 0)
+ gpio_set_value(pinfo->gpios[GPIO_DTR], !(mctrl & TIOCM_DTR));
}
static unsigned int cpm_uart_get_mctrl(struct uart_port *port)
{
- /* Whee. Do nothing. */
- return TIOCM_CAR | TIOCM_DSR | TIOCM_CTS;
+ struct uart_cpm_port *pinfo = (struct uart_cpm_port *)port;
+ unsigned int mctrl = TIOCM_CTS | TIOCM_DSR | TIOCM_CAR;
+
+ if (pinfo->gpios[GPIO_CTS] >= 0) {
+ if (gpio_get_value(pinfo->gpios[GPIO_CTS]))
+ mctrl &= ~TIOCM_CTS;
+ }
+
+ if (pinfo->gpios[GPIO_DSR] >= 0) {
+ if (gpio_get_value(pinfo->gpios[GPIO_DSR]))
+ mctrl &= ~TIOCM_DSR;
+ }
+
+ if (pinfo->gpios[GPIO_DCD] >= 0) {
+ if (gpio_get_value(pinfo->gpios[GPIO_DCD]))
+ mctrl &= ~TIOCM_CAR;
+ }
+
+ if (pinfo->gpios[GPIO_RI] >= 0) {
+ if (!gpio_get_value(pinfo->gpios[GPIO_RI]))
+ mctrl |= TIOCM_RNG;
+ }
+
+ return mctrl;
}
/*
@@ -566,7 +597,10 @@ static void cpm_uart_set_termios(struct uart_port *port,
out_be16(&sccp->scc_psmr, (sbits << 12) | scval);
}
- cpm_set_brg(pinfo->brg - 1, baud);
+ if (pinfo->clk)
+ clk_set_rate(pinfo->clk, baud);
+ else
+ cpm_set_brg(pinfo->brg - 1, baud);
spin_unlock_irqrestore(&port->lock, flags);
}
@@ -991,14 +1025,23 @@ static int cpm_uart_init_port(struct device_node *np,
void __iomem *mem, *pram;
int len;
int ret;
+ int i;
- data = of_get_property(np, "fsl,cpm-brg", &len);
- if (!data || len != 4) {
- printk(KERN_ERR "CPM UART %s has no/invalid "
- "fsl,cpm-brg property.\n", np->name);
- return -EINVAL;
+ data = of_get_property(np, "clock", NULL);
+ if (data) {
+ struct clk *clk = clk_get(NULL, (const char*)data);
+ if (!IS_ERR(clk))
+ pinfo->clk = clk;
+ }
+ if (!pinfo->clk) {
+ data = of_get_property(np, "fsl,cpm-brg", &len);
+ if (!data || len != 4) {
+ printk(KERN_ERR "CPM UART %s has no/invalid "
+ "fsl,cpm-brg property.\n", np->name);
+ return -EINVAL;
+ }
+ pinfo->brg = *data;
}
- pinfo->brg = *data;
data = of_get_property(np, "fsl,cpm-command", &len);
if (!data || len != 4) {
@@ -1050,6 +1093,9 @@ static int cpm_uart_init_port(struct device_node *np,
goto out_pram;
}
+ for (i = 0; i < NUM_GPIOS; i++)
+ pinfo->gpios[i] = of_get_gpio(np, i);
+
return cpm_uart_request_port(&pinfo->port);
out_pram:
diff --git a/drivers/serial/sh-sci.h b/drivers/serial/sh-sci.h
index cd728df6a01..8a0749e34ca 100644
--- a/drivers/serial/sh-sci.h
+++ b/drivers/serial/sh-sci.h
@@ -451,19 +451,21 @@ SCIx_FNS(SCxSR, 0x08, 8, 0x10, 8, 0x08, 16, 0x10, 16, 0x04, 8)
SCIx_FNS(SCxRDR, 0x0a, 8, 0x14, 8, 0x0A, 8, 0x14, 8, 0x05, 8)
SCIF_FNS(SCFCR, 0x0c, 8, 0x18, 16)
#if defined(CONFIG_CPU_SUBTYPE_SH7760) || \
- defined(CONFIG_CPU_SUBTYPE_SH7763) || \
defined(CONFIG_CPU_SUBTYPE_SH7780) || \
defined(CONFIG_CPU_SUBTYPE_SH7785)
+SCIF_FNS(SCFDR, 0x0e, 16, 0x1C, 16)
SCIF_FNS(SCTFDR, 0x0e, 16, 0x1C, 16)
SCIF_FNS(SCRFDR, 0x0e, 16, 0x20, 16)
SCIF_FNS(SCSPTR, 0, 0, 0x24, 16)
SCIF_FNS(SCLSR, 0, 0, 0x28, 16)
-#if defined(CONFIG_CPU_SUBTYPE_SH7763)
-/* SH7763 SCIF2 */
+#elif defined(CONFIG_CPU_SUBTYPE_SH7763)
SCIF_FNS(SCFDR, 0, 0, 0x1C, 16)
SCIF_FNS(SCSPTR2, 0, 0, 0x20, 16)
-SCIF_FNS(SCLSR2, 0, 0, 0x24, 16)
-#endif /* CONFIG_CPU_SUBTYPE_SH7763 */
+SCIF_FNS(SCLSR2, 0, 0, 0x24, 16)
+SCIF_FNS(SCTFDR, 0x0e, 16, 0x1C, 16)
+SCIF_FNS(SCRFDR, 0x0e, 16, 0x20, 16)
+SCIF_FNS(SCSPTR, 0, 0, 0x24, 16)
+SCIF_FNS(SCLSR, 0, 0, 0x28, 16)
#else
SCIF_FNS(SCFDR, 0x0e, 16, 0x1C, 16)
#if defined(CONFIG_CPU_SUBTYPE_SH7722)
diff --git a/drivers/serial/v850e_uart.c b/drivers/serial/v850e_uart.c
deleted file mode 100644
index 5acf061b6cd..00000000000
--- a/drivers/serial/v850e_uart.c
+++ /dev/null
@@ -1,548 +0,0 @@
-/*
- * drivers/serial/v850e_uart.c -- Serial I/O using V850E on-chip UART or UARTB
- *
- * Copyright (C) 2001,02,03 NEC Electronics Corporation
- * Copyright (C) 2001,02,03 Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License. See the file COPYING in the main directory of this
- * archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-/* This driver supports both the original V850E UART interface (called
- merely `UART' in the docs) and the newer `UARTB' interface, which is
- roughly a superset of the first one. The selection is made at
- configure time -- if CONFIG_V850E_UARTB is defined, then UARTB is
- presumed, otherwise the old UART -- as these are on-CPU UARTS, a system
- can never have both.
-
- The UARTB interface also has a 16-entry FIFO mode, which is not
- yet supported by this driver. */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/console.h>
-#include <linux/tty.h>
-#include <linux/tty_flip.h>
-#include <linux/serial.h>
-#include <linux/serial_core.h>
-
-#include <asm/v850e_uart.h>
-
-/* Initial UART state. This may be overridden by machine-dependent headers. */
-#ifndef V850E_UART_INIT_BAUD
-#define V850E_UART_INIT_BAUD 115200
-#endif
-#ifndef V850E_UART_INIT_CFLAGS
-#define V850E_UART_INIT_CFLAGS (B115200 | CS8 | CREAD)
-#endif
-
-/* A string used for prefixing printed descriptions; since the same UART
- macro is actually used on other chips than the V850E. This must be a
- constant string. */
-#ifndef V850E_UART_CHIP_NAME
-#define V850E_UART_CHIP_NAME "V850E"
-#endif
-
-#define V850E_UART_MINOR_BASE 64 /* First tty minor number */
-
-
-/* Low-level UART functions. */
-
-/* Configure and turn on uart channel CHAN, using the termios `control
- modes' bits in CFLAGS, and a baud-rate of BAUD. */
-void v850e_uart_configure (unsigned chan, unsigned cflags, unsigned baud)
-{
- int flags;
- v850e_uart_speed_t old_speed;
- v850e_uart_config_t old_config;
- v850e_uart_speed_t new_speed = v850e_uart_calc_speed (baud);
- v850e_uart_config_t new_config = v850e_uart_calc_config (cflags);
-
- /* Disable interrupts while we're twiddling the hardware. */
- local_irq_save (flags);
-
-#ifdef V850E_UART_PRE_CONFIGURE
- V850E_UART_PRE_CONFIGURE (chan, cflags, baud);
-#endif
-
- old_config = V850E_UART_CONFIG (chan);
- old_speed = v850e_uart_speed (chan);
-
- if (! v850e_uart_speed_eq (old_speed, new_speed)) {
- /* The baud rate has changed. First, disable the UART. */
- V850E_UART_CONFIG (chan) = V850E_UART_CONFIG_FINI;
- old_config = 0; /* Force the uart to be re-initialized. */
-
- /* Reprogram the baud-rate generator. */
- v850e_uart_set_speed (chan, new_speed);
- }
-
- if (! (old_config & V850E_UART_CONFIG_ENABLED)) {
- /* If we are using the uart for the first time, start by
- enabling it, which must be done before turning on any
- other bits. */
- V850E_UART_CONFIG (chan) = V850E_UART_CONFIG_INIT;
- /* See the initial state. */
- old_config = V850E_UART_CONFIG (chan);
- }
-
- if (new_config != old_config) {
- /* Which of the TXE/RXE bits we'll temporarily turn off
- before changing other control bits. */
- unsigned temp_disable = 0;
- /* Which of the TXE/RXE bits will be enabled. */
- unsigned enable = 0;
- unsigned changed_bits = new_config ^ old_config;
-
- /* Which of RX/TX will be enabled in the new configuration. */
- if (new_config & V850E_UART_CONFIG_RX_BITS)
- enable |= (new_config & V850E_UART_CONFIG_RX_ENABLE);
- if (new_config & V850E_UART_CONFIG_TX_BITS)
- enable |= (new_config & V850E_UART_CONFIG_TX_ENABLE);
-
- /* Figure out which of RX/TX needs to be disabled; note
- that this will only happen if they're not already
- disabled. */
- if (changed_bits & V850E_UART_CONFIG_RX_BITS)
- temp_disable
- |= (old_config & V850E_UART_CONFIG_RX_ENABLE);
- if (changed_bits & V850E_UART_CONFIG_TX_BITS)
- temp_disable
- |= (old_config & V850E_UART_CONFIG_TX_ENABLE);
-
- /* We have to turn off RX and/or TX mode before changing
- any associated control bits. */
- if (temp_disable)
- V850E_UART_CONFIG (chan) = old_config & ~temp_disable;
-
- /* Write the new control bits, while RX/TX are disabled. */
- if (changed_bits & ~enable)
- V850E_UART_CONFIG (chan) = new_config & ~enable;
-
- v850e_uart_config_delay (new_config, new_speed);
-
- /* Write the final version, with enable bits turned on. */
- V850E_UART_CONFIG (chan) = new_config;
- }
-
- local_irq_restore (flags);
-}
-
-
-/* Low-level console. */
-
-#ifdef CONFIG_V850E_UART_CONSOLE
-
-static void v850e_uart_cons_write (struct console *co,
- const char *s, unsigned count)
-{
- if (count > 0) {
- unsigned chan = co->index;
- unsigned irq = V850E_UART_TX_IRQ (chan);
- int irq_was_enabled, irq_was_pending, flags;
-
- /* We don't want to get `transmission completed'
- interrupts, since we're busy-waiting, so we disable them
- while sending (we don't disable interrupts entirely
- because sending over a serial line is really slow). We
- save the status of the tx interrupt and restore it when
- we're done so that using printk doesn't interfere with
- normal serial transmission (other than interleaving the
- output, of course!). This should work correctly even if
- this function is interrupted and the interrupt printks
- something. */
-
- /* Disable interrupts while fiddling with tx interrupt. */
- local_irq_save (flags);
- /* Get current tx interrupt status. */
- irq_was_enabled = v850e_intc_irq_enabled (irq);
- irq_was_pending = v850e_intc_irq_pending (irq);
- /* Disable tx interrupt if necessary. */
- if (irq_was_enabled)
- v850e_intc_disable_irq (irq);
- /* Turn interrupts back on. */
- local_irq_restore (flags);
-
- /* Send characters. */
- while (count > 0) {
- int ch = *s++;
-
- if (ch == '\n') {
- /* We don't have the benefit of a tty
- driver, so translate NL into CR LF. */
- v850e_uart_wait_for_xmit_ok (chan);
- v850e_uart_putc (chan, '\r');
- }
-
- v850e_uart_wait_for_xmit_ok (chan);
- v850e_uart_putc (chan, ch);
-
- count--;
- }
-
- /* Restore saved tx interrupt status. */
- if (irq_was_enabled) {
- /* Wait for the last character we sent to be
- completely transmitted (as we'll get an
- interrupt interrupt at that point). */
- v850e_uart_wait_for_xmit_done (chan);
- /* Clear pending interrupts received due
- to our transmission, unless there was already
- one pending, in which case we want the
- handler to be called. */
- if (! irq_was_pending)
- v850e_intc_clear_pending_irq (irq);
- /* ... and then turn back on handling. */
- v850e_intc_enable_irq (irq);
- }
- }
-}
-
-extern struct uart_driver v850e_uart_driver;
-static struct console v850e_uart_cons =
-{
- .name = "ttyS",
- .write = v850e_uart_cons_write,
- .device = uart_console_device,
- .flags = CON_PRINTBUFFER,
- .cflag = V850E_UART_INIT_CFLAGS,
- .index = -1,
- .data = &v850e_uart_driver,
-};
-
-void v850e_uart_cons_init (unsigned chan)
-{
- v850e_uart_configure (chan, V850E_UART_INIT_CFLAGS,
- V850E_UART_INIT_BAUD);
- v850e_uart_cons.index = chan;
- register_console (&v850e_uart_cons);
- printk ("Console: %s on-chip UART channel %d\n",
- V850E_UART_CHIP_NAME, chan);
-}
-
-/* This is what the init code actually calls. */
-static int v850e_uart_console_init (void)
-{
- v850e_uart_cons_init (V850E_UART_CONSOLE_CHANNEL);
- return 0;
-}
-console_initcall(v850e_uart_console_init);
-
-#define V850E_UART_CONSOLE &v850e_uart_cons
-
-#else /* !CONFIG_V850E_UART_CONSOLE */
-#define V850E_UART_CONSOLE 0
-#endif /* CONFIG_V850E_UART_CONSOLE */
-
-/* TX/RX interrupt handlers. */
-
-static void v850e_uart_stop_tx (struct uart_port *port);
-
-void v850e_uart_tx (struct uart_port *port)
-{
- struct circ_buf *xmit = &port->info->xmit;
- int stopped = uart_tx_stopped (port);
-
- if (v850e_uart_xmit_ok (port->line)) {
- int tx_ch;
-
- if (port->x_char) {
- tx_ch = port->x_char;
- port->x_char = 0;
- } else if (!uart_circ_empty (xmit) && !stopped) {
- tx_ch = xmit->buf[xmit->tail];
- xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
- } else
- goto no_xmit;
-
- v850e_uart_putc (port->line, tx_ch);
- port->icount.tx++;
-
- if (uart_circ_chars_pending (xmit) < WAKEUP_CHARS)
- uart_write_wakeup (port);
- }
-
- no_xmit:
- if (uart_circ_empty (xmit) || stopped)
- v850e_uart_stop_tx (port, stopped);
-}
-
-static irqreturn_t v850e_uart_tx_irq(int irq, void *data)
-{
- struct uart_port *port = data;
- v850e_uart_tx (port);
- return IRQ_HANDLED;
-}
-
-static irqreturn_t v850e_uart_rx_irq(int irq, void *data)
-{
- struct uart_port *port = data;
- unsigned ch_stat = TTY_NORMAL;
- unsigned ch = v850e_uart_getc (port->line);
- unsigned err = v850e_uart_err (port->line);
-
- if (err) {
- if (err & V850E_UART_ERR_OVERRUN) {
- ch_stat = TTY_OVERRUN;
- port->icount.overrun++;
- } else if (err & V850E_UART_ERR_FRAME) {
- ch_stat = TTY_FRAME;
- port->icount.frame++;
- } else if (err & V850E_UART_ERR_PARITY) {
- ch_stat = TTY_PARITY;
- port->icount.parity++;
- }
- }
-
- port->icount.rx++;
-
- tty_insert_flip_char (port->info->port.tty, ch, ch_stat);
- tty_schedule_flip (port->info->port.tty);
-
- return IRQ_HANDLED;
-}
-
-
-/* Control functions for the serial framework. */
-
-static void v850e_uart_nop (struct uart_port *port) { }
-static int v850e_uart_success (struct uart_port *port) { return 0; }
-
-static unsigned v850e_uart_tx_empty (struct uart_port *port)
-{
- return TIOCSER_TEMT; /* Can't detect. */
-}
-
-static void v850e_uart_set_mctrl (struct uart_port *port, unsigned mctrl)
-{
-#ifdef V850E_UART_SET_RTS
- V850E_UART_SET_RTS (port->line, (mctrl & TIOCM_RTS));
-#endif
-}
-
-static unsigned v850e_uart_get_mctrl (struct uart_port *port)
-{
- /* We don't support DCD or DSR, so consider them permanently active. */
- int mctrl = TIOCM_CAR | TIOCM_DSR;
-
- /* We may support CTS. */
-#ifdef V850E_UART_CTS
- mctrl |= V850E_UART_CTS(port->line) ? TIOCM_CTS : 0;
-#else
- mctrl |= TIOCM_CTS;
-#endif
-
- return mctrl;
-}
-
-static void v850e_uart_start_tx (struct uart_port *port)
-{
- v850e_intc_disable_irq (V850E_UART_TX_IRQ (port->line));
- v850e_uart_tx (port);
- v850e_intc_enable_irq (V850E_UART_TX_IRQ (port->line));
-}
-
-static void v850e_uart_stop_tx (struct uart_port *port)
-{
- v850e_intc_disable_irq (V850E_UART_TX_IRQ (port->line));
-}
-
-static void v850e_uart_start_rx (struct uart_port *port)
-{
- v850e_intc_enable_irq (V850E_UART_RX_IRQ (port->line));
-}
-
-static void v850e_uart_stop_rx (struct uart_port *port)
-{
- v850e_intc_disable_irq (V850E_UART_RX_IRQ (port->line));
-}
-
-static void v850e_uart_break_ctl (struct uart_port *port, int break_ctl)
-{
- /* Umm, do this later. */
-}
-
-static int v850e_uart_startup (struct uart_port *port)
-{
- int err;
-
- /* Alloc RX irq. */
- err = request_irq (V850E_UART_RX_IRQ (port->line), v850e_uart_rx_irq,
- IRQF_DISABLED, "v850e_uart", port);
- if (err)
- return err;
-
- /* Alloc TX irq. */
- err = request_irq (V850E_UART_TX_IRQ (port->line), v850e_uart_tx_irq,
- IRQF_DISABLED, "v850e_uart", port);
- if (err) {
- free_irq (V850E_UART_RX_IRQ (port->line), port);
- return err;
- }
-
- v850e_uart_start_rx (port);
-
- return 0;
-}
-
-static void v850e_uart_shutdown (struct uart_port *port)
-{
- /* Disable port interrupts. */
- free_irq (V850E_UART_TX_IRQ (port->line), port);
- free_irq (V850E_UART_RX_IRQ (port->line), port);
-
- /* Turn off xmit/recv enable bits. */
- V850E_UART_CONFIG (port->line)
- &= ~(V850E_UART_CONFIG_TX_ENABLE
- | V850E_UART_CONFIG_RX_ENABLE);
- /* Then reset the channel. */
- V850E_UART_CONFIG (port->line) = 0;
-}
-
-static void
-v850e_uart_set_termios (struct uart_port *port, struct ktermios *termios,
- struct ktermios *old)
-{
- unsigned cflags = termios->c_cflag;
-
- /* Restrict flags to legal values. */
- if ((cflags & CSIZE) != CS7 && (cflags & CSIZE) != CS8)
- /* The new value of CSIZE is invalid, use the old value. */
- cflags = (cflags & ~CSIZE)
- | (old ? (old->c_cflag & CSIZE) : CS8);
-
- termios->c_cflag = cflags;
-
- v850e_uart_configure (port->line, cflags,
- uart_get_baud_rate (port, termios, old,
- v850e_uart_min_baud(),
- v850e_uart_max_baud()));
-}
-
-static const char *v850e_uart_type (struct uart_port *port)
-{
- return port->type == PORT_V850E_UART ? "v850e_uart" : 0;
-}
-
-static void v850e_uart_config_port (struct uart_port *port, int flags)
-{
- if (flags & UART_CONFIG_TYPE)
- port->type = PORT_V850E_UART;
-}
-
-static int
-v850e_uart_verify_port (struct uart_port *port, struct serial_struct *ser)
-{
- if (ser->type != PORT_UNKNOWN && ser->type != PORT_V850E_UART)
- return -EINVAL;
- if (ser->irq != V850E_UART_TX_IRQ (port->line))
- return -EINVAL;
- return 0;
-}
-
-static struct uart_ops v850e_uart_ops = {
- .tx_empty = v850e_uart_tx_empty,
- .get_mctrl = v850e_uart_get_mctrl,
- .set_mctrl = v850e_uart_set_mctrl,
- .start_tx = v850e_uart_start_tx,
- .stop_tx = v850e_uart_stop_tx,
- .stop_rx = v850e_uart_stop_rx,
- .enable_ms = v850e_uart_nop,
- .break_ctl = v850e_uart_break_ctl,
- .startup = v850e_uart_startup,
- .shutdown = v850e_uart_shutdown,
- .set_termios = v850e_uart_set_termios,
- .type = v850e_uart_type,
- .release_port = v850e_uart_nop,
- .request_port = v850e_uart_success,
- .config_port = v850e_uart_config_port,
- .verify_port = v850e_uart_verify_port,
-};
-
-/* Initialization and cleanup. */
-
-static struct uart_driver v850e_uart_driver = {
- .owner = THIS_MODULE,
- .driver_name = "v850e_uart",
- .dev_name = "ttyS",
- .major = TTY_MAJOR,
- .minor = V850E_UART_MINOR_BASE,
- .nr = V850E_UART_NUM_CHANNELS,
- .cons = V850E_UART_CONSOLE,
-};
-
-
-static struct uart_port v850e_uart_ports[V850E_UART_NUM_CHANNELS];
-
-static int __init v850e_uart_init (void)
-{
- int rval;
-
- printk (KERN_INFO "%s on-chip UART\n", V850E_UART_CHIP_NAME);
-
- rval = uart_register_driver (&v850e_uart_driver);
- if (rval == 0) {
- unsigned chan;
-
- for (chan = 0; chan < V850E_UART_NUM_CHANNELS; chan++) {
- struct uart_port *port = &v850e_uart_ports[chan];
-
- memset (port, 0, sizeof *port);
-
- port->ops = &v850e_uart_ops;
- port->line = chan;
- port->iotype = UPIO_MEM;
- port->flags = UPF_BOOT_AUTOCONF;
-
- /* We actually use multiple IRQs, but the serial
- framework seems to mainly use this for
- informational purposes anyway. Here we use the TX
- irq. */
- port->irq = V850E_UART_TX_IRQ (chan);
-
- /* The serial framework doesn't really use these
- membase/mapbase fields for anything useful, but
- it requires that they be something non-zero to
- consider the port `valid', and also uses them
- for informational purposes. */
- port->membase = (void *)V850E_UART_BASE_ADDR (chan);
- port->mapbase = V850E_UART_BASE_ADDR (chan);
-
- /* The framework insists on knowing the uart's master
- clock freq, though it doesn't seem to do anything
- useful for us with it. We must make it at least
- higher than (the maximum baud rate * 16), otherwise
- the framework will puke during its internal
- calculations, and force the baud rate to be 9600.
- To be accurate though, just repeat the calculation
- we use when actually setting the speed. */
- port->uartclk = v850e_uart_max_clock() * 16;
-
- uart_add_one_port (&v850e_uart_driver, port);
- }
- }
-
- return rval;
-}
-
-static void __exit v850e_uart_exit (void)
-{
- unsigned chan;
-
- for (chan = 0; chan < V850E_UART_NUM_CHANNELS; chan++)
- uart_remove_one_port (&v850e_uart_driver,
- &v850e_uart_ports[chan]);
-
- uart_unregister_driver (&v850e_uart_driver);
-}
-
-module_init (v850e_uart_init);
-module_exit (v850e_uart_exit);
-
-MODULE_AUTHOR ("Miles Bader");
-MODULE_DESCRIPTION ("NEC " V850E_UART_CHIP_NAME " on-chip UART");
-MODULE_LICENSE ("GPL");
diff --git a/drivers/sh/maple/maple.c b/drivers/sh/maple/maple.c
index 617efb1640b..be97789fa5f 100644
--- a/drivers/sh/maple/maple.c
+++ b/drivers/sh/maple/maple.c
@@ -24,13 +24,12 @@
#include <linux/slab.h>
#include <linux/maple.h>
#include <linux/dma-mapping.h>
+#include <linux/delay.h>
#include <asm/cacheflush.h>
#include <asm/dma.h>
#include <asm/io.h>
-#include <asm/mach/dma.h>
-#include <asm/mach/sysasic.h>
-#include <asm/mach/maple.h>
-#include <linux/delay.h>
+#include <mach/dma.h>
+#include <mach/sysasic.h>
MODULE_AUTHOR("Yaegshi Takeshi, Paul Mundt, M.R. Brown, Adrian McMenamin");
MODULE_DESCRIPTION("Maple bus driver for Dreamcast");
@@ -46,14 +45,15 @@ static DECLARE_WORK(maple_vblank_process, maple_vblank_handler);
static LIST_HEAD(maple_waitq);
static LIST_HEAD(maple_sentq);
-static DEFINE_MUTEX(maple_list_lock);
+/* mutex to protect queue of waiting packets */
+static DEFINE_MUTEX(maple_wlist_lock);
static struct maple_driver maple_dummy_driver;
static struct device maple_bus;
static int subdevice_map[MAPLE_PORTS];
static unsigned long *maple_sendbuf, *maple_sendptr, *maple_lastptr;
static unsigned long maple_pnp_time;
-static int started, scanning, liststatus, fullscan;
+static int started, scanning, fullscan;
static struct kmem_cache *maple_queue_cache;
struct maple_device_specify {
@@ -129,35 +129,124 @@ static void maple_release_device(struct device *dev)
kfree(mdev);
}
-/**
+/*
* maple_add_packet - add a single instruction to the queue
- * @mq: instruction to add to waiting queue
+ * @mdev - maple device
+ * @function - function on device being queried
+ * @command - maple command to add
+ * @length - length of command string (in 32 bit words)
+ * @data - remainder of command string
*/
-void maple_add_packet(struct mapleq *mq)
+int maple_add_packet(struct maple_device *mdev, u32 function, u32 command,
+ size_t length, void *data)
{
- mutex_lock(&maple_list_lock);
- list_add(&mq->list, &maple_waitq);
- mutex_unlock(&maple_list_lock);
+ int locking, ret = 0;
+ void *sendbuf = NULL;
+
+ mutex_lock(&maple_wlist_lock);
+ /* bounce if device already locked */
+ locking = mutex_is_locked(&mdev->mq->mutex);
+ if (locking) {
+ ret = -EBUSY;
+ goto out;
+ }
+
+ mutex_lock(&mdev->mq->mutex);
+
+ if (length) {
+ sendbuf = kmalloc(length * 4, GFP_KERNEL);
+ if (!sendbuf) {
+ mutex_unlock(&mdev->mq->mutex);
+ ret = -ENOMEM;
+ goto out;
+ }
+ ((__be32 *)sendbuf)[0] = cpu_to_be32(function);
+ }
+
+ mdev->mq->command = command;
+ mdev->mq->length = length;
+ if (length > 1)
+ memcpy(sendbuf + 4, data, (length - 1) * 4);
+ mdev->mq->sendbuf = sendbuf;
+
+ list_add(&mdev->mq->list, &maple_waitq);
+out:
+ mutex_unlock(&maple_wlist_lock);
+ return ret;
}
EXPORT_SYMBOL_GPL(maple_add_packet);
+/*
+ * maple_add_packet_sleeps - add a single instruction to the queue
+ * - waits for lock to be free
+ * @mdev - maple device
+ * @function - function on device being queried
+ * @command - maple command to add
+ * @length - length of command string (in 32 bit words)
+ * @data - remainder of command string
+ */
+int maple_add_packet_sleeps(struct maple_device *mdev, u32 function,
+ u32 command, size_t length, void *data)
+{
+ int locking, ret = 0;
+ void *sendbuf = NULL;
+
+ locking = mutex_lock_interruptible(&mdev->mq->mutex);
+ if (locking) {
+ ret = -EIO;
+ goto out;
+ }
+
+ if (length) {
+ sendbuf = kmalloc(length * 4, GFP_KERNEL);
+ if (!sendbuf) {
+ mutex_unlock(&mdev->mq->mutex);
+ ret = -ENOMEM;
+ goto out;
+ }
+ ((__be32 *)sendbuf)[0] = cpu_to_be32(function);
+ }
+
+ mdev->mq->command = command;
+ mdev->mq->length = length;
+ if (length > 1)
+ memcpy(sendbuf + 4, data, (length - 1) * 4);
+ mdev->mq->sendbuf = sendbuf;
+
+ mutex_lock(&maple_wlist_lock);
+ list_add(&mdev->mq->list, &maple_waitq);
+ mutex_unlock(&maple_wlist_lock);
+out:
+ return ret;
+}
+EXPORT_SYMBOL_GPL(maple_add_packet_sleeps);
+
static struct mapleq *maple_allocq(struct maple_device *mdev)
{
struct mapleq *mq;
mq = kmalloc(sizeof(*mq), GFP_KERNEL);
if (!mq)
- return NULL;
+ goto failed_nomem;
mq->dev = mdev;
mq->recvbufdcsp = kmem_cache_zalloc(maple_queue_cache, GFP_KERNEL);
mq->recvbuf = (void *) P2SEGADDR(mq->recvbufdcsp);
- if (!mq->recvbuf) {
- kfree(mq);
- return NULL;
- }
+ if (!mq->recvbuf)
+ goto failed_p2;
+ /*
+ * most devices do not need the mutex - but
+ * anything that injects block reads or writes
+ * will rely on it
+ */
+ mutex_init(&mq->mutex);
return mq;
+
+failed_p2:
+ kfree(mq);
+failed_nomem:
+ return NULL;
}
static struct maple_device *maple_alloc_dev(int port, int unit)
@@ -178,7 +267,6 @@ static struct maple_device *maple_alloc_dev(int port, int unit)
}
mdev->dev.bus = &maple_bus_type;
mdev->dev.parent = &maple_bus;
- mdev->function = 0;
return mdev;
}
@@ -216,7 +304,6 @@ static void maple_build_block(struct mapleq *mq)
*maple_sendptr++ = PHYSADDR(mq->recvbuf);
*maple_sendptr++ =
mq->command | (to << 8) | (from << 16) | (len << 24);
-
while (len-- > 0)
*maple_sendptr++ = *lsendbuf++;
}
@@ -224,22 +311,27 @@ static void maple_build_block(struct mapleq *mq)
/* build up command queue */
static void maple_send(void)
{
- int i;
- int maple_packets;
+ int i, maple_packets = 0;
struct mapleq *mq, *nmq;
if (!list_empty(&maple_sentq))
return;
- if (list_empty(&maple_waitq) || !maple_dma_done())
+ mutex_lock(&maple_wlist_lock);
+ if (list_empty(&maple_waitq) || !maple_dma_done()) {
+ mutex_unlock(&maple_wlist_lock);
return;
- maple_packets = 0;
- maple_sendptr = maple_lastptr = maple_sendbuf;
+ }
+ mutex_unlock(&maple_wlist_lock);
+ maple_lastptr = maple_sendbuf;
+ maple_sendptr = maple_sendbuf;
+ mutex_lock(&maple_wlist_lock);
list_for_each_entry_safe(mq, nmq, &maple_waitq, list) {
maple_build_block(mq);
list_move(&mq->list, &maple_sentq);
if (maple_packets++ > MAPLE_MAXPACKETS)
break;
}
+ mutex_unlock(&maple_wlist_lock);
if (maple_packets > 0) {
for (i = 0; i < (1 << MAPLE_DMA_PAGES); i++)
dma_cache_sync(0, maple_sendbuf + i * PAGE_SIZE,
@@ -247,7 +339,8 @@ static void maple_send(void)
}
}
-static int attach_matching_maple_driver(struct device_driver *driver,
+/* check if there is a driver registered likely to match this device */
+static int check_matching_maple_driver(struct device_driver *driver,
void *devptr)
{
struct maple_driver *maple_drv;
@@ -255,12 +348,8 @@ static int attach_matching_maple_driver(struct device_driver *driver,
mdev = devptr;
maple_drv = to_maple_driver(driver);
- if (mdev->devinfo.function & be32_to_cpu(maple_drv->function)) {
- if (maple_drv->connect(mdev) == 0) {
- mdev->driver = maple_drv;
- return 1;
- }
- }
+ if (mdev->devinfo.function & cpu_to_be32(maple_drv->function))
+ return 1;
return 0;
}
@@ -268,11 +357,6 @@ static void maple_detach_driver(struct maple_device *mdev)
{
if (!mdev)
return;
- if (mdev->driver) {
- if (mdev->driver->disconnect)
- mdev->driver->disconnect(mdev);
- }
- mdev->driver = NULL;
device_unregister(&mdev->dev);
mdev = NULL;
}
@@ -328,8 +412,8 @@ static void maple_attach_driver(struct maple_device *mdev)
mdev->port, mdev->unit, function);
matched =
- bus_for_each_drv(&maple_bus_type, NULL, mdev,
- attach_matching_maple_driver);
+ bus_for_each_drv(&maple_bus_type, NULL, mdev,
+ check_matching_maple_driver);
if (matched == 0) {
/* Driver does not exist yet */
@@ -373,45 +457,48 @@ static int detach_maple_device(struct device *device, void *portptr)
static int setup_maple_commands(struct device *device, void *ignored)
{
+ int add;
struct maple_device *maple_dev = to_maple_dev(device);
if ((maple_dev->interval > 0)
&& time_after(jiffies, maple_dev->when)) {
- maple_dev->when = jiffies + maple_dev->interval;
- maple_dev->mq->command = MAPLE_COMMAND_GETCOND;
- maple_dev->mq->sendbuf = &maple_dev->function;
- maple_dev->mq->length = 1;
- maple_add_packet(maple_dev->mq);
- liststatus++;
+ /* bounce if we cannot lock */
+ add = maple_add_packet(maple_dev,
+ be32_to_cpu(maple_dev->devinfo.function),
+ MAPLE_COMMAND_GETCOND, 1, NULL);
+ if (!add)
+ maple_dev->when = jiffies + maple_dev->interval;
} else {
- if (time_after(jiffies, maple_pnp_time)) {
- maple_dev->mq->command = MAPLE_COMMAND_DEVINFO;
- maple_dev->mq->length = 0;
- maple_add_packet(maple_dev->mq);
- liststatus++;
- }
+ if (time_after(jiffies, maple_pnp_time))
+ /* This will also bounce */
+ maple_add_packet(maple_dev, 0,
+ MAPLE_COMMAND_DEVINFO, 0, NULL);
}
-
return 0;
}
/* VBLANK bottom half - implemented via workqueue */
static void maple_vblank_handler(struct work_struct *work)
{
- if (!maple_dma_done())
- return;
- if (!list_empty(&maple_sentq))
+ if (!list_empty(&maple_sentq) || !maple_dma_done())
return;
+
ctrl_outl(0, MAPLE_ENABLE);
- liststatus = 0;
+
bus_for_each_dev(&maple_bus_type, NULL, NULL,
setup_maple_commands);
+
if (time_after(jiffies, maple_pnp_time))
maple_pnp_time = jiffies + MAPLE_PNP_INTERVAL;
- if (liststatus && list_empty(&maple_sentq)) {
- INIT_LIST_HEAD(&maple_sentq);
+
+ mutex_lock(&maple_wlist_lock);
+ if (!list_empty(&maple_waitq) && list_empty(&maple_sentq)) {
+ mutex_unlock(&maple_wlist_lock);
maple_send();
+ } else {
+ mutex_unlock(&maple_wlist_lock);
}
+
maplebus_dma_reset();
}
@@ -422,8 +509,8 @@ static void maple_map_subunits(struct maple_device *mdev, int submask)
struct maple_device *mdev_add;
struct maple_device_specify ds;
+ ds.port = mdev->port;
for (k = 0; k < 5; k++) {
- ds.port = mdev->port;
ds.unit = k + 1;
retval =
bus_for_each_dev(&maple_bus_type, NULL, &ds,
@@ -437,9 +524,9 @@ static void maple_map_subunits(struct maple_device *mdev, int submask)
mdev_add = maple_alloc_dev(mdev->port, k + 1);
if (!mdev_add)
return;
- mdev_add->mq->command = MAPLE_COMMAND_DEVINFO;
- mdev_add->mq->length = 0;
- maple_add_packet(mdev_add->mq);
+ maple_add_packet(mdev_add, 0, MAPLE_COMMAND_DEVINFO,
+ 0, NULL);
+ /* mark that we are checking sub devices */
scanning = 1;
}
submask = submask >> 1;
@@ -505,6 +592,28 @@ static void maple_response_devinfo(struct maple_device *mdev,
}
}
+static void maple_port_rescan(void)
+{
+ int i;
+ struct maple_device *mdev;
+
+ fullscan = 1;
+ for (i = 0; i < MAPLE_PORTS; i++) {
+ if (checked[i] == false) {
+ fullscan = 0;
+ mdev = baseunits[i];
+ /*
+ * test lock in case scan has failed
+ * but device is still locked
+ */
+ if (mutex_is_locked(&mdev->mq->mutex))
+ mutex_unlock(&mdev->mq->mutex);
+ maple_add_packet(mdev, 0, MAPLE_COMMAND_DEVINFO,
+ 0, NULL);
+ }
+ }
+}
+
/* maple dma end bottom half - implemented via workqueue */
static void maple_dma_handler(struct work_struct *work)
{
@@ -512,7 +621,6 @@ static void maple_dma_handler(struct work_struct *work)
struct maple_device *dev;
char *recvbuf;
enum maple_code code;
- int i;
if (!maple_dma_done())
return;
@@ -522,6 +630,10 @@ static void maple_dma_handler(struct work_struct *work)
recvbuf = mq->recvbuf;
code = recvbuf[0];
dev = mq->dev;
+ kfree(mq->sendbuf);
+ mutex_unlock(&mq->mutex);
+ list_del_init(&mq->list);
+
switch (code) {
case MAPLE_RESPONSE_NONE:
maple_response_none(dev, mq);
@@ -558,26 +670,16 @@ static void maple_dma_handler(struct work_struct *work)
break;
}
}
- INIT_LIST_HEAD(&maple_sentq);
+ /* if scanning is 1 then we have subdevices to check */
if (scanning == 1) {
maple_send();
scanning = 2;
} else
scanning = 0;
-
- if (!fullscan) {
- fullscan = 1;
- for (i = 0; i < MAPLE_PORTS; i++) {
- if (checked[i] == false) {
- fullscan = 0;
- dev = baseunits[i];
- dev->mq->command =
- MAPLE_COMMAND_DEVINFO;
- dev->mq->length = 0;
- maple_add_packet(dev->mq);
- }
- }
- }
+ /*check if we have actually tested all ports yet */
+ if (!fullscan)
+ maple_port_rescan();
+ /* mark that we have been through the first scan */
if (started == 0)
started = 1;
}
@@ -631,7 +733,7 @@ static int match_maple_bus_driver(struct device *devptr,
if (maple_dev->devinfo.function == 0xFFFFFFFF)
return 0;
else if (maple_dev->devinfo.function &
- be32_to_cpu(maple_drv->function))
+ cpu_to_be32(maple_drv->function))
return 1;
return 0;
}
@@ -713,6 +815,9 @@ static int __init maple_bus_init(void)
if (!maple_queue_cache)
goto cleanup_bothirqs;
+ INIT_LIST_HEAD(&maple_waitq);
+ INIT_LIST_HEAD(&maple_sentq);
+
/* setup maple ports */
for (i = 0; i < MAPLE_PORTS; i++) {
checked[i] = false;
@@ -723,9 +828,7 @@ static int __init maple_bus_init(void)
maple_free_dev(mdev[i]);
goto cleanup_cache;
}
- mdev[i]->mq->command = MAPLE_COMMAND_DEVINFO;
- mdev[i]->mq->length = 0;
- maple_add_packet(mdev[i]->mq);
+ maple_add_packet(mdev[i], 0, MAPLE_COMMAND_DEVINFO, 0, NULL);
subdevice_map[i] = 0;
}
diff --git a/drivers/usb/gadget/m66592-udc.c b/drivers/usb/gadget/m66592-udc.c
index 8da7535c0c7..77b44fb48f0 100644
--- a/drivers/usb/gadget/m66592-udc.c
+++ b/drivers/usb/gadget/m66592-udc.c
@@ -1593,7 +1593,7 @@ static int __init m66592_probe(struct platform_device *pdev)
m66592->gadget.ops = &m66592_gadget_ops;
device_initialize(&m66592->gadget.dev);
- dev_set_name(&m66592->gadget, "gadget");
+ dev_set_name(&m66592->gadget.dev, "gadget");
m66592->gadget.is_dualspeed = 1;
m66592->gadget.dev.parent = &pdev->dev;
m66592->gadget.dev.dma_mask = pdev->dev.dma_mask;
diff --git a/drivers/video/Makefile b/drivers/video/Makefile
index 0ebc1bfd251..a6b55297a7f 100644
--- a/drivers/video/Makefile
+++ b/drivers/video/Makefile
@@ -118,7 +118,6 @@ obj-$(CONFIG_FB_PS3) += ps3fb.o
obj-$(CONFIG_FB_SM501) += sm501fb.o
obj-$(CONFIG_FB_XILINX) += xilinxfb.o
obj-$(CONFIG_FB_SH_MOBILE_LCDC) += sh_mobile_lcdcfb.o
-obj-$(CONFIG_FB_SH7343VOU) += sh7343_voufb.o
obj-$(CONFIG_FB_OMAP) += omap/
obj-$(CONFIG_XEN_FBDEV_FRONTEND) += xen-fbfront.o
obj-$(CONFIG_FB_CARMINE) += carminefb.o
diff --git a/drivers/video/arkfb.c b/drivers/video/arkfb.c
index 5001bd4ef46..38a1e8308c8 100644
--- a/drivers/video/arkfb.c
+++ b/drivers/video/arkfb.c
@@ -1126,11 +1126,8 @@ static int ark_pci_resume (struct pci_dev* dev)
acquire_console_sem();
mutex_lock(&(par->open_lock));
- if (par->ref_count == 0) {
- mutex_unlock(&(par->open_lock));
- release_console_sem();
- return 0;
- }
+ if (par->ref_count == 0)
+ goto fail;
pci_set_power_state(dev, PCI_D0);
pci_restore_state(dev);
@@ -1143,8 +1140,8 @@ static int ark_pci_resume (struct pci_dev* dev)
arkfb_set_par(info);
fb_set_suspend(info, 0);
- mutex_unlock(&(par->open_lock));
fail:
+ mutex_unlock(&(par->open_lock));
release_console_sem();
return 0;
}
diff --git a/drivers/video/backlight/hp680_bl.c b/drivers/video/backlight/hp680_bl.c
index fbea2bd129c..6fa0b9d5559 100644
--- a/drivers/video/backlight/hp680_bl.c
+++ b/drivers/video/backlight/hp680_bl.c
@@ -18,7 +18,7 @@
#include <linux/fb.h>
#include <linux/backlight.h>
-#include <asm/cpu/dac.h>
+#include <cpu/dac.h>
#include <asm/hp6xx.h>
#include <asm/hd64461.h>
diff --git a/drivers/video/backlight/platform_lcd.c b/drivers/video/backlight/platform_lcd.c
index 72d44dbfce8..738694d2388 100644
--- a/drivers/video/backlight/platform_lcd.c
+++ b/drivers/video/backlight/platform_lcd.c
@@ -92,7 +92,7 @@ static int __devinit platform_lcd_probe(struct platform_device *pdev)
plcd->us = dev;
plcd->pdata = pdata;
- plcd->lcd = lcd_device_register("platform-lcd", dev,
+ plcd->lcd = lcd_device_register(dev_name(dev), dev,
plcd, &platform_lcd_ops);
if (IS_ERR(plcd->lcd)) {
dev_err(dev, "cannot register lcd device\n");
@@ -101,6 +101,8 @@ static int __devinit platform_lcd_probe(struct platform_device *pdev)
}
platform_set_drvdata(pdev, plcd);
+ platform_lcd_set_power(plcd->lcd, FB_BLANK_NORMAL);
+
return 0;
err_mem:
diff --git a/drivers/video/console/sticore.c b/drivers/video/console/sticore.c
index d7822af0e00..ef7870f5ea0 100644
--- a/drivers/video/console/sticore.c
+++ b/drivers/video/console/sticore.c
@@ -24,6 +24,7 @@
#include <asm/hardware.h>
#include <asm/parisc-device.h>
#include <asm/cacheflush.h>
+#include <asm/grfioctl.h>
#include "../sticore.h"
@@ -725,6 +726,7 @@ static int __devinit sti_read_rom(int wordmode, struct sti_struct *sti,
{
struct sti_cooked_rom *cooked;
struct sti_rom *raw = NULL;
+ unsigned long revno;
cooked = kmalloc(sizeof *cooked, GFP_KERNEL);
if (!cooked)
@@ -767,9 +769,35 @@ static int __devinit sti_read_rom(int wordmode, struct sti_struct *sti,
sti->graphics_id[1] = raw->graphics_id[1];
sti_dump_rom(raw);
-
+
+ /* check if the ROM routines in this card are compatible */
+ if (wordmode || sti->graphics_id[1] != 0x09A02587)
+ goto ok;
+
+ revno = (raw->revno[0] << 8) | raw->revno[1];
+
+ switch (sti->graphics_id[0]) {
+ case S9000_ID_HCRX:
+ /* HyperA or HyperB ? */
+ if (revno == 0x8408 || revno == 0x840b)
+ goto msg_not_supported;
+ break;
+ case CRT_ID_THUNDER:
+ if (revno == 0x8509)
+ goto msg_not_supported;
+ break;
+ case CRT_ID_THUNDER2:
+ if (revno == 0x850c)
+ goto msg_not_supported;
+ }
+ok:
return 1;
+msg_not_supported:
+ printk(KERN_ERR "Sorry, this GSC/STI card is not yet supported.\n");
+ printk(KERN_ERR "Please see http://parisc-linux.org/faq/"
+ "graphics-howto.html for more info.\n");
+ /* fall through */
out_err:
kfree(raw);
kfree(cooked);
diff --git a/drivers/video/gbefb.c b/drivers/video/gbefb.c
index 2e552d5bbb5..f89c3cce1e0 100644
--- a/drivers/video/gbefb.c
+++ b/drivers/video/gbefb.c
@@ -87,6 +87,8 @@ static int gbe_revision;
static int ypan, ywrap;
static uint32_t pseudo_palette[16];
+static uint32_t gbe_cmap[256];
+static int gbe_turned_on; /* 0 turned off, 1 turned on */
static char *mode_option __initdata = NULL;
@@ -208,6 +210,8 @@ void gbe_turn_off(void)
int i;
unsigned int val, x, y, vpixen_off;
+ gbe_turned_on = 0;
+
/* check if pixel counter is on */
val = gbe->vt_xy;
if (GET_GBE_FIELD(VT_XY, FREEZE, val) == 1)
@@ -371,6 +375,22 @@ static void gbe_turn_on(void)
}
if (i == 10000)
printk(KERN_ERR "gbefb: turn on DMA timed out\n");
+
+ gbe_turned_on = 1;
+}
+
+static void gbe_loadcmap(void)
+{
+ int i, j;
+
+ for (i = 0; i < 256; i++) {
+ for (j = 0; j < 1000 && gbe->cm_fifo >= 63; j++)
+ udelay(10);
+ if (j == 1000)
+ printk(KERN_ERR "gbefb: cmap FIFO timeout\n");
+
+ gbe->cmap[i] = gbe_cmap[i];
+ }
}
/*
@@ -382,6 +402,7 @@ static int gbefb_blank(int blank, struct fb_info *info)
switch (blank) {
case FB_BLANK_UNBLANK: /* unblank */
gbe_turn_on();
+ gbe_loadcmap();
break;
case FB_BLANK_NORMAL: /* blank */
@@ -796,16 +817,10 @@ static int gbefb_set_par(struct fb_info *info)
gbe->gmap[i] = (i << 24) | (i << 16) | (i << 8);
/* Initialize the color map */
- for (i = 0; i < 256; i++) {
- int j;
-
- for (j = 0; j < 1000 && gbe->cm_fifo >= 63; j++)
- udelay(10);
- if (j == 1000)
- printk(KERN_ERR "gbefb: cmap FIFO timeout\n");
+ for (i = 0; i < 256; i++)
+ gbe_cmap[i] = (i << 8) | (i << 16) | (i << 24);
- gbe->cmap[i] = (i << 8) | (i << 16) | (i << 24);
- }
+ gbe_loadcmap();
return 0;
}
@@ -855,14 +870,17 @@ static int gbefb_setcolreg(unsigned regno, unsigned red, unsigned green,
blue >>= 8;
if (info->var.bits_per_pixel <= 8) {
- /* wait for the color map FIFO to have a free entry */
- for (i = 0; i < 1000 && gbe->cm_fifo >= 63; i++)
- udelay(10);
- if (i == 1000) {
- printk(KERN_ERR "gbefb: cmap FIFO timeout\n");
- return 1;
+ gbe_cmap[regno] = (red << 24) | (green << 16) | (blue << 8);
+ if (gbe_turned_on) {
+ /* wait for the color map FIFO to have a free entry */
+ for (i = 0; i < 1000 && gbe->cm_fifo >= 63; i++)
+ udelay(10);
+ if (i == 1000) {
+ printk(KERN_ERR "gbefb: cmap FIFO timeout\n");
+ return 1;
+ }
+ gbe->cmap[regno] = gbe_cmap[regno];
}
- gbe->cmap[regno] = (red << 24) | (green << 16) | (blue << 8);
} else if (regno < 16) {
switch (info->var.bits_per_pixel) {
case 15:
diff --git a/drivers/video/hitfb.c b/drivers/video/hitfb.c
index 392a8be6aa7..e6467cf9f19 100644
--- a/drivers/video/hitfb.c
+++ b/drivers/video/hitfb.c
@@ -27,7 +27,7 @@
#include <asm/pgtable.h>
#include <asm/io.h>
#include <asm/hd64461.h>
-#include <asm/cpu/dac.h>
+#include <cpu/dac.h>
#define WIDTH 640
diff --git a/drivers/video/pvr2fb.c b/drivers/video/pvr2fb.c
index 8c863a7f654..0a0fd48a856 100644
--- a/drivers/video/pvr2fb.c
+++ b/drivers/video/pvr2fb.c
@@ -58,18 +58,18 @@
#ifdef CONFIG_SH_DREAMCAST
#include <asm/machvec.h>
-#include <asm/mach/sysasic.h>
+#include <mach-dreamcast/mach/sysasic.h>
#endif
#ifdef CONFIG_SH_DMA
#include <linux/pagemap.h>
-#include <asm/mach/dma.h>
+#include <mach/dma.h>
#include <asm/dma.h>
#endif
#ifdef CONFIG_SH_STORE_QUEUES
#include <linux/uaccess.h>
-#include <asm/cpu/sq.h>
+#include <cpu/sq.h>
#endif
#ifndef PCI_DEVICE_ID_NEC_NEON250
diff --git a/drivers/video/vt8623fb.c b/drivers/video/vt8623fb.c
index 536ab11623f..4a484ee98f8 100644
--- a/drivers/video/vt8623fb.c
+++ b/drivers/video/vt8623fb.c
@@ -853,11 +853,8 @@ static int vt8623_pci_resume(struct pci_dev* dev)
acquire_console_sem();
mutex_lock(&(par->open_lock));
- if (par->ref_count == 0) {
- mutex_unlock(&(par->open_lock));
- release_console_sem();
- return 0;
- }
+ if (par->ref_count == 0)
+ goto fail;
pci_set_power_state(dev, PCI_D0);
pci_restore_state(dev);
@@ -870,8 +867,8 @@ static int vt8623_pci_resume(struct pci_dev* dev)
vt8623fb_set_par(info);
fb_set_suspend(info, 0);
- mutex_unlock(&(par->open_lock));
fail:
+ mutex_unlock(&(par->open_lock));
release_console_sem();
return 0;
diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c
index eaa3f2a79ff..ccd6c530782 100644
--- a/drivers/watchdog/hpwdt.c
+++ b/drivers/watchdog/hpwdt.c
@@ -420,7 +420,7 @@ static int __devinit detect_cru_service(void)
static int hpwdt_pretimeout(struct notifier_block *nb, unsigned long ulReason,
void *data)
{
- static unsigned long rom_pl;
+ unsigned long rom_pl;
static int die_nmi_called;
if (ulReason != DIE_NMI && ulReason != DIE_NMI_IPI)