From 25732ad493b22b7d9f0d250c5a9ad17219f96a47 Mon Sep 17 00:00:00 2001
From: Bruce Losure <blosure@americas.sgi.com>
Date: Fri, 2 Sep 2005 15:16:35 -0500
Subject: [IA64] Altix patch for fpga reset

1) work around a h/w reset issue
2) improve the determination of FPGA-based h/w in
   the arch/ia64/sn/kernel/tiocx code.

Signed-off-by: Bruce Losure <blosure@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 include/asm-ia64/sn/l1.h     | 12 ++++++++++++
 include/asm-ia64/sn/sn_sal.h | 43 +++++++++++++++++++++++++++++++++++++++++++
 include/asm-ia64/sn/tiocx.h  |  3 ++-
 3 files changed, 57 insertions(+), 1 deletion(-)

(limited to 'include/asm-ia64')

diff --git a/include/asm-ia64/sn/l1.h b/include/asm-ia64/sn/l1.h
index 2e5f0aa3888..e3b819110d4 100644
--- a/include/asm-ia64/sn/l1.h
+++ b/include/asm-ia64/sn/l1.h
@@ -35,4 +35,16 @@
 #define L1_BRICKTYPE_ATHENA	0x2b            /* + */
 #define L1_BRICKTYPE_DAYTONA	0x7a            /* z */
 
+/* board type response codes */
+#define L1_BOARDTYPE_IP69       0x0100          /* CA */
+#define L1_BOARDTYPE_IP63       0x0200          /* CB */
+#define L1_BOARDTYPE_BASEIO     0x0300          /* IB */
+#define L1_BOARDTYPE_PCIE2SLOT  0x0400          /* IC */
+#define L1_BOARDTYPE_PCIX3SLOT  0x0500          /* ID */
+#define L1_BOARDTYPE_PCIXPCIE4SLOT 0x0600       /* IE */
+#define L1_BOARDTYPE_ABACUS     0x0700          /* AB */
+#define L1_BOARDTYPE_DAYTONA    0x0800          /* AD */
+#define L1_BOARDTYPE_INVAL      (-1)            /* invalid board type */
+
+
 #endif /* _ASM_IA64_SN_L1_H */
diff --git a/include/asm-ia64/sn/sn_sal.h b/include/asm-ia64/sn/sn_sal.h
index e67825ad193..6f96ae8b4fb 100644
--- a/include/asm-ia64/sn/sn_sal.h
+++ b/include/asm-ia64/sn/sn_sal.h
@@ -47,6 +47,7 @@
 #define  SN_SAL_CONSOLE_PUTB			   0x02000028
 #define  SN_SAL_CONSOLE_XMIT_CHARS		   0x0200002a
 #define  SN_SAL_CONSOLE_READC			   0x0200002b
+#define  SN_SAL_SYSCTL_OP			   0x02000030
 #define  SN_SAL_SYSCTL_MODID_GET	           0x02000031
 #define  SN_SAL_SYSCTL_GET                         0x02000032
 #define  SN_SAL_SYSCTL_IOBRICK_MODULE_GET          0x02000033
@@ -97,6 +98,13 @@
 #define SAL_INTR_ALLOC		1
 #define SAL_INTR_FREE		2
 
+/*
+ * operations available on the generic SN_SAL_SYSCTL_OP
+ * runtime service
+ */
+#define SAL_SYSCTL_OP_IOBOARD		0x0001  /*  retrieve board type */
+#define SAL_SYSCTL_OP_TIO_JLCK_RST      0x0002  /* issue TIO clock reset */
+
 /*
  * IRouter (i.e. generalized system controller) operations
  */
@@ -876,6 +884,41 @@ ia64_sn_sysctl_event_init(nasid_t nasid)
         return (int) rv.v0;
 }
 
+/*
+ * Ask the system controller on the specified nasid to reset
+ * the CX corelet clock.  Only valid on TIO nodes.
+ */
+static inline int
+ia64_sn_sysctl_tio_clock_reset(nasid_t nasid)
+{
+	struct ia64_sal_retval rv;
+	SAL_CALL_REENTRANT(rv, SN_SAL_SYSCTL_OP, SAL_SYSCTL_OP_TIO_JLCK_RST,
+			nasid, 0, 0, 0, 0, 0);
+	if (rv.status != 0)
+		return (int)rv.status;
+	if (rv.v0 != 0)
+		return (int)rv.v0;
+
+	return 0;
+}
+
+/*
+ * Get the associated ioboard type for a given nasid.
+ */
+static inline int
+ia64_sn_sysctl_ioboard_get(nasid_t nasid)
+{
+        struct ia64_sal_retval rv;
+        SAL_CALL_REENTRANT(rv, SN_SAL_SYSCTL_OP, SAL_SYSCTL_OP_IOBOARD,
+                        nasid, 0, 0, 0, 0, 0);
+        if (rv.v0 != 0)
+                return (int)rv.v0;
+        if (rv.v1 != 0)
+                return (int)rv.v1;
+
+        return 0;
+}
+
 /**
  * ia64_sn_get_fit_compt - read a FIT entry from the PROM header
  * @nasid: NASID of node to read
diff --git a/include/asm-ia64/sn/tiocx.h b/include/asm-ia64/sn/tiocx.h
index c5447a50450..5699e75e502 100644
--- a/include/asm-ia64/sn/tiocx.h
+++ b/include/asm-ia64/sn/tiocx.h
@@ -19,6 +19,7 @@ struct cx_id_s {
 
 struct cx_dev {
 	struct cx_id_s cx_id;
+	int bt;				/* board/blade type */
 	void *soft;			/* driver specific */
 	struct hubdev_info *hubdev;
 	struct device dev;
@@ -59,7 +60,7 @@ struct cx_drv {
 extern struct sn_irq_info *tiocx_irq_alloc(nasid_t, int, int, nasid_t, int);
 extern void tiocx_irq_free(struct sn_irq_info *);
 extern int cx_device_unregister(struct cx_dev *);
-extern int cx_device_register(nasid_t, int, int, struct hubdev_info *);
+extern int cx_device_register(nasid_t, int, int, struct hubdev_info *, int);
 extern int cx_driver_unregister(struct cx_drv *);
 extern int cx_driver_register(struct cx_drv *);
 extern uint64_t tiocx_dma_addr(uint64_t addr);
-- 
cgit v1.2.3-70-g09d2


From a607c38971fd078865fa9bef39e6c1d4435680c8 Mon Sep 17 00:00:00 2001
From: Dean Nelson <dcn@sgi.com>
Date: Thu, 1 Sep 2005 14:01:37 -0500
Subject: [IA64-SGI] get XPC to cleanly disengage from remote memory references

When XPC is being shut down (i.e., rmmod, reboot) it doesn't ensure that
other partitions with whom it was connected have completely disengaged
from any attempt at cross-partition memory references. This can lead to
MCAs in any of these other partitions when the partition is reset.

Signed-off-by: Dean Nelson <dcn@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/sn/kernel/xpc.h           | 288 +++++++++++++++++++++++++++++++---
 arch/ia64/sn/kernel/xpc_channel.c   | 216 +++++++++++++------------
 arch/ia64/sn/kernel/xpc_main.c      | 242 ++++++++++++++++++++--------
 arch/ia64/sn/kernel/xpc_partition.c | 304 ++++++++++++++++++++++++++++++------
 include/asm-ia64/sn/xp.h            |  10 +-
 5 files changed, 822 insertions(+), 238 deletions(-)

(limited to 'include/asm-ia64')

diff --git a/arch/ia64/sn/kernel/xpc.h b/arch/ia64/sn/kernel/xpc.h
index d0ee635daf2..565822ab3d0 100644
--- a/arch/ia64/sn/kernel/xpc.h
+++ b/arch/ia64/sn/kernel/xpc.h
@@ -57,7 +57,7 @@
 #define XPC_NASID_FROM_W_B(_w, _b) (((_w) * 64 + (_b)) * 2)
 
 #define XPC_HB_DEFAULT_INTERVAL		5	/* incr HB every x secs */
-#define XPC_HB_CHECK_DEFAULT_TIMEOUT	20	/* check HB every x secs */
+#define XPC_HB_CHECK_DEFAULT_INTERVAL	20	/* check HB every x secs */
 
 /* define the process name of HB checker and the CPU it is pinned to */
 #define XPC_HB_CHECK_THREAD_NAME	"xpc_hb"
@@ -67,11 +67,6 @@
 #define XPC_DISCOVERY_THREAD_NAME	"xpc_discovery"
 
 
-#define XPC_HB_ALLOWED(_p, _v)	((_v)->heartbeating_to_mask & (1UL << (_p)))
-#define XPC_ALLOW_HB(_p, _v)	(_v)->heartbeating_to_mask |= (1UL << (_p))
-#define XPC_DISALLOW_HB(_p, _v)	(_v)->heartbeating_to_mask &= (~(1UL << (_p)))
-
-
 /*
  * Reserved Page provided by SAL.
  *
@@ -88,14 +83,38 @@ struct xpc_rsvd_page {
 	u8 version;
 	u8 pad[6];		/* pad to u64 align */
 	volatile u64 vars_pa;
+	struct timespec stamp;	/* time when reserved page was initialized */
 	u64 part_nasids[XP_NASID_MASK_WORDS] ____cacheline_aligned;
 	u64 mach_nasids[XP_NASID_MASK_WORDS] ____cacheline_aligned;
 };
-#define XPC_RP_VERSION _XPC_VERSION(1,0) /* version 1.0 of the reserved page */
 
 #define XPC_RSVD_PAGE_ALIGNED_SIZE \
 			(L1_CACHE_ALIGN(sizeof(struct xpc_rsvd_page)))
 
+#define XPC_RP_VERSION _XPC_VERSION(1,1) /* version 1.1 of the reserved page */
+
+#define XPC_SUPPORTS_RP_STAMP(_version) \
+			(_version >= _XPC_VERSION(1,1))
+
+/*
+ * compare stamps - the return value is:
+ *
+ *	< 0,	if stamp1 < stamp2
+ *	= 0,	if stamp1 == stamp2
+ *	> 0,	if stamp1 > stamp2
+ */
+static inline int
+xpc_compare_stamps(struct timespec *stamp1, struct timespec *stamp2)
+{
+	int ret;
+
+
+	if ((ret = stamp1->tv_sec - stamp2->tv_sec) == 0) {
+		ret = stamp1->tv_nsec - stamp2->tv_nsec;
+	}
+	return ret;
+}
+
 
 /*
  * Define the structures by which XPC variables can be exported to other
@@ -121,12 +140,61 @@ struct xpc_vars {
 	u64 vars_part_pa;
 	u64 amos_page_pa;	/* paddr of page of AMOs from MSPEC driver */
 	AMO_t *amos_page;	/* vaddr of page of AMOs from MSPEC driver */
-	AMO_t *act_amos;	/* pointer to the first activation AMO */
 };
-#define XPC_V_VERSION _XPC_VERSION(3,0) /* version 3.0 of the cross vars */
 
 #define XPC_VARS_ALIGNED_SIZE  (L1_CACHE_ALIGN(sizeof(struct xpc_vars)))
 
+#define XPC_V_VERSION _XPC_VERSION(3,1) /* version 3.1 of the cross vars */
+
+#define XPC_SUPPORTS_DISENGAGE_REQUEST(_version) \
+			(_version >= _XPC_VERSION(3,1))
+
+
+static inline int
+xpc_hb_allowed(partid_t partid, struct xpc_vars *vars)
+{
+	return ((vars->heartbeating_to_mask & (1UL << partid)) != 0);
+}
+
+static inline void
+xpc_allow_hb(partid_t partid, struct xpc_vars *vars)
+{
+	u64 old_mask, new_mask;
+
+	do {
+		old_mask = vars->heartbeating_to_mask;
+		new_mask = (old_mask | (1UL << partid));
+	} while (cmpxchg(&vars->heartbeating_to_mask, old_mask, new_mask) !=
+							old_mask);
+}
+
+static inline void
+xpc_disallow_hb(partid_t partid, struct xpc_vars *vars)
+{
+	u64 old_mask, new_mask;
+
+	do {
+		old_mask = vars->heartbeating_to_mask;
+		new_mask = (old_mask & ~(1UL << partid));
+	} while (cmpxchg(&vars->heartbeating_to_mask, old_mask, new_mask) !=
+							old_mask);
+}
+
+
+/*
+ * The AMOs page consists of a number of AMO variables which are divided into
+ * four groups. The first two groups are used to identify an IRQ's sender.
+ * These two groups consist of 64 and 16 AMO variables respectively. The last
+ * two groups, consisting of just one AMO variable each, are used to identify
+ * the remote partitions that are currently engaged (from the viewpoint of
+ * the XPC running on the remote partition).
+ */
+#define XPC_NOTIFY_IRQ_AMOS	   0
+#define XPC_ACTIVATE_IRQ_AMOS	   (XPC_NOTIFY_IRQ_AMOS + XP_MAX_PARTITIONS)
+#define XPC_ENGAGED_PARTITIONS_AMO (XPC_ACTIVATE_IRQ_AMOS + XP_NASID_MASK_WORDS)
+#define XPC_DISENGAGE_REQUEST_AMO  (XPC_ENGAGED_PARTITIONS_AMO + 1)
+
+
 /*
  * The following structure describes the per partition specific variables.
  *
@@ -358,7 +426,7 @@ struct xpc_channel {
 	void *key;			/* pointer to user's key */
 
 	struct semaphore msg_to_pull_sema; /* next msg to pull serialization */
-	struct semaphore teardown_sema;    /* wait for teardown completion */
+	struct semaphore wdisconnect_sema; /* wait for channel disconnect */
 
 	struct xpc_openclose_args *local_openclose_args; /* args passed on */
 					/* opening or closing of channel */
@@ -410,6 +478,7 @@ struct xpc_channel {
 
 #define	XPC_C_DISCONNECTED	0x00002000 /* channel is disconnected */
 #define	XPC_C_DISCONNECTING	0x00004000 /* channel is being disconnected */
+#define	XPC_C_WDISCONNECT	0x00008000 /* waiting for channel disconnect */
 
 
@@ -422,6 +491,8 @@ struct xpc_partition {
 
 	/* XPC HB infrastructure */
 
+	u8 remote_rp_version;		/* version# of partition's rsvd pg */
+	struct timespec remote_rp_stamp;/* time when rsvd pg was initialized */
 	u64 remote_rp_pa;		/* phys addr of partition's rsvd pg */
 	u64 remote_vars_pa;		/* phys addr of partition's vars */
 	u64 remote_vars_part_pa;	/* phys addr of partition's vars part */
@@ -432,10 +503,14 @@ struct xpc_partition {
 	u32 act_IRQ_rcvd;		/* IRQs since activation */
 	spinlock_t act_lock;		/* protect updating of act_state */
 	u8 act_state;			/* from XPC HB viewpoint */
+	u8 remote_vars_version;		/* version# of partition's vars */
 	enum xpc_retval reason;		/* reason partition is deactivating */
 	int reason_line;		/* line# deactivation initiated from */
 	int reactivate_nasid;		/* nasid in partition to reactivate */
 
+	unsigned long disengage_request_timeout; /* timeout in XPC_TICKS */
+	struct timer_list disengage_request_timer;
+
 
 	/* XPC infrastructure referencing and teardown control */
 
@@ -454,6 +529,7 @@ struct xpc_partition {
 
 	u8 nchannels;		   /* #of defined channels supported */
 	atomic_t nchannels_active; /* #of channels that are not DISCONNECTED */
+	atomic_t nchannels_engaged;/* #of channels engaged with remote part */
 	struct xpc_channel *channels;/* array of channel structures */
 
 	void *local_GPs_base;	  /* base address of kmalloc'd space */
@@ -518,6 +594,7 @@ struct xpc_partition {
 #define XPC_P_TORNDOWN		0x03	/* infrastructure is torndown */
 
 
+
 /*
  * struct xpc_partition IPI_timer #of seconds to wait before checking for
  * dropped IPIs. These occur whenever an IPI amo write doesn't complete until
@@ -526,6 +603,13 @@ struct xpc_partition {
 #define XPC_P_DROPPED_IPI_WAIT	(0.25 * HZ)
 
 
+/* number of seconds to wait for other partitions to disengage */
+#define XPC_DISENGAGE_REQUEST_TIMELIMIT 90
+
+/* interval in seconds to print 'waiting disengagement' messages */
+#define XPC_DISENGAGE_PRINTMSG_INTERVAL		10
+
+
 #define XPC_PARTID(_p)	((partid_t) ((_p) - &xpc_partitions[0]))
 
 
@@ -550,8 +634,6 @@ extern void xpc_activate_partition(struct xpc_partition *);
 
 /* found in xpc_partition.c */
 extern int xpc_exiting;
-extern int xpc_hb_interval;
-extern int xpc_hb_check_interval;
 extern struct xpc_vars *xpc_vars;
 extern struct xpc_rsvd_page *xpc_rsvd_page;
 extern struct xpc_vars_part *xpc_vars_part;
@@ -561,6 +643,7 @@ extern struct xpc_rsvd_page *xpc_rsvd_page_init(void);
 extern void xpc_allow_IPI_ops(void);
 extern void xpc_restrict_IPI_ops(void);
 extern int xpc_identify_act_IRQ_sender(void);
+extern int xpc_partition_disengaged(struct xpc_partition *);
 extern enum xpc_retval xpc_mark_partition_active(struct xpc_partition *);
 extern void xpc_mark_partition_inactive(struct xpc_partition *);
 extern void xpc_discovery(void);
@@ -585,8 +668,8 @@ extern void xpc_connected_callout(struct xpc_channel *);
 extern void xpc_deliver_msg(struct xpc_channel *);
 extern void xpc_disconnect_channel(const int, struct xpc_channel *,
 					enum xpc_retval, unsigned long *);
-extern void xpc_disconnected_callout(struct xpc_channel *);
-extern void xpc_partition_down(struct xpc_partition *, enum xpc_retval);
+extern void xpc_disconnecting_callout(struct xpc_channel *);
+extern void xpc_partition_going_down(struct xpc_partition *, enum xpc_retval);
 extern void xpc_teardown_infrastructure(struct xpc_partition *);
 
 
@@ -673,6 +756,157 @@ xpc_part_ref(struct xpc_partition *part)
 
 
+/*
+ * This next set of inlines are used to keep track of when a partition is
+ * potentially engaged in accessing memory belonging to another partition.
+ */
+
+static inline void
+xpc_mark_partition_engaged(struct xpc_partition *part)
+{
+	unsigned long irq_flags;
+	AMO_t *amo = (AMO_t *) __va(part->remote_amos_page_pa +
+				(XPC_ENGAGED_PARTITIONS_AMO * sizeof(AMO_t)));
+
+
+	local_irq_save(irq_flags);
+
+	/* set bit corresponding to our partid in remote partition's AMO */
+	FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_OR,
+						(1UL << sn_partition_id));
+	/*
+	 * We must always use the nofault function regardless of whether we
+	 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
+	 * didn't, we'd never know that the other partition is down and would
+	 * keep sending IPIs and AMOs to it until the heartbeat times out.
+	 */
+	(void) xp_nofault_PIOR((u64 *) GLOBAL_MMR_ADDR(NASID_GET(&amo->
+				variable), xp_nofault_PIOR_target));
+
+	local_irq_restore(irq_flags);
+}
+
+static inline void
+xpc_mark_partition_disengaged(struct xpc_partition *part)
+{
+	unsigned long irq_flags;
+	AMO_t *amo = (AMO_t *) __va(part->remote_amos_page_pa +
+				(XPC_ENGAGED_PARTITIONS_AMO * sizeof(AMO_t)));
+
+
+	local_irq_save(irq_flags);
+
+	/* clear bit corresponding to our partid in remote partition's AMO */
+	FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_AND,
+						~(1UL << sn_partition_id));
+	/*
+	 * We must always use the nofault function regardless of whether we
+	 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
+	 * didn't, we'd never know that the other partition is down and would
+	 * keep sending IPIs and AMOs to it until the heartbeat times out.
+	 */
+	(void) xp_nofault_PIOR((u64 *) GLOBAL_MMR_ADDR(NASID_GET(&amo->
+				variable), xp_nofault_PIOR_target));
+
+	local_irq_restore(irq_flags);
+}
+
+static inline void
+xpc_request_partition_disengage(struct xpc_partition *part)
+{
+	unsigned long irq_flags;
+	AMO_t *amo = (AMO_t *) __va(part->remote_amos_page_pa +
+				(XPC_DISENGAGE_REQUEST_AMO * sizeof(AMO_t)));
+
+
+	local_irq_save(irq_flags);
+
+	/* set bit corresponding to our partid in remote partition's AMO */
+	FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_OR,
+						(1UL << sn_partition_id));
+	/*
+	 * We must always use the nofault function regardless of whether we
+	 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
+	 * didn't, we'd never know that the other partition is down and would
+	 * keep sending IPIs and AMOs to it until the heartbeat times out.
+	 */
+	(void) xp_nofault_PIOR((u64 *) GLOBAL_MMR_ADDR(NASID_GET(&amo->
+				variable), xp_nofault_PIOR_target));
+
+	local_irq_restore(irq_flags);
+}
+
+static inline void
+xpc_cancel_partition_disengage_request(struct xpc_partition *part)
+{
+	unsigned long irq_flags;
+	AMO_t *amo = (AMO_t *) __va(part->remote_amos_page_pa +
+				(XPC_DISENGAGE_REQUEST_AMO * sizeof(AMO_t)));
+
+
+	local_irq_save(irq_flags);
+
+	/* clear bit corresponding to our partid in remote partition's AMO */
+	FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_AND,
+						~(1UL << sn_partition_id));
+	/*
+	 * We must always use the nofault function regardless of whether we
+	 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
+	 * didn't, we'd never know that the other partition is down and would
+	 * keep sending IPIs and AMOs to it until the heartbeat times out.
+	 */
+	(void) xp_nofault_PIOR((u64 *) GLOBAL_MMR_ADDR(NASID_GET(&amo->
+				variable), xp_nofault_PIOR_target));
+
+	local_irq_restore(irq_flags);
+}
+
+static inline u64
+xpc_partition_engaged(u64 partid_mask)
+{
+	AMO_t *amo = xpc_vars->amos_page + XPC_ENGAGED_PARTITIONS_AMO;
+
+
+	/* return our partition's AMO variable ANDed with partid_mask */
+	return (FETCHOP_LOAD_OP(TO_AMO((u64) &amo->variable), FETCHOP_LOAD) &
+								partid_mask);
+}
+
+static inline u64
+xpc_partition_disengage_requested(u64 partid_mask)
+{
+	AMO_t *amo = xpc_vars->amos_page + XPC_DISENGAGE_REQUEST_AMO;
+
+
+	/* return our partition's AMO variable ANDed with partid_mask */
+	return (FETCHOP_LOAD_OP(TO_AMO((u64) &amo->variable), FETCHOP_LOAD) &
+								partid_mask);
+}
+
+static inline void
+xpc_clear_partition_engaged(u64 partid_mask)
+{
+	AMO_t *amo = xpc_vars->amos_page + XPC_ENGAGED_PARTITIONS_AMO;
+
+
+	/* clear bit(s) based on partid_mask in our partition's AMO */
+	FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_AND,
+								~partid_mask);
+}
+
+static inline void
+xpc_clear_partition_disengage_request(u64 partid_mask)
+{
+	AMO_t *amo = xpc_vars->amos_page + XPC_DISENGAGE_REQUEST_AMO;
+
+
+	/* clear bit(s) based on partid_mask in our partition's AMO */
+	FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_AND,
+								~partid_mask);
+}
+
+
+
 /*
  * The following set of macros and inlines are used for the sending and
  * receiving of IPIs (also known as IRQs). There are two flavors of IPIs,
@@ -722,13 +956,13 @@ xpc_IPI_send(AMO_t *amo, u64 flag, int nasid, int phys_cpuid, int vector)
  * Flag the appropriate AMO variable and send an IPI to the specified node.
  */
 static inline void
-xpc_activate_IRQ_send(u64 amos_page, int from_nasid, int to_nasid,
+xpc_activate_IRQ_send(u64 amos_page_pa, int from_nasid, int to_nasid,
 			int to_phys_cpuid)
 {
 	int w_index = XPC_NASID_W_INDEX(from_nasid);
 	int b_index = XPC_NASID_B_INDEX(from_nasid);
-	AMO_t *amos = (AMO_t *) __va(amos_page +
-					(XP_MAX_PARTITIONS * sizeof(AMO_t)));
+	AMO_t *amos = (AMO_t *) __va(amos_page_pa +
+				(XPC_ACTIVATE_IRQ_AMOS * sizeof(AMO_t)));
 
 
 	(void) xpc_IPI_send(&amos[w_index], (1UL << b_index), to_nasid,
@@ -756,6 +990,13 @@ xpc_IPI_send_reactivate(struct xpc_partition *part)
 				xpc_vars->act_nasid, xpc_vars->act_phys_cpuid);
 }
 
+static inline void
+xpc_IPI_send_disengage(struct xpc_partition *part)
+{
+	xpc_activate_IRQ_send(part->remote_amos_page_pa, cnodeid_to_nasid(0),
+			part->remote_act_nasid, part->remote_act_phys_cpuid);
+}
+
 
 /*
  * IPIs associated with SGI_XPC_NOTIFY IRQ.
@@ -903,17 +1144,18 @@ xpc_IPI_send_local_msgrequest(struct xpc_channel *ch)
  * cacheable mapping for the entire region. This will prevent speculative
  * reading of cached copies of our lines from being issued which will cause
  * a PI FSB Protocol error to be generated by the SHUB. For XPC, we need 64
- * (XP_MAX_PARTITIONS) AMO variables for message notification (xpc_main.c)
- * and an additional 16 AMO variables for partition activation (xpc_hb.c).
+ * (XP_MAX_PARTITIONS) AMO variables for message notification and an
+ * additional 16 (XP_NASID_MASK_WORDS) AMO variables for partition activation
+ * and 2 AMO variables for partition deactivation.
  */
 static inline AMO_t *
-xpc_IPI_init(partid_t partid)
+xpc_IPI_init(int index)
 {
-	AMO_t *part_amo = xpc_vars->amos_page + partid;
+	AMO_t *amo = xpc_vars->amos_page + index;
 
 
-	xpc_IPI_receive(part_amo);
-	return part_amo;
+	(void) xpc_IPI_receive(amo);	/* clear AMO variable */
+	return amo;
 }
 
 
diff --git a/arch/ia64/sn/kernel/xpc_channel.c b/arch/ia64/sn/kernel/xpc_channel.c
index 94698bea7be..195ac1b8e26 100644
--- a/arch/ia64/sn/kernel/xpc_channel.c
+++ b/arch/ia64/sn/kernel/xpc_channel.c
@@ -57,6 +57,7 @@ xpc_initialize_channels(struct xpc_partition *part, partid_t partid)
 
 		spin_lock_init(&ch->lock);
 		sema_init(&ch->msg_to_pull_sema, 1);	/* mutex */
+		sema_init(&ch->wdisconnect_sema, 0);	/* event wait */
 
 		atomic_set(&ch->n_on_msg_allocate_wq, 0);
 		init_waitqueue_head(&ch->msg_allocate_wq);
@@ -166,6 +167,7 @@ xpc_setup_infrastructure(struct xpc_partition *part)
 	xpc_initialize_channels(part, partid);
 
 	atomic_set(&part->nchannels_active, 0);
+	atomic_set(&part->nchannels_engaged, 0);
 
 
 	/* local_IPI_amo were set to 0 by an earlier memset() */
@@ -555,8 +557,6 @@ xpc_allocate_msgqueues(struct xpc_channel *ch)
 		sema_init(&ch->notify_queue[i].sema, 0);
 	}
 
-	sema_init(&ch->teardown_sema, 0);	/* event wait */
-
 	spin_lock_irqsave(&ch->lock, irq_flags);
 	ch->flags |= XPC_C_SETUP;
 	spin_unlock_irqrestore(&ch->lock, irq_flags);
@@ -625,6 +625,55 @@ xpc_process_connect(struct xpc_channel *ch, unsigned long *irq_flags)
 }
 
 
+/*
+ * Notify those who wanted to be notified upon delivery of their message.
+ */
+static void
+xpc_notify_senders(struct xpc_channel *ch, enum xpc_retval reason, s64 put)
+{
+	struct xpc_notify *notify;
+	u8 notify_type;
+	s64 get = ch->w_remote_GP.get - 1;
+
+
+	while (++get < put && atomic_read(&ch->n_to_notify) > 0) {
+
+		notify = &ch->notify_queue[get % ch->local_nentries];
+
+		/*
+		 * See if the notify entry indicates it was associated with
+		 * a message whose sender wants to be notified. It is possible
+		 * that it is, but someone else is doing or has done the
+		 * notification.
+		 */
+		notify_type = notify->type;
+		if (notify_type == 0 ||
+				cmpxchg(&notify->type, notify_type, 0) !=
+								notify_type) {
+			continue;
+		}
+
+		DBUG_ON(notify_type != XPC_N_CALL);
+
+		atomic_dec(&ch->n_to_notify);
+
+		if (notify->func != NULL) {
+			dev_dbg(xpc_chan, "notify->func() called, notify=0x%p, "
+				"msg_number=%ld, partid=%d, channel=%d\n",
+				(void *) notify, get, ch->partid, ch->number);
+
+			notify->func(reason, ch->partid, ch->number,
+								notify->key);
+
+			dev_dbg(xpc_chan, "notify->func() returned, "
+				"notify=0x%p, msg_number=%ld, partid=%d, "
+				"channel=%d\n", (void *) notify, get,
+				ch->partid, ch->number);
+		}
+	}
+}
+
+
 /*
  * Free up message queues and other stuff that were allocated for the specified
  * channel.
@@ -669,9 +718,6 @@ xpc_free_msgqueues(struct xpc_channel *ch)
 		ch->remote_msgqueue = NULL;
 		kfree(ch->notify_queue);
 		ch->notify_queue = NULL;
-
-		/* in case someone is waiting for the teardown to complete */
-		up(&ch->teardown_sema);
 	}
 }
 
@@ -683,7 +729,7 @@ static void
 xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
 {
 	struct xpc_partition *part = &xpc_partitions[ch->partid];
-	u32 ch_flags = ch->flags;
+	u32 channel_was_connected = (ch->flags & XPC_C_WASCONNECTED);
 
 
 	DBUG_ON(!spin_is_locked(&ch->lock));
@@ -701,12 +747,13 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
 	}
 	DBUG_ON(atomic_read(&ch->kthreads_assigned) != 0);
 
-	/* it's now safe to free the channel's message queues */
-
-	xpc_free_msgqueues(ch);
-	DBUG_ON(ch->flags & XPC_C_SETUP);
+	if (part->act_state == XPC_P_DEACTIVATING) {
+		/* can't proceed until the other side disengages from us */
+		if (xpc_partition_engaged(1UL << ch->partid)) {
+			return;
+		}
 
-	if (part->act_state != XPC_P_DEACTIVATING) {
+	} else {
 
 		/* as long as the other side is up do the full protocol */
 
@@ -724,16 +771,33 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
 		}
 	}
 
+	/* wake those waiting for notify completion */
+	if (atomic_read(&ch->n_to_notify) > 0) {
+		/* >>> we do callout while holding ch->lock */
+		xpc_notify_senders(ch, ch->reason, ch->w_local_GP.put);
+	}
+
 	/* both sides are disconnected now */
 
-	ch->flags = XPC_C_DISCONNECTED;	/* clear all flags, but this one */
+	/* it's now safe to free the channel's message queues */
+	xpc_free_msgqueues(ch);
+
+	/* mark disconnected, clear all other flags except XPC_C_WDISCONNECT */
+	ch->flags = (XPC_C_DISCONNECTED | (ch->flags & XPC_C_WDISCONNECT));
 
 	atomic_dec(&part->nchannels_active);
 
-	if (ch_flags & XPC_C_WASCONNECTED) {
+	if (channel_was_connected) {
 		dev_info(xpc_chan, "channel %d to partition %d disconnected, "
 			"reason=%d\n", ch->number, ch->partid, ch->reason);
 	}
+
+	/* wake the thread that is waiting for this channel to disconnect */
+	if (ch->flags & XPC_C_WDISCONNECT) {
+		spin_unlock_irqrestore(&ch->lock, *irq_flags);
+		up(&ch->wdisconnect_sema);
+		spin_lock_irqsave(&ch->lock, *irq_flags);
+	}
 }
 
 
@@ -764,7 +828,7 @@ xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number,
 		/*
 		 * If RCLOSEREQUEST is set, we're probably waiting for
 		 * RCLOSEREPLY. We should find it and a ROPENREQUEST packed
-		 * with this RCLOSEQREUQEST in the IPI_flags.
+		 * with this RCLOSEREQUEST in the IPI_flags.
 		 */
 
 		if (ch->flags & XPC_C_RCLOSEREQUEST) {
@@ -852,7 +916,7 @@ xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number,
 			"channel=%d\n", args->msg_size, args->local_nentries,
 			ch->partid, ch->number);
 
-		if ((ch->flags & XPC_C_DISCONNECTING) ||
+		if ((ch->flags & (XPC_C_DISCONNECTING | XPC_C_WDISCONNECT)) ||
 					part->act_state == XPC_P_DEACTIVATING) {
 			spin_unlock_irqrestore(&ch->lock, irq_flags);
 			return;
@@ -1039,55 +1103,6 @@ xpc_connect_channel(struct xpc_channel *ch)
 }
 
 
-/*
- * Notify those who wanted to be notified upon delivery of their message.
- */
-static void
-xpc_notify_senders(struct xpc_channel *ch, enum xpc_retval reason, s64 put)
-{
-	struct xpc_notify *notify;
-	u8 notify_type;
-	s64 get = ch->w_remote_GP.get - 1;
-
-
-	while (++get < put && atomic_read(&ch->n_to_notify) > 0) {
-
-		notify = &ch->notify_queue[get % ch->local_nentries];
-
-		/*
-		 * See if the notify entry indicates it was associated with
-		 * a message who's sender wants to be notified. It is possible
-		 * that it is, but someone else is doing or has done the
-		 * notification.
-		 */
-		notify_type = notify->type;
-		if (notify_type == 0 ||
-				cmpxchg(&notify->type, notify_type, 0) !=
-								notify_type) {
-			continue;
-		}
-
-		DBUG_ON(notify_type != XPC_N_CALL);
-
-		atomic_dec(&ch->n_to_notify);
-
-		if (notify->func != NULL) {
-			dev_dbg(xpc_chan, "notify->func() called, notify=0x%p, "
-				"msg_number=%ld, partid=%d, channel=%d\n",
-				(void *) notify, get, ch->partid, ch->number);
-
-			notify->func(reason, ch->partid, ch->number,
-								notify->key);
-
-			dev_dbg(xpc_chan, "notify->func() returned, "
-				"notify=0x%p, msg_number=%ld, partid=%d, "
-				"channel=%d\n", (void *) notify, get,
-				ch->partid, ch->number);
-		}
-	}
-}
-
-
 /*
  * Clear some of the msg flags in the local message queue.
  */
@@ -1240,6 +1255,7 @@ xpc_process_channel_activity(struct xpc_partition *part)
 	u64 IPI_amo, IPI_flags;
 	struct xpc_channel *ch;
 	int ch_number;
+	u32 ch_flags;
 
 
 	IPI_amo = xpc_get_IPI_flags(part);
@@ -1266,8 +1282,9 @@ xpc_process_channel_activity(struct xpc_partition *part)
 			xpc_process_openclose_IPI(part, ch_number, IPI_flags);
 		}
 
+		ch_flags = ch->flags;	/* need an atomic snapshot of flags */
 
-		if (ch->flags & XPC_C_DISCONNECTING) {
+		if (ch_flags & XPC_C_DISCONNECTING) {
 			spin_lock_irqsave(&ch->lock, irq_flags);
 			xpc_process_disconnect(ch, &irq_flags);
 			spin_unlock_irqrestore(&ch->lock, irq_flags);
@@ -1278,9 +1295,9 @@ xpc_process_channel_activity(struct xpc_partition *part)
 			continue;
 		}
 
-		if (!(ch->flags & XPC_C_CONNECTED)) {
-			if (!(ch->flags & XPC_C_OPENREQUEST)) {
-				DBUG_ON(ch->flags & XPC_C_SETUP);
+		if (!(ch_flags & XPC_C_CONNECTED)) {
+			if (!(ch_flags & XPC_C_OPENREQUEST)) {
+				DBUG_ON(ch_flags & XPC_C_SETUP);
 				(void) xpc_connect_channel(ch);
 			} else {
 				spin_lock_irqsave(&ch->lock, irq_flags);
@@ -1305,8 +1322,8 @@ xpc_process_channel_activity(struct xpc_partition *part)
 
 
 /*
- * XPC's heartbeat code calls this function to inform XPC that a partition has
- * gone down.  XPC responds by tearing down the XPartition Communication
+ * XPC's heartbeat code calls this function to inform XPC that a partition is
+ * going down.  XPC responds by tearing down the XPartition Communication
  * infrastructure used for the just downed partition.
  *
  * XPC's heartbeat code will never call this function and xpc_partition_up()
@@ -1314,7 +1331,7 @@ xpc_process_channel_activity(struct xpc_partition *part)
  * at the same time.
  */
 void
-xpc_partition_down(struct xpc_partition *part, enum xpc_retval reason)
+xpc_partition_going_down(struct xpc_partition *part, enum xpc_retval reason)
 {
 	unsigned long irq_flags;
 	int ch_number;
@@ -1330,12 +1347,11 @@ xpc_partition_down(struct xpc_partition *part, enum xpc_retval reason)
 	}
 
 
-	/* disconnect all channels associated with the downed partition */
+	/* disconnect channels associated with the partition going down */
 
 	for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
 		ch = &part->channels[ch_number];
 
-
 		xpc_msgqueue_ref(ch);
 		spin_lock_irqsave(&ch->lock, irq_flags);
 
@@ -1370,6 +1386,7 @@ xpc_teardown_infrastructure(struct xpc_partition *part)
 	 * this partition.
 	 */
 
+	DBUG_ON(atomic_read(&part->nchannels_engaged) != 0);
 	DBUG_ON(atomic_read(&part->nchannels_active) != 0);
 	DBUG_ON(part->setup_state != XPC_P_SETUP);
 	part->setup_state = XPC_P_WTEARDOWN;
@@ -1506,8 +1523,12 @@ xpc_initiate_disconnect(int ch_number)
 
 			spin_lock_irqsave(&ch->lock, irq_flags);
 
-			XPC_DISCONNECT_CHANNEL(ch, xpcUnregistering,
+			if (!(ch->flags & XPC_C_DISCONNECTED)) {
+				ch->flags |= XPC_C_WDISCONNECT;
+
+				XPC_DISCONNECT_CHANNEL(ch, xpcUnregistering,
 								&irq_flags);
+			}
 
 			spin_unlock_irqrestore(&ch->lock, irq_flags);
 
@@ -1523,8 +1544,9 @@ xpc_initiate_disconnect(int ch_number)
 /*
  * To disconnect a channel, and reflect it back to all who may be waiting.
  *
- * >>> An OPEN is not allowed until XPC_C_DISCONNECTING is cleared by
- * >>> xpc_free_msgqueues().
+ * An OPEN is not allowed until XPC_C_DISCONNECTING is cleared by
+ * xpc_process_disconnect(), and if set, XPC_C_WDISCONNECT is cleared by
+ * xpc_disconnect_wait().
  *
  * THE CHANNEL IS TO BE LOCKED BY THE CALLER AND WILL REMAIN LOCKED UPON RETURN.
  */
@@ -1532,7 +1554,7 @@ void
 xpc_disconnect_channel(const int line, struct xpc_channel *ch,
 			enum xpc_retval reason, unsigned long *irq_flags)
 {
-	u32 flags;
+	u32 channel_was_connected = (ch->flags & XPC_C_CONNECTED);
 
 
 	DBUG_ON(!spin_is_locked(&ch->lock));
@@ -1547,61 +1569,53 @@ xpc_disconnect_channel(const int line, struct xpc_channel *ch,
 
 	XPC_SET_REASON(ch, reason, line);
 
-	flags = ch->flags;
+	ch->flags |= (XPC_C_CLOSEREQUEST | XPC_C_DISCONNECTING);
 	/* some of these may not have been set */
 	ch->flags &= ~(XPC_C_OPENREQUEST | XPC_C_OPENREPLY |
 			XPC_C_ROPENREQUEST | XPC_C_ROPENREPLY |
 			XPC_C_CONNECTING | XPC_C_CONNECTED);
 
-	ch->flags |= (XPC_C_CLOSEREQUEST | XPC_C_DISCONNECTING);
 	xpc_IPI_send_closerequest(ch, irq_flags);
 
-	if (flags & XPC_C_CONNECTED) {
+	if (channel_was_connected) {
 		ch->flags |= XPC_C_WASCONNECTED;
 	}
 
+	spin_unlock_irqrestore(&ch->lock, *irq_flags);
+
+	/* wake all idle kthreads so they can exit */
 	if (atomic_read(&ch->kthreads_idle) > 0) {
-		/* wake all idle kthreads so they can exit */
 		wake_up_all(&ch->idle_wq);
 	}
 
-	spin_unlock_irqrestore(&ch->lock, *irq_flags);
-
-
 	/* wake those waiting to allocate an entry from the local msg queue */
-
 	if (atomic_read(&ch->n_on_msg_allocate_wq) > 0) {
 		wake_up(&ch->msg_allocate_wq);
 	}
 
-	/* wake those waiting for notify completion */
-
-	if (atomic_read(&ch->n_to_notify) > 0) {
-		xpc_notify_senders(ch, reason, ch->w_local_GP.put);
-	}
-
 	spin_lock_irqsave(&ch->lock, *irq_flags);
 }
 
 
 void
-xpc_disconnected_callout(struct xpc_channel *ch)
+xpc_disconnecting_callout(struct xpc_channel *ch)
 {
 	/*
-	 * Let the channel's registerer know that the channel is now
+	 * Let the channel's registerer know that the channel is being
 	 * disconnected. We don't want to do this if the registerer was never
-	 * informed of a connection being made, unless the disconnect was for
-	 * abnormal reasons.
+	 * informed of a connection being made.
 	 */
 
 	if (ch->func != NULL) {
-		dev_dbg(xpc_chan, "ch->func() called, reason=%d, partid=%d, "
-			"channel=%d\n", ch->reason, ch->partid, ch->number);
+		dev_dbg(xpc_chan, "ch->func() called, reason=xpcDisconnecting,"
+			" partid=%d, channel=%d\n", ch->partid, ch->number);
 
-		ch->func(ch->reason, ch->partid, ch->number, NULL, ch->key);
+		ch->func(xpcDisconnecting, ch->partid, ch->number, NULL,
+								ch->key);
 
-		dev_dbg(xpc_chan, "ch->func() returned, reason=%d, partid=%d, "
-			"channel=%d\n", ch->reason, ch->partid, ch->number);
+		dev_dbg(xpc_chan, "ch->func() returned, reason="
+			"xpcDisconnecting, partid=%d, channel=%d\n",
+			ch->partid, ch->number);
 	}
 }
 
@@ -1848,7 +1862,7 @@ xpc_send_msg(struct xpc_channel *ch, struct xpc_msg *msg, u8 notify_type,
 			xpc_notify_func func, void *key)
 {
 	enum xpc_retval ret = xpcSuccess;
-	struct xpc_notify *notify = NULL;   // >>> to keep the compiler happy!!
+	struct xpc_notify *notify = notify;
 	s64 put, msg_number = msg->number;
 
 
diff --git a/arch/ia64/sn/kernel/xpc_main.c b/arch/ia64/sn/kernel/xpc_main.c
index bb1d5cf3044..feece200b3c 100644
--- a/arch/ia64/sn/kernel/xpc_main.c
+++ b/arch/ia64/sn/kernel/xpc_main.c
@@ -54,6 +54,7 @@
 #include <linux/interrupt.h>
 #include <linux/slab.h>
 #include <linux/delay.h>
+#include <linux/reboot.h>
 #include <asm/sn/intr.h>
 #include <asm/sn/sn_sal.h>
 #include <asm/uaccess.h>
@@ -82,11 +83,13 @@ struct device *xpc_chan = &xpc_chan_dbg_subname;
 
 /* systune related variables for /proc/sys directories */
 
-static int xpc_hb_min = 1;
-static int xpc_hb_max = 10;
+static int xpc_hb_interval = XPC_HB_DEFAULT_INTERVAL;
+static int xpc_hb_min_interval = 1;
+static int xpc_hb_max_interval = 10;
 
-static int xpc_hb_check_min = 10;
-static int xpc_hb_check_max = 120;
+static int xpc_hb_check_interval = XPC_HB_CHECK_DEFAULT_INTERVAL;
+static int xpc_hb_check_min_interval = 10;
+static int xpc_hb_check_max_interval = 120;
 
 static ctl_table xpc_sys_xpc_hb_dir[] = {
 	{
@@ -99,7 +102,8 @@ static ctl_table xpc_sys_xpc_hb_dir[] = {
 		&proc_dointvec_minmax,
 		&sysctl_intvec,
 		NULL,
-		&xpc_hb_min, &xpc_hb_max
+		&xpc_hb_min_interval,
+		&xpc_hb_max_interval
 	},
 	{
 		2,
@@ -111,7 +115,8 @@ static ctl_table xpc_sys_xpc_hb_dir[] = {
 		&proc_dointvec_minmax,
 		&sysctl_intvec,
 		NULL,
-		&xpc_hb_check_min, &xpc_hb_check_max
+		&xpc_hb_check_min_interval,
+		&xpc_hb_check_max_interval
 	},
 	{0}
 };
@@ -148,11 +153,11 @@ static DECLARE_WAIT_QUEUE_HEAD(xpc_act_IRQ_wq);
 
 static unsigned long xpc_hb_check_timeout;
 
-/* xpc_hb_checker thread exited notification */
-static DECLARE_MUTEX_LOCKED(xpc_hb_checker_exited);
+/* used as an indication of when the xpc_hb_checker thread is inactive */
+static DECLARE_MUTEX_LOCKED(xpc_hb_checker_inactive);
 
-/* xpc_discovery thread exited notification */
-static DECLARE_MUTEX_LOCKED(xpc_discovery_exited);
+/* used as an indication of when the xpc_discovery thread is inactive */
+static DECLARE_MUTEX_LOCKED(xpc_discovery_inactive);
 
 
 static struct timer_list xpc_hb_timer;
@@ -161,6 +166,30 @@ static struct timer_list xpc_hb_timer;
 static void xpc_kthread_waitmsgs(struct xpc_partition *, struct xpc_channel *);
 
 
+static int xpc_system_reboot(struct notifier_block *, unsigned long, void *);
+static struct notifier_block xpc_reboot_notifier = {
+	.notifier_call = xpc_system_reboot,
+};
+
+
+/*
+ * Timer callback ('data' is the partition) enforcing the disengage timelimit.
+ */
+static void
+xpc_timeout_partition_disengage_request(unsigned long data)
+{
+	struct xpc_partition *part = (struct xpc_partition *) data;
+
+	/* NOTE(review): XPC_TICKS is presumably the current tick count */
+	DBUG_ON(XPC_TICKS < part->disengage_request_timeout);
+
+	(void) xpc_partition_disengaged(part);
+	/* the call above must have zeroed the timeout and engaged bit */
+	DBUG_ON(part->disengage_request_timeout != 0);
+	DBUG_ON(xpc_partition_engaged(1UL << XPC_PARTID(part)) != 0);
+}
+
+
 /*
  * Notify the heartbeat check thread that an IRQ has been received.
  */
@@ -214,12 +243,6 @@ xpc_hb_checker(void *ignore)
 
 	while (!(volatile int) xpc_exiting) {
 
-		/* wait for IRQ or timeout */
-		(void) wait_event_interruptible(xpc_act_IRQ_wq,
-			    (last_IRQ_count < atomic_read(&xpc_act_IRQ_rcvd) ||
-					jiffies >= xpc_hb_check_timeout ||
-						(volatile int) xpc_exiting));
-
 		dev_dbg(xpc_part, "woke up with %d ticks rem; %d IRQs have "
 			"been received\n",
 			(int) (xpc_hb_check_timeout - jiffies),
@@ -240,6 +263,7 @@ xpc_hb_checker(void *ignore)
 		}
 
 
+		/* check for outstanding IRQs */
 		new_IRQ_count = atomic_read(&xpc_act_IRQ_rcvd);
 		if (last_IRQ_count < new_IRQ_count || force_IRQ != 0) {
 			force_IRQ = 0;
@@ -257,13 +281,19 @@ xpc_hb_checker(void *ignore)
 			xpc_hb_check_timeout = jiffies +
 					   (xpc_hb_check_interval * HZ);
 		}
+
+		/* wait for IRQ or timeout */
+		(void) wait_event_interruptible(xpc_act_IRQ_wq,
+			    (last_IRQ_count < atomic_read(&xpc_act_IRQ_rcvd) ||
+					jiffies >= xpc_hb_check_timeout ||
+						(volatile int) xpc_exiting));
 	}
 
 	dev_dbg(xpc_part, "heartbeat checker is exiting\n");
 
 
 	/* mark this thread as inactive */
-	up(&xpc_hb_checker_exited);
+	up(&xpc_hb_checker_inactive);
 	return 0;
 }
 
@@ -283,7 +313,7 @@ xpc_initiate_discovery(void *ignore)
 	dev_dbg(xpc_part, "discovery thread is exiting\n");
 
 	/* mark this thread as inactive */
-	up(&xpc_discovery_exited);
+	up(&xpc_discovery_inactive);
 	return 0;
 }
 
@@ -309,7 +339,7 @@ xpc_make_first_contact(struct xpc_partition *part)
 			"partition %d\n", XPC_PARTID(part));
 
 		/* wait a 1/4 of a second or so */
-		msleep_interruptible(250);
+		(void) msleep_interruptible(250);
 
 		if (part->act_state == XPC_P_DEACTIVATING) {
 			return part->reason;
@@ -336,7 +366,8 @@ static void
 xpc_channel_mgr(struct xpc_partition *part)
 {
 	while (part->act_state != XPC_P_DEACTIVATING ||
-				atomic_read(&part->nchannels_active) > 0) {
+			atomic_read(&part->nchannels_active) > 0 ||
+					!xpc_partition_disengaged(part)) {
 
 		xpc_process_channel_activity(part);
 
@@ -360,7 +391,8 @@ xpc_channel_mgr(struct xpc_partition *part)
 				(volatile u64) part->local_IPI_amo != 0 ||
 				((volatile u8) part->act_state ==
 							XPC_P_DEACTIVATING &&
-				atomic_read(&part->nchannels_active) == 0)));
+				atomic_read(&part->nchannels_active) == 0 &&
+				xpc_partition_disengaged(part))));
 		atomic_set(&part->channel_mgr_requests, 1);
 
 		// >>> Does it need to wakeup periodically as well? In case we
@@ -482,7 +514,7 @@ xpc_activating(void *__partid)
 		return 0;
 	}
 
-	XPC_ALLOW_HB(partid, xpc_vars);
+	xpc_allow_hb(partid, xpc_vars);
 	xpc_IPI_send_activated(part);
 
 
@@ -492,6 +524,7 @@ xpc_activating(void *__partid)
 	 */
 	(void) xpc_partition_up(part);
 
+	xpc_disallow_hb(partid, xpc_vars);
 	xpc_mark_partition_inactive(part);
 
 	if (part->reason == xpcReactivating) {
@@ -704,11 +737,14 @@ xpc_daemonize_kthread(void *args)
 		xpc_kthread_waitmsgs(part, ch);
 	}
 
-	if (atomic_dec_return(&ch->kthreads_assigned) == 0 &&
-			((ch->flags & XPC_C_CONNECTCALLOUT) ||
-				(ch->reason != xpcUnregistering &&
-					ch->reason != xpcOtherUnregistering))) {
-		xpc_disconnected_callout(ch);
+	if (atomic_dec_return(&ch->kthreads_assigned) == 0) {
+		if (ch->flags & XPC_C_CONNECTCALLOUT) {
+			xpc_disconnecting_callout(ch);
+		}
+		if (atomic_dec_return(&part->nchannels_engaged) == 0) {
+			xpc_mark_partition_disengaged(part);
+			xpc_IPI_send_disengage(part);
+		}
 	}
 
 
@@ -740,6 +776,7 @@ xpc_create_kthreads(struct xpc_channel *ch, int needed)
 	unsigned long irq_flags;
 	pid_t pid;
 	u64 args = XPC_PACK_ARGS(ch->partid, ch->number);
+	struct xpc_partition *part = &xpc_partitions[ch->partid];
 
 
 	while (needed-- > 0) {
@@ -770,9 +807,13 @@ xpc_create_kthreads(struct xpc_channel *ch, int needed)
 		 * kthread. That kthread is responsible for doing the
 		 * counterpart to the following before it exits.
 		 */
-		(void) xpc_part_ref(&xpc_partitions[ch->partid]);
+		(void) xpc_part_ref(part);
 		xpc_msgqueue_ref(ch);
-		atomic_inc(&ch->kthreads_assigned);
+		if (atomic_inc_return(&ch->kthreads_assigned) == 1) {
+			if (atomic_inc_return(&part->nchannels_engaged) == 1) {
+				xpc_mark_partition_engaged(part);
+			}
+		}
 		ch->kthreads_created++;	// >>> temporary debug only!!!
 	}
 }
@@ -781,6 +822,7 @@ xpc_create_kthreads(struct xpc_channel *ch, int needed)
 void
 xpc_disconnect_wait(int ch_number)
 {
+	unsigned long irq_flags;
 	partid_t partid;
 	struct xpc_partition *part;
 	struct xpc_channel *ch;
@@ -793,10 +835,13 @@ xpc_disconnect_wait(int ch_number)
 		if (xpc_part_ref(part)) {
 			ch = &part->channels[ch_number];
 
-// >>> how do we keep from falling into the window between our check and going
-// >>> down and coming back up where sema is re-inited?
-			if (ch->flags & XPC_C_SETUP) {
-				(void) down(&ch->teardown_sema);
+			if (ch->flags & XPC_C_WDISCONNECT) {
+				if (!(ch->flags & XPC_C_DISCONNECTED)) {
+					(void) down(&ch->wdisconnect_sema);
+				}
+				spin_lock_irqsave(&ch->lock, irq_flags);
+				ch->flags &= ~XPC_C_WDISCONNECT;
+				spin_unlock_irqrestore(&ch->lock, irq_flags);
 			}
 
 			xpc_part_deref(part);
@@ -806,62 +851,89 @@ xpc_disconnect_wait(int ch_number)
 
 
 static void
-xpc_do_exit(void)
+xpc_do_exit(enum xpc_retval reason)
 {
 	partid_t partid;
 	int active_part_count;
 	struct xpc_partition *part;
+	unsigned long printmsg_time;
 
 
-	/* now it's time to eliminate our heartbeat */
-	del_timer_sync(&xpc_hb_timer);
-	xpc_vars->heartbeating_to_mask = 0;
-
-	/* indicate to others that our reserved page is uninitialized */
-	xpc_rsvd_page->vars_pa = 0;
+	/* a 'rmmod XPC' and a 'reboot' cannot both end up here together */
+	DBUG_ON(xpc_exiting == 1);
 
 	/*
-	 * Ignore all incoming interrupts. Without interupts the heartbeat
-	 * checker won't activate any new partitions that may come up.
-	 */
-	free_irq(SGI_XPC_ACTIVATE, NULL);
-
-	/*
-	 * Cause the heartbeat checker and the discovery threads to exit.
-	 * We don't want them attempting to activate new partitions as we
-	 * try to deactivate the existing ones.
+	 * Let the heartbeat checker thread and the discovery thread
+	 * (if one is running) know that they should exit. Also wake up
+	 * the heartbeat checker thread in case it's sleeping.
 	 */
 	xpc_exiting = 1;
 	wake_up_interruptible(&xpc_act_IRQ_wq);
 
-	/* wait for the heartbeat checker thread to mark itself inactive */
-	down(&xpc_hb_checker_exited);
+	/* ignore all incoming interrupts */
+	free_irq(SGI_XPC_ACTIVATE, NULL);
 
 	/* wait for the discovery thread to mark itself inactive */
-	down(&xpc_discovery_exited);
+	down(&xpc_discovery_inactive);
+
+	/* wait for the heartbeat checker thread to mark itself inactive */
+	down(&xpc_hb_checker_inactive);
 
 
-	msleep_interruptible(300);
+	/* sleep for a 1/3 of a second or so */
+	(void) msleep_interruptible(300);
 
 
 	/* wait for all partitions to become inactive */
 
+	printmsg_time = jiffies;
+
 	do {
 		active_part_count = 0;
 
 		for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
 			part = &xpc_partitions[partid];
-			if (part->act_state != XPC_P_INACTIVE) {
-				active_part_count++;
-
-				XPC_DEACTIVATE_PARTITION(part, xpcUnloading);
+			if (xpc_partition_disengaged(part) &&
+					part->act_state == XPC_P_INACTIVE) {
+				continue;
 			}
+
+			active_part_count++;
+
+			XPC_DEACTIVATE_PARTITION(part, reason);
+		}
+
+		if (active_part_count == 0) {
+			break;
+		}
+
+		if (jiffies >= printmsg_time) {
+			dev_info(xpc_part, "waiting for partitions to "
+				"deactivate/disengage, active count=%d, remote "
+				"engaged=0x%lx\n", active_part_count,
+				xpc_partition_engaged(-1UL));
+
+			printmsg_time = jiffies +
+					(XPC_DISENGAGE_PRINTMSG_INTERVAL * HZ);
 		}
 
-		if (active_part_count)
-			msleep_interruptible(300);
-	} while (active_part_count > 0);
+		/* sleep for a 1/3 of a second or so */
+		(void) msleep_interruptible(300);
+
+	} while (1);
+
+	DBUG_ON(xpc_partition_engaged(-1UL));
+
+
+	/* indicate to others that our reserved page is uninitialized */
+	xpc_rsvd_page->vars_pa = 0;
+
+	/* now it's time to eliminate our heartbeat */
+	del_timer_sync(&xpc_hb_timer);
+	DBUG_ON(xpc_vars->heartbeating_to_mask != 0);
 
+	/* take ourselves off of the reboot_notifier_list */
+	(void) unregister_reboot_notifier(&xpc_reboot_notifier);
 
 	/* close down protections for IPI operations */
 	xpc_restrict_IPI_ops();
@@ -876,6 +948,34 @@ xpc_do_exit(void)
 }
 
 
+/*
+ * Reboot notifier callback: map the shutdown event onto an xpc_retval reason
+ * and hand that reason to xpc_do_exit(). Always returns NOTIFY_DONE; 'nb' and
+ * 'unused' are not examined.
+ */
+static int
+xpc_system_reboot(struct notifier_block *nb, unsigned long event, void *unused)
+{
+	enum xpc_retval reason;
+
+
+	if (event == SYS_RESTART) {
+		reason = xpcSystemReboot;
+	} else if (event == SYS_HALT) {
+		reason = xpcSystemHalt;
+	} else if (event == SYS_POWER_OFF) {
+		reason = xpcSystemPoweroff;
+	} else {
+		/* any other event is reported generically */
+		reason = xpcSystemGoingDown;
+	}
+
+	/* the reason tells xpc_do_exit() why XPC is exiting */
+	xpc_do_exit(reason);
+	return NOTIFY_DONE;
+}
+
+
 int __init
 xpc_init(void)
 {
@@ -920,6 +1020,12 @@ xpc_init(void)
 		spin_lock_init(&part->act_lock);
 		part->act_state = XPC_P_INACTIVE;
 		XPC_SET_REASON(part, 0, 0);
+
+		init_timer(&part->disengage_request_timer);
+		part->disengage_request_timer.function =
+				xpc_timeout_partition_disengage_request;
+		part->disengage_request_timer.data = (unsigned long) part;
+
 		part->setup_state = XPC_P_UNSET;
 		init_waitqueue_head(&part->teardown_wq);
 		atomic_set(&part->references, 0);
@@ -976,6 +1082,13 @@ xpc_init(void)
 	}
 
 
+	/* add ourselves to the reboot_notifier_list */
+	ret = register_reboot_notifier(&xpc_reboot_notifier);
+	if (ret != 0) {
+		dev_warn(xpc_part, "can't register reboot notifier\n");
+	}
+
+
 	/*
 	 * Set the beating to other partitions into motion.  This is
 	 * the last requirement for other partitions' discovery to
@@ -997,6 +1110,9 @@ xpc_init(void)
 		/* indicate to others that our reserved page is uninitialized */
 		xpc_rsvd_page->vars_pa = 0;
 
+		/* take ourselves off of the reboot_notifier_list */
+		(void) unregister_reboot_notifier(&xpc_reboot_notifier);
+
 		del_timer_sync(&xpc_hb_timer);
 		free_irq(SGI_XPC_ACTIVATE, NULL);
 		xpc_restrict_IPI_ops();
@@ -1018,9 +1134,9 @@ xpc_init(void)
 		dev_err(xpc_part, "failed while forking discovery thread\n");
 
 		/* mark this new thread as a non-starter */
-		up(&xpc_discovery_exited);
+		up(&xpc_discovery_inactive);
 
-		xpc_do_exit();
+		xpc_do_exit(xpcUnloading);
 		return -EBUSY;
 	}
 
@@ -1039,7 +1155,7 @@ module_init(xpc_init);
 void __exit
 xpc_exit(void)
 {
-	xpc_do_exit();
+	xpc_do_exit(xpcUnloading);
 }
 module_exit(xpc_exit);
 
diff --git a/arch/ia64/sn/kernel/xpc_partition.c b/arch/ia64/sn/kernel/xpc_partition.c
index 578265ea9e6..79a0fc4c860 100644
--- a/arch/ia64/sn/kernel/xpc_partition.c
+++ b/arch/ia64/sn/kernel/xpc_partition.c
@@ -76,11 +76,6 @@ char ____cacheline_aligned
 		xpc_remote_copy_buffer[XPC_RSVD_PAGE_ALIGNED_SIZE];
 
 
-/* systune related variables */
-int xpc_hb_interval = XPC_HB_DEFAULT_INTERVAL;
-int xpc_hb_check_interval = XPC_HB_CHECK_DEFAULT_TIMEOUT;
-
-
 /*
  * Given a nasid, get the physical address of the  partition's reserved page
  * for that nasid. This function returns 0 on any error.
@@ -239,16 +234,21 @@ xpc_rsvd_page_init(void)
 	xpc_vars->amos_page = amos_page;  /* save for next load of XPC */
 
 
-	/*
-	 * Initialize the activation related AMO variables.
-	 */
-	xpc_vars->act_amos = xpc_IPI_init(XP_MAX_PARTITIONS);
-	for (i = 1; i < XP_NASID_MASK_WORDS; i++) {
-		xpc_IPI_init(i + XP_MAX_PARTITIONS);
+	/* initialize the activate IRQ related AMO variables */
+	for (i = 0; i < XP_NASID_MASK_WORDS; i++) {
+		(void) xpc_IPI_init(XPC_ACTIVATE_IRQ_AMOS + i);
 	}
+
+	/* initialize the engaged remote partitions related AMO variables */
+	(void) xpc_IPI_init(XPC_ENGAGED_PARTITIONS_AMO);
+	(void) xpc_IPI_init(XPC_DISENGAGE_REQUEST_AMO);
+
 	/* export AMO page's physical address to other partitions */
 	xpc_vars->amos_page_pa = ia64_tpa((u64) xpc_vars->amos_page);
 
+	/* timestamp of when reserved page was initialized */
+	rp->stamp = CURRENT_TIME;
+
 	/*
 	 * This signifies to the remote partition that our reserved
 	 * page is initialized.
@@ -387,6 +387,11 @@ xpc_check_remote_hb(void)
 	remote_vars = (struct xpc_vars *) xpc_remote_copy_buffer;
 
 	for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
+
+		if (xpc_exiting) {
+			break;
+		}
+
 		if (partid == sn_partition_id) {
 			continue;
 		}
@@ -417,7 +422,7 @@ xpc_check_remote_hb(void)
 
 		if (((remote_vars->heartbeat == part->last_heartbeat) &&
 			(remote_vars->kdb_status == 0)) ||
-			     !XPC_HB_ALLOWED(sn_partition_id, remote_vars)) {
+			     !xpc_hb_allowed(sn_partition_id, remote_vars)) {
 
 			XPC_DEACTIVATE_PARTITION(part, xpcNoHeartbeat);
 			continue;
@@ -436,23 +441,23 @@ xpc_check_remote_hb(void)
  */
 static enum xpc_retval
 xpc_get_remote_rp(int nasid, u64 *discovered_nasids,
-		struct xpc_rsvd_page *remote_rp, u64 *remote_rsvd_page_pa)
+		struct xpc_rsvd_page *remote_rp, u64 *remote_rp_pa)
 {
 	int bres, i;
 
 
 	/* get the reserved page's physical address */
 
-	*remote_rsvd_page_pa = xpc_get_rsvd_page_pa(nasid, (u64) remote_rp,
+	*remote_rp_pa = xpc_get_rsvd_page_pa(nasid, (u64) remote_rp,
 						XPC_RSVD_PAGE_ALIGNED_SIZE);
-	if (*remote_rsvd_page_pa == 0) {
+	if (*remote_rp_pa == 0) {
 		return xpcNoRsvdPageAddr;
 	}
 
 
 	/* pull over the reserved page structure */
 
-	bres = xp_bte_copy(*remote_rsvd_page_pa, ia64_tpa((u64) remote_rp),
+	bres = xp_bte_copy(*remote_rp_pa, ia64_tpa((u64) remote_rp),
 				XPC_RSVD_PAGE_ALIGNED_SIZE,
 				(BTE_NOTIFY | BTE_WACQUIRE), NULL);
 	if (bres != BTE_SUCCESS) {
@@ -523,6 +528,55 @@ xpc_get_remote_vars(u64 remote_vars_pa, struct xpc_vars *remote_vars)
 }
 
 
+/*
+ * Record the remote partition's rsvd page/vars info in 'part', logging each.
+ */
+static void
+xpc_update_partition_info(struct xpc_partition *part, u8 remote_rp_version,
+		struct timespec *remote_rp_stamp, u64 remote_rp_pa,
+		u64 remote_vars_pa, struct xpc_vars *remote_vars)
+{
+	part->remote_rp_version = remote_rp_version;
+	dev_dbg(xpc_part, "  remote_rp_version = 0x%016lx\n",
+		(unsigned long) part->remote_rp_version);
+
+	part->remote_rp_stamp = *remote_rp_stamp;
+	dev_dbg(xpc_part, "  remote_rp_stamp (tv_sec = 0x%lx tv_nsec = 0x%lx\n",
+		part->remote_rp_stamp.tv_sec, part->remote_rp_stamp.tv_nsec);
+
+	part->remote_rp_pa = remote_rp_pa;
+	dev_dbg(xpc_part, "  remote_rp_pa = 0x%016lx\n", part->remote_rp_pa);
+
+	part->remote_vars_pa = remote_vars_pa;
+	dev_dbg(xpc_part, "  remote_vars_pa = 0x%016lx\n",
+		part->remote_vars_pa);
+
+	part->last_heartbeat = remote_vars->heartbeat;
+	dev_dbg(xpc_part, "  last_heartbeat = 0x%016lx\n",
+		part->last_heartbeat);
+
+	part->remote_vars_part_pa = remote_vars->vars_part_pa;
+	dev_dbg(xpc_part, "  remote_vars_part_pa = 0x%016lx\n",
+		part->remote_vars_part_pa);
+
+	part->remote_act_nasid = remote_vars->act_nasid;
+	dev_dbg(xpc_part, "  remote_act_nasid = 0x%x\n",
+		part->remote_act_nasid);
+
+	part->remote_act_phys_cpuid = remote_vars->act_phys_cpuid;
+	dev_dbg(xpc_part, "  remote_act_phys_cpuid = 0x%x\n",
+		part->remote_act_phys_cpuid);
+
+	part->remote_amos_page_pa = remote_vars->amos_page_pa;
+	dev_dbg(xpc_part, "  remote_amos_page_pa = 0x%lx\n",
+		part->remote_amos_page_pa);
+
+	part->remote_vars_version = remote_vars->version;
+	dev_dbg(xpc_part, "  remote_vars_version = 0x%x\n",
+		part->remote_vars_version);
+}
+
+
 /*
  * Prior code has determine the nasid which generated an IPI.  Inspect
  * that nasid to determine if its partition needs to be activated or
@@ -542,8 +596,12 @@ xpc_identify_act_IRQ_req(int nasid)
 {
 	struct xpc_rsvd_page *remote_rp;
 	struct xpc_vars *remote_vars;
-	u64 remote_rsvd_page_pa;
+	u64 remote_rp_pa;
 	u64 remote_vars_pa;
+	int remote_rp_version;
+	int reactivate = 0;
+	int stamp_diff;
+	struct timespec remote_rp_stamp = { 0, 0 };
 	partid_t partid;
 	struct xpc_partition *part;
 	enum xpc_retval ret;
@@ -553,7 +611,7 @@ xpc_identify_act_IRQ_req(int nasid)
 
 	remote_rp = (struct xpc_rsvd_page *) xpc_remote_copy_buffer;
 
-	ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rsvd_page_pa);
+	ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rp_pa);
 	if (ret != xpcSuccess) {
 		dev_warn(xpc_part, "unable to get reserved page from nasid %d, "
 			"which sent interrupt, reason=%d\n", nasid, ret);
@@ -561,6 +619,10 @@ xpc_identify_act_IRQ_req(int nasid)
 	}
 
 	remote_vars_pa = remote_rp->vars_pa;
+	remote_rp_version = remote_rp->version;
+	if (XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
+		remote_rp_stamp = remote_rp->stamp;
+	}
 	partid = remote_rp->partid;
 	part = &xpc_partitions[partid];
 
@@ -586,44 +648,117 @@ xpc_identify_act_IRQ_req(int nasid)
 		"%ld:0x%lx\n", (int) nasid, (int) partid, part->act_IRQ_rcvd,
 		remote_vars->heartbeat, remote_vars->heartbeating_to_mask);
 
+	if (xpc_partition_disengaged(part) &&
+					part->act_state == XPC_P_INACTIVE) {
 
-	if (part->act_state == XPC_P_INACTIVE) {
+		xpc_update_partition_info(part, remote_rp_version,
+					&remote_rp_stamp, remote_rp_pa,
+					remote_vars_pa, remote_vars);
 
-		part->remote_rp_pa = remote_rsvd_page_pa;
-		dev_dbg(xpc_part, "  remote_rp_pa = 0x%016lx\n",
-			part->remote_rp_pa);
+		if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
+			if (xpc_partition_disengage_requested(1UL << partid)) {
+				/*
+				 * Other side is waiting on us to disengage,
+				 * even though we already have.
+				 */
+				return;
+			}
+		} else {
+			/* other side doesn't support disengage requests */
+			xpc_clear_partition_disengage_request(1UL << partid);
+		}
 
-		part->remote_vars_pa = remote_vars_pa;
-		dev_dbg(xpc_part, "  remote_vars_pa = 0x%016lx\n",
-			part->remote_vars_pa);
+		xpc_activate_partition(part);
+		return;
+	}
 
-		part->last_heartbeat = remote_vars->heartbeat;
-		dev_dbg(xpc_part, "  last_heartbeat = 0x%016lx\n",
-			part->last_heartbeat);
+	DBUG_ON(part->remote_rp_version == 0);
+	DBUG_ON(part->remote_vars_version == 0);
+
+	if (!XPC_SUPPORTS_RP_STAMP(part->remote_rp_version)) {
+		DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(part->
+							remote_vars_version));
+
+		if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
+			DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->
+								version));
+			/* see if the other side rebooted */
+			if (part->remote_amos_page_pa ==
+				remote_vars->amos_page_pa &&
+					xpc_hb_allowed(sn_partition_id,
+								remote_vars)) {
+				/* doesn't look that way, so ignore the IPI */
+				return;
+			}
+		}
 
-		part->remote_vars_part_pa = remote_vars->vars_part_pa;
-		dev_dbg(xpc_part, "  remote_vars_part_pa = 0x%016lx\n",
-			part->remote_vars_part_pa);
+		/*
+		 * Other side rebooted and previous XPC didn't support the
+		 * disengage request, so we don't need to do anything special.
+		 */
 
-		part->remote_act_nasid = remote_vars->act_nasid;
-		dev_dbg(xpc_part, "  remote_act_nasid = 0x%x\n",
-			part->remote_act_nasid);
+		xpc_update_partition_info(part, remote_rp_version,
+						&remote_rp_stamp, remote_rp_pa,
+						remote_vars_pa, remote_vars);
+		part->reactivate_nasid = nasid;
+		XPC_DEACTIVATE_PARTITION(part, xpcReactivating);
+		return;
+	}
 
-		part->remote_act_phys_cpuid = remote_vars->act_phys_cpuid;
-		dev_dbg(xpc_part, "  remote_act_phys_cpuid = 0x%x\n",
-			part->remote_act_phys_cpuid);
+	DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version));
 
-		part->remote_amos_page_pa = remote_vars->amos_page_pa;
-		dev_dbg(xpc_part, "  remote_amos_page_pa = 0x%lx\n",
-			part->remote_amos_page_pa);
+	if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
+		DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version));
 
-		xpc_activate_partition(part);
+		/*
+		 * Other side rebooted and previous XPC did support the
+		 * disengage request, but the new one doesn't.
+		 */
 
-	} else if (part->remote_amos_page_pa != remote_vars->amos_page_pa ||
-			!XPC_HB_ALLOWED(sn_partition_id, remote_vars)) {
+		xpc_clear_partition_engaged(1UL << partid);
+		xpc_clear_partition_disengage_request(1UL << partid);
 
+		xpc_update_partition_info(part, remote_rp_version,
+						&remote_rp_stamp, remote_rp_pa,
+						remote_vars_pa, remote_vars);
+		reactivate = 1;
+
+	} else {
+		DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version));
+
+		stamp_diff = xpc_compare_stamps(&part->remote_rp_stamp,
+							&remote_rp_stamp);
+		if (stamp_diff != 0) {
+			DBUG_ON(stamp_diff >= 0);
+
+			/*
+			 * Other side rebooted and the previous XPC did support
+			 * the disengage request, as does the new one.
+			 */
+
+			DBUG_ON(xpc_partition_engaged(1UL << partid));
+			DBUG_ON(xpc_partition_disengage_requested(1UL <<
+								partid));
+
+			xpc_update_partition_info(part, remote_rp_version,
+						&remote_rp_stamp, remote_rp_pa,
+						remote_vars_pa, remote_vars);
+			reactivate = 1;
+		}
+	}
+
+	if (!xpc_partition_disengaged(part)) {
+		/* still waiting on other side to disengage from us */
+		return;
+	}
+
+	if (reactivate) {
 		part->reactivate_nasid = nasid;
 		XPC_DEACTIVATE_PARTITION(part, xpcReactivating);
+
+	} else if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version) &&
+			xpc_partition_disengage_requested(1UL << partid)) {
+		XPC_DEACTIVATE_PARTITION(part, xpcOtherGoingDown);
 	}
 }
 
@@ -646,12 +781,16 @@ xpc_identify_act_IRQ_sender(void)
 	struct xpc_rsvd_page *rp = (struct xpc_rsvd_page *) xpc_rsvd_page;
 
 
-	act_amos = xpc_vars->act_amos;
+	act_amos = xpc_vars->amos_page + XPC_ACTIVATE_IRQ_AMOS;
 
 
 	/* scan through act AMO variable looking for non-zero entries */
 	for (word = 0; word < XP_NASID_MASK_WORDS; word++) {
 
+		if (xpc_exiting) {
+			break;
+		}
+
 		nasid_mask = xpc_IPI_receive(&act_amos[word]);
 		if (nasid_mask == 0) {
 			/* no IRQs from nasids in this variable */
@@ -687,6 +826,55 @@ xpc_identify_act_IRQ_sender(void)
 }
 
 
+/*
+ * See if the other side has responded to a partition disengage request
+ * from us. Returns non-zero once disengaged or once the request timed out.
+ */
+int
+xpc_partition_disengaged(struct xpc_partition *part)
+{
+	partid_t partid = XPC_PARTID(part);
+	int disengaged;
+
+	disengaged = (xpc_partition_engaged(1UL << partid) == 0);
+	if (part->disengage_request_timeout) {
+		if (!disengaged) {
+			if (time_before(jiffies,
+					part->disengage_request_timeout)) {
+				/* timelimit hasn't been reached yet */
+				return 0;
+			}
+
+			/*
+			 * Other side hasn't responded to our disengage
+			 * request in a timely fashion, so assume it's dead.
+			 */
+
+			xpc_clear_partition_engaged(1UL << partid);
+			disengaged = 1;
+		}
+		part->disengage_request_timeout = 0;
+
+		/* cancel the timer function, provided it's not us */
+		if (!in_interrupt()) {
+			del_singleshot_timer_sync(&part->
+						      disengage_request_timer);
+		}
+
+		DBUG_ON(part->act_state != XPC_P_DEACTIVATING &&
+					part->act_state != XPC_P_INACTIVE);
+		if (part->act_state != XPC_P_INACTIVE) {
+			xpc_wakeup_channel_mgr(part);
+		}
+
+		if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
+			xpc_cancel_partition_disengage_request(part);
+		}
+	}
+	return disengaged;
+}
+
+
 /*
  * Mark specified partition as active.
  */
@@ -721,7 +909,6 @@ xpc_deactivate_partition(const int line, struct xpc_partition *part,
 				enum xpc_retval reason)
 {
 	unsigned long irq_flags;
-	partid_t partid = XPC_PARTID(part);
 
 
 	spin_lock_irqsave(&part->act_lock, irq_flags);
@@ -749,17 +936,27 @@ xpc_deactivate_partition(const int line, struct xpc_partition *part,
 
 	spin_unlock_irqrestore(&part->act_lock, irq_flags);
 
-	XPC_DISALLOW_HB(partid, xpc_vars);
+	if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
+		xpc_request_partition_disengage(part);
+		xpc_IPI_send_disengage(part);
+
+		/* set a timelimit on the disengage request */
+		part->disengage_request_timeout = jiffies +
+					(XPC_DISENGAGE_REQUEST_TIMELIMIT * HZ);
+		part->disengage_request_timer.expires =
+					part->disengage_request_timeout;
+		add_timer(&part->disengage_request_timer);
+	}
 
-	dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n", partid,
-		reason);
+	dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n",
+		XPC_PARTID(part), reason);
 
-	xpc_partition_down(part, reason);
+	xpc_partition_going_down(part, reason);
 }
 
 
 /*
- * Mark specified partition as active.
+ * Mark specified partition as inactive.
  */
 void
 xpc_mark_partition_inactive(struct xpc_partition *part)
@@ -792,7 +989,7 @@ xpc_discovery(void)
 	void *remote_rp_base;
 	struct xpc_rsvd_page *remote_rp;
 	struct xpc_vars *remote_vars;
-	u64 remote_rsvd_page_pa;
+	u64 remote_rp_pa;
 	u64 remote_vars_pa;
 	int region;
 	int max_regions;
@@ -877,7 +1074,7 @@ xpc_discovery(void)
 			/* pull over the reserved page structure */
 
 			ret = xpc_get_remote_rp(nasid, discovered_nasids,
-					      remote_rp, &remote_rsvd_page_pa);
+					      remote_rp, &remote_rp_pa);
 			if (ret != xpcSuccess) {
 				dev_dbg(xpc_part, "unable to get reserved page "
 					"from nasid %d, reason=%d\n", nasid,
@@ -948,6 +1145,13 @@ xpc_discovery(void)
 				remote_vars->act_nasid,
 				remote_vars->act_phys_cpuid);
 
+			if (XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->
+								version)) {
+				part->remote_amos_page_pa =
+						remote_vars->amos_page_pa;
+				xpc_mark_partition_disengaged(part);
+				xpc_cancel_partition_disengage_request(part);
+			}
 			xpc_IPI_send_activate(remote_vars);
 		}
 	}
diff --git a/include/asm-ia64/sn/xp.h b/include/asm-ia64/sn/xp.h
index 1df1c9f61a6..f3052a54932 100644
--- a/include/asm-ia64/sn/xp.h
+++ b/include/asm-ia64/sn/xp.h
@@ -217,7 +217,15 @@ enum xpc_retval {
 	xpcInvalidPartid,	/* 42: invalid partition ID */
 	xpcLocalPartid,		/* 43: local partition ID */
 
-	xpcUnknownReason	/* 44: unknown reason -- must be last in list */
+	xpcOtherGoingDown,	/* 44: other side going down, reason unknown */
+	xpcSystemGoingDown,	/* 45: system is going down, reason unknown */
+	xpcSystemHalt,		/* 46: system is being halted */
+	xpcSystemReboot,	/* 47: system is being rebooted */
+	xpcSystemPoweroff,	/* 48: system is being powered off */
+
+	xpcDisconnecting,	/* 49: channel disconnecting (closing) */
+
+	xpcUnknownReason	/* 50: unknown reason -- must be last in list */
 };
 
 
-- 
cgit v1.2.3-70-g09d2


From 5fbcf9a5c6904bd563f584d12d1f4d3f68a19d7d Mon Sep 17 00:00:00 2001
From: Mark Maule <maule@sgi.com>
Date: Tue, 6 Sep 2005 13:03:51 -0500
Subject: [IA64-SGI] volatile semantics in places where it seems necessary

Resend using accessors instead of volatile qualifiers, per hch's comments, and
easier-to-understand convenience macros, per rja's comments.

Patch to apply volatile semantics when accessing MMRs in various SN files.

Signed-off-by: Mark Maule <maule@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/sn/pci/pcibr/pcibr_reg.c   | 58 +++++++++++++++++++-----------------
 arch/ia64/sn/pci/tioca_provider.c    | 31 ++++++++++---------
 arch/ia64/sn/pci/tioce_provider.c    | 29 +++++++++---------
 include/asm-ia64/sn/io.h             |  9 ++++++
 include/asm-ia64/sn/tioca_provider.h | 14 ++++-----
 5 files changed, 76 insertions(+), 65 deletions(-)

(limited to 'include/asm-ia64')

diff --git a/arch/ia64/sn/pci/pcibr/pcibr_reg.c b/arch/ia64/sn/pci/pcibr/pcibr_reg.c
index 21426d02fbe..1624b39cb3e 100644
--- a/arch/ia64/sn/pci/pcibr/pcibr_reg.c
+++ b/arch/ia64/sn/pci/pcibr/pcibr_reg.c
@@ -29,10 +29,10 @@ void pcireg_control_bit_clr(struct pcibus_info *pcibus_info, uint64_t bits)
 	if (pcibus_info) {
 		switch (pcibus_info->pbi_bridge_type) {
 		case PCIBR_BRIDGETYPE_TIOCP:
-			ptr->tio.cp_control &= ~bits;
+			__sn_clrq_relaxed(&ptr->tio.cp_control, bits);
 			break;
 		case PCIBR_BRIDGETYPE_PIC:
-			ptr->pic.p_wid_control &= ~bits;
+			__sn_clrq_relaxed(&ptr->pic.p_wid_control, bits);
 			break;
 		default:
 			panic
@@ -49,10 +49,10 @@ void pcireg_control_bit_set(struct pcibus_info *pcibus_info, uint64_t bits)
 	if (pcibus_info) {
 		switch (pcibus_info->pbi_bridge_type) {
 		case PCIBR_BRIDGETYPE_TIOCP:
-			ptr->tio.cp_control |= bits;
+			__sn_setq_relaxed(&ptr->tio.cp_control, bits);
 			break;
 		case PCIBR_BRIDGETYPE_PIC:
-			ptr->pic.p_wid_control |= bits;
+			__sn_setq_relaxed(&ptr->pic.p_wid_control, bits);
 			break;
 		default:
 			panic
@@ -73,10 +73,10 @@ uint64_t pcireg_tflush_get(struct pcibus_info *pcibus_info)
 	if (pcibus_info) {
 		switch (pcibus_info->pbi_bridge_type) {
 		case PCIBR_BRIDGETYPE_TIOCP:
-			ret = ptr->tio.cp_tflush;
+			ret = __sn_readq_relaxed(&ptr->tio.cp_tflush);
 			break;
 		case PCIBR_BRIDGETYPE_PIC:
-			ret = ptr->pic.p_wid_tflush;
+			ret = __sn_readq_relaxed(&ptr->pic.p_wid_tflush);
 			break;
 		default:
 			panic
@@ -103,10 +103,10 @@ uint64_t pcireg_intr_status_get(struct pcibus_info * pcibus_info)
 	if (pcibus_info) {
 		switch (pcibus_info->pbi_bridge_type) {
 		case PCIBR_BRIDGETYPE_TIOCP:
-			ret = ptr->tio.cp_int_status;
+			ret = __sn_readq_relaxed(&ptr->tio.cp_int_status);
 			break;
 		case PCIBR_BRIDGETYPE_PIC:
-			ret = ptr->pic.p_int_status;
+			ret = __sn_readq_relaxed(&ptr->pic.p_int_status);
 			break;
 		default:
 			panic
@@ -127,10 +127,10 @@ void pcireg_intr_enable_bit_clr(struct pcibus_info *pcibus_info, uint64_t bits)
 	if (pcibus_info) {
 		switch (pcibus_info->pbi_bridge_type) {
 		case PCIBR_BRIDGETYPE_TIOCP:
-			ptr->tio.cp_int_enable &= ~bits;
+			__sn_clrq_relaxed(&ptr->tio.cp_int_enable, bits);
 			break;
 		case PCIBR_BRIDGETYPE_PIC:
-			ptr->pic.p_int_enable &= ~bits;
+			__sn_clrq_relaxed(&ptr->pic.p_int_enable, bits);
 			break;
 		default:
 			panic
@@ -147,10 +147,10 @@ void pcireg_intr_enable_bit_set(struct pcibus_info *pcibus_info, uint64_t bits)
 	if (pcibus_info) {
 		switch (pcibus_info->pbi_bridge_type) {
 		case PCIBR_BRIDGETYPE_TIOCP:
-			ptr->tio.cp_int_enable |= bits;
+			__sn_setq_relaxed(&ptr->tio.cp_int_enable, bits);
 			break;
 		case PCIBR_BRIDGETYPE_PIC:
-			ptr->pic.p_int_enable |= bits;
+			__sn_setq_relaxed(&ptr->pic.p_int_enable, bits);
 			break;
 		default:
 			panic
@@ -171,14 +171,16 @@ void pcireg_intr_addr_addr_set(struct pcibus_info *pcibus_info, int int_n,
 	if (pcibus_info) {
 		switch (pcibus_info->pbi_bridge_type) {
 		case PCIBR_BRIDGETYPE_TIOCP:
-			ptr->tio.cp_int_addr[int_n] &= ~TIOCP_HOST_INTR_ADDR;
-			ptr->tio.cp_int_addr[int_n] |=
-			    (addr & TIOCP_HOST_INTR_ADDR);
+			__sn_clrq_relaxed(&ptr->tio.cp_int_addr[int_n],
+			    TIOCP_HOST_INTR_ADDR);
+			__sn_setq_relaxed(&ptr->tio.cp_int_addr[int_n],
+			    (addr & TIOCP_HOST_INTR_ADDR));
 			break;
 		case PCIBR_BRIDGETYPE_PIC:
-			ptr->pic.p_int_addr[int_n] &= ~PIC_HOST_INTR_ADDR;
-			ptr->pic.p_int_addr[int_n] |=
-			    (addr & PIC_HOST_INTR_ADDR);
+			__sn_clrq_relaxed(&ptr->pic.p_int_addr[int_n],
+			    PIC_HOST_INTR_ADDR);
+			__sn_setq_relaxed(&ptr->pic.p_int_addr[int_n],
+			    (addr & PIC_HOST_INTR_ADDR));
 			break;
 		default:
 			panic
@@ -198,10 +200,10 @@ void pcireg_force_intr_set(struct pcibus_info *pcibus_info, int int_n)
 	if (pcibus_info) {
 		switch (pcibus_info->pbi_bridge_type) {
 		case PCIBR_BRIDGETYPE_TIOCP:
-			ptr->tio.cp_force_pin[int_n] = 1;
+			writeq(1, &ptr->tio.cp_force_pin[int_n]);
 			break;
 		case PCIBR_BRIDGETYPE_PIC:
-			ptr->pic.p_force_pin[int_n] = 1;
+			writeq(1, &ptr->pic.p_force_pin[int_n]);
 			break;
 		default:
 			panic
@@ -222,10 +224,12 @@ uint64_t pcireg_wrb_flush_get(struct pcibus_info *pcibus_info, int device)
 	if (pcibus_info) {
 		switch (pcibus_info->pbi_bridge_type) {
 		case PCIBR_BRIDGETYPE_TIOCP:
-			ret = ptr->tio.cp_wr_req_buf[device];
+			ret =
+			    __sn_readq_relaxed(&ptr->tio.cp_wr_req_buf[device]);
 			break;
 		case PCIBR_BRIDGETYPE_PIC:
-			ret = ptr->pic.p_wr_req_buf[device];
+			ret =
+			    __sn_readq_relaxed(&ptr->pic.p_wr_req_buf[device]);
 			break;
 		default:
 		      panic("pcireg_wrb_flush_get: unknown bridgetype bridge 0x%p", (void *)ptr);
@@ -244,10 +248,10 @@ void pcireg_int_ate_set(struct pcibus_info *pcibus_info, int ate_index,
 	if (pcibus_info) {
 		switch (pcibus_info->pbi_bridge_type) {
 		case PCIBR_BRIDGETYPE_TIOCP:
-			ptr->tio.cp_int_ate_ram[ate_index] = (uint64_t) val;
+			writeq(val, &ptr->tio.cp_int_ate_ram[ate_index]);
 			break;
 		case PCIBR_BRIDGETYPE_PIC:
-			ptr->pic.p_int_ate_ram[ate_index] = (uint64_t) val;
+			writeq(val, &ptr->pic.p_int_ate_ram[ate_index]);
 			break;
 		default:
 			panic
@@ -265,12 +269,10 @@ uint64_t *pcireg_int_ate_addr(struct pcibus_info *pcibus_info, int ate_index)
 	if (pcibus_info) {
 		switch (pcibus_info->pbi_bridge_type) {
 		case PCIBR_BRIDGETYPE_TIOCP:
-			ret =
-			    (uint64_t *) & (ptr->tio.cp_int_ate_ram[ate_index]);
+			ret = &ptr->tio.cp_int_ate_ram[ate_index];
 			break;
 		case PCIBR_BRIDGETYPE_PIC:
-			ret =
-			    (uint64_t *) & (ptr->pic.p_int_ate_ram[ate_index]);
+			ret = &ptr->pic.p_int_ate_ram[ate_index];
 			break;
 		default:
 			panic
diff --git a/arch/ia64/sn/pci/tioca_provider.c b/arch/ia64/sn/pci/tioca_provider.c
index ea09c12f025..eaae2472d6b 100644
--- a/arch/ia64/sn/pci/tioca_provider.c
+++ b/arch/ia64/sn/pci/tioca_provider.c
@@ -37,7 +37,7 @@ tioca_gart_init(struct tioca_kernel *tioca_kern)
 	uint64_t offset;
 	struct page *tmp;
 	struct tioca_common *tioca_common;
-	volatile struct tioca *ca_base;
+	struct tioca *ca_base;
 
 	tioca_common = tioca_kern->ca_common;
 	ca_base = (struct tioca *)tioca_common->ca_common.bs_base;
@@ -174,27 +174,29 @@ tioca_gart_init(struct tioca_kernel *tioca_kern)
 	 * 	DISABLE GART PREFETCHING due to hw bug tracked in SGI PV930029
 	 */
 
-	ca_base->ca_control1 |= CA_AGPDMA_OP_ENB_COMBDELAY;	/* PV895469 ? */
-	ca_base->ca_control2 &= ~(CA_GART_MEM_PARAM);
-	ca_base->ca_control2 |= (0x2ull << CA_GART_MEM_PARAM_SHFT);
+	__sn_setq_relaxed(&ca_base->ca_control1,
+			CA_AGPDMA_OP_ENB_COMBDELAY);	/* PV895469 ? */
+	__sn_clrq_relaxed(&ca_base->ca_control2, CA_GART_MEM_PARAM);
+	__sn_setq_relaxed(&ca_base->ca_control2,
+			(0x2ull << CA_GART_MEM_PARAM_SHFT));
 	tioca_kern->ca_gart_iscoherent = 1;
-	ca_base->ca_control2 &=
-	    ~(CA_GART_WR_PREFETCH_ENB | CA_GART_RD_PREFETCH_ENB);
+	__sn_clrq_relaxed(&ca_base->ca_control2,
+	    		(CA_GART_WR_PREFETCH_ENB | CA_GART_RD_PREFETCH_ENB));
 
 	/*
 	 * Unmask GART fetch error interrupts.  Clear residual errors first.
 	 */
 
-	ca_base->ca_int_status_alias = CA_GART_FETCH_ERR;
-	ca_base->ca_mult_error_alias = CA_GART_FETCH_ERR;
-	ca_base->ca_int_mask &= ~CA_GART_FETCH_ERR;
+	writeq(CA_GART_FETCH_ERR, &ca_base->ca_int_status_alias);
+	writeq(CA_GART_FETCH_ERR, &ca_base->ca_mult_error_alias);
+	__sn_clrq_relaxed(&ca_base->ca_int_mask, CA_GART_FETCH_ERR);
 
 	/*
 	 * Program the aperature and gart registers in TIOCA
 	 */
 
-	ca_base->ca_gart_aperature = ap_reg;
-	ca_base->ca_gart_ptr_table = tioca_kern->ca_gart_coretalk_addr | 1;
+	writeq(ap_reg, &ca_base->ca_gart_aperature);
+	writeq(tioca_kern->ca_gart_coretalk_addr|1, &ca_base->ca_gart_ptr_table);
 
 	return 0;
 }
@@ -211,7 +213,6 @@ void
 tioca_fastwrite_enable(struct tioca_kernel *tioca_kern)
 {
 	int cap_ptr;
-	uint64_t ca_control1;
 	uint32_t reg;
 	struct tioca *tioca_base;
 	struct pci_dev *pdev;
@@ -256,9 +257,7 @@ tioca_fastwrite_enable(struct tioca_kernel *tioca_kern)
 	 */
 
 	tioca_base = (struct tioca *)common->ca_common.bs_base;
-	ca_control1 = tioca_base->ca_control1;
-	ca_control1 |= CA_AGP_FW_ENABLE;
-	tioca_base->ca_control1 = ca_control1;
+	__sn_setq_relaxed(&tioca_base->ca_control1, CA_AGP_FW_ENABLE);
 }
 
 EXPORT_SYMBOL(tioca_fastwrite_enable);	/* used by agp-sgi */
@@ -345,7 +344,7 @@ tioca_dma_d48(struct pci_dev *pdev, uint64_t paddr)
 		return 0;
 	}
 
-	agp_dma_extn = ca_base->ca_agp_dma_addr_extn;
+	agp_dma_extn = __sn_readq_relaxed(&ca_base->ca_agp_dma_addr_extn);
 	if (node_upper != (agp_dma_extn >> CA_AGP_DMA_NODE_ID_SHFT)) {
 		printk(KERN_ERR "%s:  coretalk upper node (%u) "
 		       "mismatch with ca_agp_dma_addr_extn (%lu)\n",
diff --git a/arch/ia64/sn/pci/tioce_provider.c b/arch/ia64/sn/pci/tioce_provider.c
index 8e75db2b825..204826c2fa4 100644
--- a/arch/ia64/sn/pci/tioce_provider.c
+++ b/arch/ia64/sn/pci/tioce_provider.c
@@ -227,7 +227,7 @@ tioce_alloc_map(struct tioce_kernel *ce_kern, int type, int port,
 
 		ate = ATE_MAKE(addr, pagesize);
 		ate_shadow[i + j] = ate;
-		ate_reg[i + j] = ate;
+		writeq(ate, &ate_reg[i + j]);
 		addr += pagesize;
 	}
 
@@ -268,10 +268,10 @@ tioce_dma_d32(struct pci_dev *pdev, uint64_t ct_addr)
 	pcidev_to_tioce(pdev, &ce_mmr, &ce_kern, &port);
 
 	if (ce_kern->ce_port[port].dirmap_refcnt == 0) {
-		volatile uint64_t tmp;
+		uint64_t tmp;
 
 		ce_kern->ce_port[port].dirmap_shadow = ct_upper;
-		ce_mmr->ce_ure_dir_map[port] = ct_upper;
+		writeq(ct_upper, &ce_mmr->ce_ure_dir_map[port]);
 		tmp = ce_mmr->ce_ure_dir_map[port];
 		dma_ok = 1;
 	} else
@@ -343,7 +343,7 @@ tioce_dma_unmap(struct pci_dev *pdev, dma_addr_t bus_addr, int dir)
 	if (TIOCE_D32_ADDR(bus_addr)) {
 		if (--ce_kern->ce_port[port].dirmap_refcnt == 0) {
 			ce_kern->ce_port[port].dirmap_shadow = 0;
-			ce_mmr->ce_ure_dir_map[port] = 0;
+			writeq(0, &ce_mmr->ce_ure_dir_map[port]);
 		}
 	} else {
 		struct tioce_dmamap *map;
@@ -582,18 +582,18 @@ tioce_kern_init(struct tioce_common *tioce_common)
 	 */
 
 	tioce_mmr = (struct tioce *)tioce_common->ce_pcibus.bs_base;
-	tioce_mmr->ce_ure_page_map &= ~CE_URE_PAGESIZE_MASK;
-	tioce_mmr->ce_ure_page_map |= CE_URE_256K_PAGESIZE;
+	__sn_clrq_relaxed(&tioce_mmr->ce_ure_page_map, CE_URE_PAGESIZE_MASK);
+	__sn_setq_relaxed(&tioce_mmr->ce_ure_page_map, CE_URE_256K_PAGESIZE);
 	tioce_kern->ce_ate3240_pagesize = KB(256);
 
 	for (i = 0; i < TIOCE_NUM_M40_ATES; i++) {
 		tioce_kern->ce_ate40_shadow[i] = 0;
-		tioce_mmr->ce_ure_ate40[i] = 0;
+		writeq(0, &tioce_mmr->ce_ure_ate40[i]);
 	}
 
 	for (i = 0; i < TIOCE_NUM_M3240_ATES; i++) {
 		tioce_kern->ce_ate3240_shadow[i] = 0;
-		tioce_mmr->ce_ure_ate3240[i] = 0;
+		writeq(0, &tioce_mmr->ce_ure_ate3240[i]);
 	}
 
 	return tioce_kern;
@@ -665,7 +665,7 @@ tioce_force_interrupt(struct sn_irq_info *sn_irq_info)
 	default:
 		return;
 	}
-	ce_mmr->ce_adm_force_int = force_int_val;
+	writeq(force_int_val, &ce_mmr->ce_adm_force_int);
 }
 
 /**
@@ -686,6 +686,7 @@ tioce_target_interrupt(struct sn_irq_info *sn_irq_info)
 	struct tioce_common *ce_common;
 	struct tioce *ce_mmr;
 	int bit;
+	uint64_t vector;
 
 	pcidev_info = (struct pcidev_info *)sn_irq_info->irq_pciioinfo;
 	if (!pcidev_info)
@@ -696,11 +697,11 @@ tioce_target_interrupt(struct sn_irq_info *sn_irq_info)
 
 	bit = sn_irq_info->irq_int_bit;
 
-	ce_mmr->ce_adm_int_mask |= (1UL << bit);
-	ce_mmr->ce_adm_int_dest[bit] =
-		((uint64_t)sn_irq_info->irq_irq << INTR_VECTOR_SHFT) |
-			   sn_irq_info->irq_xtalkaddr;
-	ce_mmr->ce_adm_int_mask &= ~(1UL << bit);
+	__sn_setq_relaxed(&ce_mmr->ce_adm_int_mask, (1UL << bit));
+	vector = (uint64_t)sn_irq_info->irq_irq << INTR_VECTOR_SHFT;
+	vector |= sn_irq_info->irq_xtalkaddr;
+	writeq(vector, &ce_mmr->ce_adm_int_dest[bit]);
+	__sn_clrq_relaxed(&ce_mmr->ce_adm_int_mask, (1UL << bit));
 
 	tioce_force_interrupt(sn_irq_info);
 }
diff --git a/include/asm-ia64/sn/io.h b/include/asm-ia64/sn/io.h
index 42209733f6b..7597a52b426 100644
--- a/include/asm-ia64/sn/io.h
+++ b/include/asm-ia64/sn/io.h
@@ -35,6 +35,15 @@ extern void sn_dma_flush(unsigned long);
 #define __sn_readl_relaxed ___sn_readl_relaxed
 #define __sn_readq_relaxed ___sn_readq_relaxed
 
+/*
+ * Convenience macros for setting/clearing bits using the above accessors
+ */
+
+#define __sn_setq_relaxed(addr, val) \
+	writeq((__sn_readq_relaxed(addr) | (val)), (addr))
+#define __sn_clrq_relaxed(addr, val) \
+	writeq((__sn_readq_relaxed(addr) & ~(val)), (addr))
+
 /*
  * The following routines are SN Platform specific, called when
  * a reference is made to inX/outX set macros.  SN Platform
diff --git a/include/asm-ia64/sn/tioca_provider.h b/include/asm-ia64/sn/tioca_provider.h
index 5ccec608d32..b532ef6148e 100644
--- a/include/asm-ia64/sn/tioca_provider.h
+++ b/include/asm-ia64/sn/tioca_provider.h
@@ -182,11 +182,11 @@ tioca_tlbflush(struct tioca_kernel *tioca_kernel)
 			 * touch every CL aligned GART entry.
 			 */
 
-			ca_base->ca_control2 &= ~(CA_GART_MEM_PARAM);
-			ca_base->ca_control2 |= CA_GART_FLUSH_TLB;
-			ca_base->ca_control2 |=
-			    (0x2ull << CA_GART_MEM_PARAM_SHFT);
-			tmp = ca_base->ca_control2;
+			__sn_clrq_relaxed(&ca_base->ca_control2, CA_GART_MEM_PARAM);
+			__sn_setq_relaxed(&ca_base->ca_control2, CA_GART_FLUSH_TLB);
+			__sn_setq_relaxed(&ca_base->ca_control2,
+			    (0x2ull << CA_GART_MEM_PARAM_SHFT));
+			tmp = __sn_readq_relaxed(&ca_base->ca_control2);
 		}
 
 		return;
@@ -196,8 +196,8 @@ tioca_tlbflush(struct tioca_kernel *tioca_kernel)
 	 * Gart in uncached mode ... need an explicit flush.
 	 */
 
-	ca_base->ca_control2 |= CA_GART_FLUSH_TLB;
-	tmp = ca_base->ca_control2;
+	__sn_setq_relaxed(&ca_base->ca_control2, CA_GART_FLUSH_TLB);
+	tmp = __sn_readq_relaxed(&ca_base->ca_control2);
 }
 
 extern uint32_t	tioca_gart_found;
-- 
cgit v1.2.3-70-g09d2


From d8c97d5f3aa348272df2ccb4e224b1cf9a1eb6d7 Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Thu, 8 Sep 2005 12:39:59 -0700
Subject: [IA64] simplified efi memory map parsing

New version leaves the original memory map unmodified.
Also saves any granule trimmings for use by the uncached
memory allocator.

Inspired by Khalid Aziz (various traces of his patch still
remain).  Fixes to uncached_build_memmap() and sn2 testing
by Martin Hicks.

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/efi.c      | 423 +++++++++++++++++++++++++-------------------
 arch/ia64/kernel/setup.c    |   3 +
 arch/ia64/kernel/uncached.c |  17 +-
 include/asm-ia64/meminit.h  |   4 +-
 4 files changed, 254 insertions(+), 193 deletions(-)

(limited to 'include/asm-ia64')

diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index 179f230816e..1291db58172 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -239,57 +239,30 @@ is_available_memory (efi_memory_desc_t *md)
 	return 0;
 }
 
-/*
- * Trim descriptor MD so its starts at address START_ADDR.  If the descriptor covers
- * memory that is normally available to the kernel, issue a warning that some memory
- * is being ignored.
- */
-static void
-trim_bottom (efi_memory_desc_t *md, u64 start_addr)
-{
-	u64 num_skipped_pages;
-
-	if (md->phys_addr >= start_addr || !md->num_pages)
-		return;
-
-	num_skipped_pages = (start_addr - md->phys_addr) >> EFI_PAGE_SHIFT;
-	if (num_skipped_pages > md->num_pages)
-		num_skipped_pages = md->num_pages;
+typedef struct kern_memdesc {
+	u64 attribute;
+	u64 start;
+	u64 num_pages;
+} kern_memdesc_t;
 
-	if (is_available_memory(md))
-		printk(KERN_NOTICE "efi.%s: ignoring %luKB of memory at 0x%lx due to granule hole "
-		       "at 0x%lx\n", __FUNCTION__,
-		       (num_skipped_pages << EFI_PAGE_SHIFT) >> 10,
-		       md->phys_addr, start_addr - IA64_GRANULE_SIZE);
-	/*
-	 * NOTE: Don't set md->phys_addr to START_ADDR because that could cause the memory
-	 * descriptor list to become unsorted.  In such a case, md->num_pages will be
-	 * zero, so the Right Thing will happen.
-	 */
-	md->phys_addr += num_skipped_pages << EFI_PAGE_SHIFT;
-	md->num_pages -= num_skipped_pages;
-}
+static kern_memdesc_t *kern_memmap;
 
 static void
-trim_top (efi_memory_desc_t *md, u64 end_addr)
+walk (efi_freemem_callback_t callback, void *arg, u64 attr)
 {
-	u64 num_dropped_pages, md_end_addr;
+	kern_memdesc_t *k;
+	u64 start, end, voff;
 
-	md_end_addr = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
-
-	if (md_end_addr <= end_addr || !md->num_pages)
-		return;
-
-	num_dropped_pages = (md_end_addr - end_addr) >> EFI_PAGE_SHIFT;
-	if (num_dropped_pages > md->num_pages)
-		num_dropped_pages = md->num_pages;
-
-	if (is_available_memory(md))
-		printk(KERN_NOTICE "efi.%s: ignoring %luKB of memory at 0x%lx due to granule hole "
-		       "at 0x%lx\n", __FUNCTION__,
-		       (num_dropped_pages << EFI_PAGE_SHIFT) >> 10,
-		       md->phys_addr, end_addr);
-	md->num_pages -= num_dropped_pages;
+	voff = (attr == EFI_MEMORY_WB) ? PAGE_OFFSET : __IA64_UNCACHED_OFFSET;
+	for (k = kern_memmap; k->start != ~0UL; k++) {
+		if (k->attribute != attr)
+			continue;
+		start = PAGE_ALIGN(k->start);
+		end = (k->start + (k->num_pages << EFI_PAGE_SHIFT)) & PAGE_MASK;
+		if (start < end)
+			if ((*callback)(start + voff, end + voff, arg) < 0)
+				return;
+	}
 }
 
 /*
@@ -299,148 +272,19 @@ trim_top (efi_memory_desc_t *md, u64 end_addr)
 void
 efi_memmap_walk (efi_freemem_callback_t callback, void *arg)
 {
-	int prev_valid = 0;
-	struct range {
-		u64 start;
-		u64 end;
-	} prev, curr;
-	void *efi_map_start, *efi_map_end, *p, *q;
-	efi_memory_desc_t *md, *check_md;
-	u64 efi_desc_size, start, end, granule_addr, last_granule_addr, first_non_wb_addr = 0;
-	unsigned long total_mem = 0;
-
-	efi_map_start = __va(ia64_boot_param->efi_memmap);
-	efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
-	efi_desc_size = ia64_boot_param->efi_memdesc_size;
-
-	for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
-		md = p;
-
-		/* skip over non-WB memory descriptors; that's all we're interested in... */
-		if (!(md->attribute & EFI_MEMORY_WB))
-			continue;
-
-		/*
-		 * granule_addr is the base of md's first granule.
-		 * [granule_addr - first_non_wb_addr) is guaranteed to
-		 * be contiguous WB memory.
-		 */
-		granule_addr = GRANULEROUNDDOWN(md->phys_addr);
-		first_non_wb_addr = max(first_non_wb_addr, granule_addr);
-
-		if (first_non_wb_addr < md->phys_addr) {
-			trim_bottom(md, granule_addr + IA64_GRANULE_SIZE);
-			granule_addr = GRANULEROUNDDOWN(md->phys_addr);
-			first_non_wb_addr = max(first_non_wb_addr, granule_addr);
-		}
-
-		for (q = p; q < efi_map_end; q += efi_desc_size) {
-			check_md = q;
-
-			if ((check_md->attribute & EFI_MEMORY_WB) &&
-			    (check_md->phys_addr == first_non_wb_addr))
-				first_non_wb_addr += check_md->num_pages << EFI_PAGE_SHIFT;
-			else
-				break;		/* non-WB or hole */
-		}
-
-		last_granule_addr = GRANULEROUNDDOWN(first_non_wb_addr);
-		if (last_granule_addr < md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT))
-			trim_top(md, last_granule_addr);
-
-		if (is_available_memory(md)) {
-			if (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) >= max_addr) {
-				if (md->phys_addr >= max_addr)
-					continue;
-				md->num_pages = (max_addr - md->phys_addr) >> EFI_PAGE_SHIFT;
-				first_non_wb_addr = max_addr;
-			}
-
-			if (total_mem >= mem_limit)
-				continue;
-
-			if (total_mem + (md->num_pages << EFI_PAGE_SHIFT) > mem_limit) {
-				unsigned long limit_addr = md->phys_addr;
-
-				limit_addr += mem_limit - total_mem;
-				limit_addr = GRANULEROUNDDOWN(limit_addr);
-
-				if (md->phys_addr > limit_addr)
-					continue;
-
-				md->num_pages = (limit_addr - md->phys_addr) >>
-				                EFI_PAGE_SHIFT;
-				first_non_wb_addr = max_addr = md->phys_addr +
-				              (md->num_pages << EFI_PAGE_SHIFT);
-			}
-			total_mem += (md->num_pages << EFI_PAGE_SHIFT);
-
-			if (md->num_pages == 0)
-				continue;
-
-			curr.start = PAGE_OFFSET + md->phys_addr;
-			curr.end   = curr.start + (md->num_pages << EFI_PAGE_SHIFT);
-
-			if (!prev_valid) {
-				prev = curr;
-				prev_valid = 1;
-			} else {
-				if (curr.start < prev.start)
-					printk(KERN_ERR "Oops: EFI memory table not ordered!\n");
-
-				if (prev.end == curr.start) {
-					/* merge two consecutive memory ranges */
-					prev.end = curr.end;
-				} else {
-					start = PAGE_ALIGN(prev.start);
-					end = prev.end & PAGE_MASK;
-					if ((end > start) && (*callback)(start, end, arg) < 0)
-						return;
-					prev = curr;
-				}
-			}
-		}
-	}
-	if (prev_valid) {
-		start = PAGE_ALIGN(prev.start);
-		end = prev.end & PAGE_MASK;
-		if (end > start)
-			(*callback)(start, end, arg);
-	}
+	walk(callback, arg, EFI_MEMORY_WB);
 }
 
 /*
- * Walk the EFI memory map to pull out leftover pages in the lower
- * memory regions which do not end up in the regular memory map and
- * stick them into the uncached allocator
- *
- * The regular walk function is significantly more complex than the
- * uncached walk which means it really doesn't make sense to try and
- * marge the two.
+ * Walks the EFI memory map and calls CALLBACK once for each EFI memory descriptor that
+ * has memory that is available for the uncached allocator.
  */
-void __init
-efi_memmap_walk_uc (efi_freemem_callback_t callback)
+void
+efi_memmap_walk_uc (efi_freemem_callback_t callback, void *arg)
 {
-	void *efi_map_start, *efi_map_end, *p;
-	efi_memory_desc_t *md;
-	u64 efi_desc_size, start, end;
-
-	efi_map_start = __va(ia64_boot_param->efi_memmap);
-	efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
-	efi_desc_size = ia64_boot_param->efi_memdesc_size;
-
-	for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
-		md = p;
-		if (md->attribute == EFI_MEMORY_UC) {
-			start = PAGE_ALIGN(md->phys_addr);
-			end = PAGE_ALIGN((md->phys_addr+(md->num_pages << EFI_PAGE_SHIFT)) & PAGE_MASK);
-			if ((*callback)(start, end, NULL) < 0)
-				return;
-		}
-	}
+	walk(callback, arg, EFI_MEMORY_UC);
 }
 
-
 /*
  * Look for the PAL_CODE region reported by EFI and maps it using an
  * ITR to enable safe PAL calls in virtual mode.  See IA-64 Processor
@@ -862,3 +706,220 @@ efi_uart_console_only(void)
 	printk(KERN_ERR "Malformed %s value\n", name);
 	return 0;
 }
+
+#define efi_md_size(md)	((md)->num_pages << EFI_PAGE_SHIFT)
+
+static inline u64
+kmd_end(kern_memdesc_t *kmd)
+{
+	return (kmd->start + (kmd->num_pages << EFI_PAGE_SHIFT));
+}
+
+static inline u64
+efi_md_end(efi_memory_desc_t *md)
+{
+	return (md->phys_addr + efi_md_size(md));
+}
+
+static inline int
+efi_wb(efi_memory_desc_t *md)
+{
+	return (md->attribute & EFI_MEMORY_WB);
+}
+
+static inline int
+efi_uc(efi_memory_desc_t *md)
+{
+	return (md->attribute & EFI_MEMORY_UC);
+}
+
+/*
+ * Look for the first granule-aligned memory descriptor whose memory
+ * is big enough to hold the kernel memory map (kern_memmap). Make sure
+ * this descriptor is at least granule sized so it does not get trimmed.
+ */
+struct kern_memdesc *
+find_memmap_space (void)
+{
+	u64	contig_low=0, contig_high=0;
+	u64	as = 0, ae;
+	void *efi_map_start, *efi_map_end, *p, *q;
+	efi_memory_desc_t *md, *pmd = NULL, *check_md;
+	u64	space_needed, efi_desc_size;
+	unsigned long total_mem = 0;
+
+	efi_map_start = __va(ia64_boot_param->efi_memmap);
+	efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
+	efi_desc_size = ia64_boot_param->efi_memdesc_size;
+
+	/*
+	 * Worst case: we need 3 kernel descriptors for each efi descriptor
+	 * (if every entry has a WB part in the middle, and UC head and tail),
+	 * plus one for the end marker.
+	 */
+	space_needed = sizeof(kern_memdesc_t) *
+		(3 * (ia64_boot_param->efi_memmap_size/efi_desc_size) + 1);
+
+	for (p = efi_map_start; p < efi_map_end; pmd = md, p += efi_desc_size) {
+		md = p;
+		if (!efi_wb(md)) {
+			continue;
+		}
+		if (pmd == NULL || !efi_wb(pmd) || efi_md_end(pmd) != md->phys_addr) {
+			contig_low = GRANULEROUNDUP(md->phys_addr);
+			contig_high = efi_md_end(md);
+			for (q = p + efi_desc_size; q < efi_map_end; q += efi_desc_size) {
+				check_md = q;
+				if (!efi_wb(check_md))
+					break;
+				if (contig_high != check_md->phys_addr)
+					break;
+				contig_high = efi_md_end(check_md);
+			}
+			contig_high = GRANULEROUNDDOWN(contig_high);
+		}
+		if (!is_available_memory(md) || md->type == EFI_LOADER_DATA)
+			continue;
+
+		/* Round ends inward to granule boundaries */
+		as = max(contig_low, md->phys_addr);
+		ae = min(contig_high, efi_md_end(md));
+
+		/* keep within max_addr= command line arg */
+		ae = min(ae, max_addr);
+		if (ae <= as)
+			continue;
+
+		/* avoid going over mem= command line arg */
+		if (total_mem + (ae - as) > mem_limit)
+			ae -= total_mem + (ae - as) - mem_limit;
+
+		if (ae <= as)
+			continue;
+
+		if (ae - as > space_needed)
+			break;
+	}
+	if (p >= efi_map_end)
+		panic("Can't allocate space for kernel memory descriptors");
+
+	return __va(as);
+}
+
+/*
+ * Walk the EFI memory map and gather all memory available for kernel
+ * to use.  We can allocate partial granules only if the unavailable
+ * parts exist, and are WB.
+ */
+void
+efi_memmap_init(unsigned long *s, unsigned long *e)
+{
+	struct kern_memdesc *k, *prev = 0;
+	u64	contig_low=0, contig_high=0;
+	u64	as, ae, lim;
+	void *efi_map_start, *efi_map_end, *p, *q;
+	efi_memory_desc_t *md, *pmd = NULL, *check_md;
+	u64	efi_desc_size;
+	unsigned long total_mem = 0;
+
+	k = kern_memmap = find_memmap_space();
+
+	efi_map_start = __va(ia64_boot_param->efi_memmap);
+	efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
+	efi_desc_size = ia64_boot_param->efi_memdesc_size;
+
+	for (p = efi_map_start; p < efi_map_end; pmd = md, p += efi_desc_size) {
+		md = p;
+		if (!efi_wb(md)) {
+			if (efi_uc(md) && (md->type == EFI_CONVENTIONAL_MEMORY ||
+				    	   md->type == EFI_BOOT_SERVICES_DATA)) {
+				k->attribute = EFI_MEMORY_UC;
+				k->start = md->phys_addr;
+				k->num_pages = md->num_pages;
+				k++;
+			}
+			continue;
+		}
+		if (pmd == NULL || !efi_wb(pmd) || efi_md_end(pmd) != md->phys_addr) {
+			contig_low = GRANULEROUNDUP(md->phys_addr);
+			contig_high = efi_md_end(md);
+			for (q = p + efi_desc_size; q < efi_map_end; q += efi_desc_size) {
+				check_md = q;
+				if (!efi_wb(check_md))
+					break;
+				if (contig_high != check_md->phys_addr)
+					break;
+				contig_high = efi_md_end(check_md);
+			}
+			contig_high = GRANULEROUNDDOWN(contig_high);
+		}
+		if (!is_available_memory(md))
+			continue;
+
+		/*
+		 * Round ends inward to granule boundaries
+		 * Give trimmings to uncached allocator
+		 */
+		if (md->phys_addr < contig_low) {
+			lim = min(efi_md_end(md), contig_low);
+			if (efi_uc(md)) {
+				if (k > kern_memmap && (k-1)->attribute == EFI_MEMORY_UC &&
+				    kmd_end(k-1) == md->phys_addr) {
+					(k-1)->num_pages += (lim - md->phys_addr) >> EFI_PAGE_SHIFT;
+				} else {
+					k->attribute = EFI_MEMORY_UC;
+					k->start = md->phys_addr;
+					k->num_pages = (lim - md->phys_addr) >> EFI_PAGE_SHIFT;
+					k++;
+				}
+			}
+			as = contig_low;
+		} else
+			as = md->phys_addr;
+
+		if (efi_md_end(md) > contig_high) {
+			lim = max(md->phys_addr, contig_high);
+			if (efi_uc(md)) {
+				if (lim == md->phys_addr && k > kern_memmap &&
+				    (k-1)->attribute == EFI_MEMORY_UC &&
+				    kmd_end(k-1) == md->phys_addr) {
+					(k-1)->num_pages += md->num_pages;
+				} else {
+					k->attribute = EFI_MEMORY_UC;
+					k->start = lim;
+					k->num_pages = (efi_md_end(md) - lim) >> EFI_PAGE_SHIFT;
+					k++;
+				}
+			}
+			ae = contig_high;
+		} else
+			ae = efi_md_end(md);
+
+		/* keep within max_addr= command line arg */
+		ae = min(ae, max_addr);
+		if (ae <= as)
+			continue;
+
+		/* avoid going over mem= command line arg */
+		if (total_mem + (ae - as) > mem_limit)
+			ae -= total_mem + (ae - as) - mem_limit;
+
+		if (ae <= as)
+			continue;
+		if (prev && kmd_end(prev) == md->phys_addr) {
+			prev->num_pages += (ae - as) >> EFI_PAGE_SHIFT;
+			total_mem += ae - as;
+			continue;
+		}
+		k->attribute = EFI_MEMORY_WB;
+		k->start = as;
+		k->num_pages = (ae - as) >> EFI_PAGE_SHIFT;
+		total_mem += ae - as;
+		prev = k++;
+	}
+	k->start = ~0L; /* end-marker */
+
+	/* reserve the memory we are using for kern_memmap */
+	*s = (u64)kern_memmap;
+	*e = (u64)++k;
+}
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index 84f89da7c64..1658d687b79 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -211,6 +211,9 @@ reserve_memory (void)
 	}
 #endif
 
+	efi_memmap_init(&rsvd_region[n].start, &rsvd_region[n].end);
+	n++;
+
 	/* end of memory marker */
 	rsvd_region[n].start = ~0UL;
 	rsvd_region[n].end   = ~0UL;
diff --git a/arch/ia64/kernel/uncached.c b/arch/ia64/kernel/uncached.c
index 4e9d06c48a8..c6d40446c2c 100644
--- a/arch/ia64/kernel/uncached.c
+++ b/arch/ia64/kernel/uncached.c
@@ -205,23 +205,18 @@ EXPORT_SYMBOL(uncached_free_page);
 static int __init
 uncached_build_memmap(unsigned long start, unsigned long end, void *arg)
 {
-	long length;
-	unsigned long vstart, vend;
+	long length = end - start;
 	int node;
 
-	length = end - start;
-	vstart = start + __IA64_UNCACHED_OFFSET;
-	vend = end + __IA64_UNCACHED_OFFSET;
-
 	dprintk(KERN_ERR "uncached_build_memmap(%lx %lx)\n", start, end);
 
-	memset((char *)vstart, 0, length);
+	memset((char *)start, 0, length);
 
-	node = paddr_to_nid(start);
+	node = paddr_to_nid(start - __IA64_UNCACHED_OFFSET);
 
-	for (; vstart < vend ; vstart += PAGE_SIZE) {
-		dprintk(KERN_INFO "sticking %lx into the pool!\n", vstart);
-		gen_pool_free(uncached_pool[node], vstart, PAGE_SIZE);
+	for (; start < end ; start += PAGE_SIZE) {
+		dprintk(KERN_INFO "sticking %lx into the pool!\n", start);
+		gen_pool_free(uncached_pool[node], start, PAGE_SIZE);
 	}
 
 	return 0;
diff --git a/include/asm-ia64/meminit.h b/include/asm-ia64/meminit.h
index 1590dc65b30..90646632237 100644
--- a/include/asm-ia64/meminit.h
+++ b/include/asm-ia64/meminit.h
@@ -16,10 +16,11 @@
  * 	- initrd (optional)
  * 	- command line string
  * 	- kernel code & data
+ * 	- Kernel memory map built from EFI memory map
  *
  * More could be added if necessary
  */
-#define IA64_MAX_RSVD_REGIONS 5
+#define IA64_MAX_RSVD_REGIONS 6
 
 struct rsvd_region {
 	unsigned long start;	/* virtual address of beginning of element */
@@ -33,6 +34,7 @@ extern void find_memory (void);
 extern void reserve_memory (void);
 extern void find_initrd (void);
 extern int filter_rsvd_memory (unsigned long start, unsigned long end, void *arg);
+extern void efi_memmap_init(unsigned long *, unsigned long *);
 
 /*
  * For rounding an address to the next IA64_GRANULE_SIZE or order
-- 
cgit v1.2.3-70-g09d2


From 0b9afede3d9c66fef06f1d5ef5ff15c4b97730fc Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@hp.com>
Date: Tue, 6 Sep 2005 11:20:49 -0600
Subject: [IA64] more robust zx1/sx1000 machvec support

Machine vector selection has always been a bit of a hack given how
early in system boot it needs to be done.  Services like ACPI namespace
are not available and there are non-trivial problems to moving them to
early boot.  However, there's no reason we can't change to a different
machvec later in boot when the services we need are available.  By
adding an entry point for later initialization of the swiotlb, we can add
an error path for the hpzx1 machvec initialization and fall back to the
DIG machine vector if IOMMU hardware isn't found in the system.  Since
ia64 uses 4GB for zone DMA (no ISA support), it's trivial to allocate a
contiguous range from the slab for bounce buffer usage.

Signed-off-by: Alex Williamson <alex.williamson@hp.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/hp/common/hwsw_iommu.c         |  13 +++-
 arch/ia64/hp/common/sba_iommu.c          |  45 ++++++++++----
 arch/ia64/lib/swiotlb.c                  | 102 +++++++++++++++++++++++++++++++
 include/asm-ia64/machvec_hpzx1.h         |  21 +++----
 include/asm-ia64/machvec_hpzx1_swiotlb.h |   3 +-
 5 files changed, 156 insertions(+), 28 deletions(-)

(limited to 'include/asm-ia64')

diff --git a/arch/ia64/hp/common/hwsw_iommu.c b/arch/ia64/hp/common/hwsw_iommu.c
index 80f8ef01393..317c334c5a1 100644
--- a/arch/ia64/hp/common/hwsw_iommu.c
+++ b/arch/ia64/hp/common/hwsw_iommu.c
@@ -17,7 +17,7 @@
 #include <asm/machvec.h>
 
 /* swiotlb declarations & definitions: */
-extern void swiotlb_init_with_default_size (size_t size);
+extern int swiotlb_late_init_with_default_size (size_t size);
 extern ia64_mv_dma_alloc_coherent	swiotlb_alloc_coherent;
 extern ia64_mv_dma_free_coherent	swiotlb_free_coherent;
 extern ia64_mv_dma_map_single		swiotlb_map_single;
@@ -67,7 +67,16 @@ void
 hwsw_init (void)
 {
 	/* default to a smallish 2MB sw I/O TLB */
-	swiotlb_init_with_default_size (2 * (1<<20));
+	if (swiotlb_late_init_with_default_size (2 * (1<<20)) != 0) {
+#ifdef CONFIG_IA64_GENERIC
+		/* Better to have normal DMA than panic */
+		printk(KERN_WARNING "%s: Failed to initialize software I/O TLB,"
+		       " reverting to hpzx1 platform vector\n", __FUNCTION__);
+		machvec_init("hpzx1");
+#else
+		panic("Unable to initialize software I/O TLB services");
+#endif
+	}
 }
 
 void *
diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c
index 11957598a8b..e64ca04ace8 100644
--- a/arch/ia64/hp/common/sba_iommu.c
+++ b/arch/ia64/hp/common/sba_iommu.c
@@ -2028,9 +2028,40 @@ static struct acpi_driver acpi_sba_ioc_driver = {
 static int __init
 sba_init(void)
 {
+	if (!ia64_platform_is("hpzx1") && !ia64_platform_is("hpzx1_swiotlb"))
+		return 0;
+
 	acpi_bus_register_driver(&acpi_sba_ioc_driver);
-	if (!ioc_list)
+	if (!ioc_list) {
+#ifdef CONFIG_IA64_GENERIC
+		extern int swiotlb_late_init_with_default_size (size_t size);
+
+		/*
+		 * If we didn't find something sba_iommu can claim, we
+		 * need to setup the swiotlb and switch to the dig machvec.
+		 */
+		if (swiotlb_late_init_with_default_size(64 * (1<<20)) != 0)
+			panic("Unable to find SBA IOMMU or initialize "
+			      "software I/O TLB: Try machvec=dig boot option");
+		machvec_init("dig");
+#else
+		panic("Unable to find SBA IOMMU: Try a generic or DIG kernel");
+#endif
 		return 0;
+	}
+
+#if defined(CONFIG_IA64_GENERIC) || defined(CONFIG_IA64_HP_ZX1_SWIOTLB)
+	/*
+	 * hpzx1_swiotlb needs to have a fairly small swiotlb bounce
+	 * buffer setup to support devices with smaller DMA masks than
+	 * sba_iommu can handle.
+	 */
+	if (ia64_platform_is("hpzx1_swiotlb")) {
+		extern void hwsw_init(void);
+
+		hwsw_init();
+	}
+#endif
 
 #ifdef CONFIG_PCI
 	{
@@ -2048,18 +2079,6 @@ sba_init(void)
 
 subsys_initcall(sba_init); /* must be initialized after ACPI etc., but before any drivers... */
 
-extern void dig_setup(char**);
-/*
- * MAX_DMA_ADDRESS needs to be setup prior to paging_init to do any good,
- * so we use the platform_setup hook to fix it up.
- */
-void __init
-sba_setup(char **cmdline_p)
-{
-	MAX_DMA_ADDRESS = ~0UL;
-	dig_setup(cmdline_p);
-}
-
 static int __init
 nosbagart(char *str)
 {
diff --git a/arch/ia64/lib/swiotlb.c b/arch/ia64/lib/swiotlb.c
index dbc0b3e449c..875b0c16250 100644
--- a/arch/ia64/lib/swiotlb.c
+++ b/arch/ia64/lib/swiotlb.c
@@ -49,6 +49,15 @@
  */
 #define IO_TLB_SHIFT 11
 
+#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))
+
+/*
+ * Minimum IO TLB size to bother booting with.  Systems with mainly
+ * 64bit capable cards will only lightly use the swiotlb.  If we can't
+ * allocate a contiguous 1MB, we're probably in trouble anyway.
+ */
+#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
+
 int swiotlb_force;
 
 /*
@@ -154,6 +163,99 @@ swiotlb_init (void)
 	swiotlb_init_with_default_size(64 * (1<<20));	/* default to 64MB */
 }
 
+/*
+ * Systems with larger DMA zones (those that don't support ISA) can
+ * initialize the swiotlb later using the slab allocator if needed.
+ * This should be just like above, but with some error catching.
+ */
+int
+swiotlb_late_init_with_default_size (size_t default_size)
+{
+	unsigned long i, req_nslabs = io_tlb_nslabs;
+	unsigned int order;
+
+	if (!io_tlb_nslabs) {
+		io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
+		io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
+	}
+
+	/*
+	 * Get IO TLB memory from the low pages
+	 */
+	order = get_order(io_tlb_nslabs * (1 << IO_TLB_SHIFT));
+	io_tlb_nslabs = SLABS_PER_PAGE << order;
+
+	while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
+		io_tlb_start = (char *)__get_free_pages(GFP_DMA | __GFP_NOWARN,
+		                                        order);
+		if (io_tlb_start)
+			break;
+		order--;
+	}
+
+	if (!io_tlb_start)
+		goto cleanup1;
+
+	if (order != get_order(io_tlb_nslabs * (1 << IO_TLB_SHIFT))) {
+		printk(KERN_WARNING "Warning: only able to allocate %ld MB "
+		       "for software IO TLB\n", (PAGE_SIZE << order) >> 20);
+		io_tlb_nslabs = SLABS_PER_PAGE << order;
+	}
+	io_tlb_end = io_tlb_start + io_tlb_nslabs * (1 << IO_TLB_SHIFT);
+	memset(io_tlb_start, 0, io_tlb_nslabs * (1 << IO_TLB_SHIFT));
+
+	/*
+	 * Allocate and initialize the free list array.  This array is used
+	 * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
+	 * between io_tlb_start and io_tlb_end.
+	 */
+	io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL,
+	                              get_order(io_tlb_nslabs * sizeof(int)));
+	if (!io_tlb_list)
+		goto cleanup2;
+
+	for (i = 0; i < io_tlb_nslabs; i++)
+ 		io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
+	io_tlb_index = 0;
+
+	io_tlb_orig_addr = (unsigned char **)__get_free_pages(GFP_KERNEL,
+	                           get_order(io_tlb_nslabs * sizeof(char *)));
+	if (!io_tlb_orig_addr)
+		goto cleanup3;
+
+	memset(io_tlb_orig_addr, 0, io_tlb_nslabs * sizeof(char *));
+
+	/*
+	 * Get the overflow emergency buffer
+	 */
+	io_tlb_overflow_buffer = (void *)__get_free_pages(GFP_DMA,
+	                                          get_order(io_tlb_overflow));
+	if (!io_tlb_overflow_buffer)
+		goto cleanup4;
+
+	printk(KERN_INFO "Placing %ldMB software IO TLB between 0x%lx - "
+	       "0x%lx\n", (io_tlb_nslabs * (1 << IO_TLB_SHIFT)) >> 20,
+	       virt_to_phys(io_tlb_start), virt_to_phys(io_tlb_end));
+
+	return 0;
+
+cleanup4:
+	free_pages((unsigned long)io_tlb_orig_addr, get_order(io_tlb_nslabs *
+	                                                      sizeof(char *)));
+	io_tlb_orig_addr = NULL;
+cleanup3:
+	free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
+	                                                 sizeof(int)));
+	io_tlb_list = NULL;
+	io_tlb_end = NULL;
+cleanup2:
+	free_pages((unsigned long)io_tlb_start, order);
+	io_tlb_start = NULL;
+cleanup1:
+	io_tlb_nslabs = req_nslabs;
+	return -ENOMEM;
+}
+
 static inline int
 address_needs_mapping(struct device *hwdev, dma_addr_t addr)
 {
diff --git a/include/asm-ia64/machvec_hpzx1.h b/include/asm-ia64/machvec_hpzx1.h
index daafe504c5f..e90daf9ce34 100644
--- a/include/asm-ia64/machvec_hpzx1.h
+++ b/include/asm-ia64/machvec_hpzx1.h
@@ -1,8 +1,7 @@
 #ifndef _ASM_IA64_MACHVEC_HPZX1_h
 #define _ASM_IA64_MACHVEC_HPZX1_h
 
-extern ia64_mv_setup_t dig_setup;
-extern ia64_mv_setup_t			sba_setup;
+extern ia64_mv_setup_t			dig_setup;
 extern ia64_mv_dma_alloc_coherent	sba_alloc_coherent;
 extern ia64_mv_dma_free_coherent	sba_free_coherent;
 extern ia64_mv_dma_map_single		sba_map_single;
@@ -19,15 +18,15 @@ extern ia64_mv_dma_mapping_error	sba_dma_mapping_error;
  * platform's machvec structure.  When compiling a non-generic kernel,
  * the macros are used directly.
  */
-#define platform_name			"hpzx1"
-#define platform_setup			sba_setup
-#define platform_dma_init		machvec_noop
-#define platform_dma_alloc_coherent	sba_alloc_coherent
-#define platform_dma_free_coherent	sba_free_coherent
-#define platform_dma_map_single		sba_map_single
-#define platform_dma_unmap_single	sba_unmap_single
-#define platform_dma_map_sg		sba_map_sg
-#define platform_dma_unmap_sg		sba_unmap_sg
+#define platform_name				"hpzx1"
+#define platform_setup				dig_setup
+#define platform_dma_init			machvec_noop
+#define platform_dma_alloc_coherent		sba_alloc_coherent
+#define platform_dma_free_coherent		sba_free_coherent
+#define platform_dma_map_single			sba_map_single
+#define platform_dma_unmap_single		sba_unmap_single
+#define platform_dma_map_sg			sba_map_sg
+#define platform_dma_unmap_sg			sba_unmap_sg
 #define platform_dma_sync_single_for_cpu	machvec_dma_sync_single
 #define platform_dma_sync_sg_for_cpu		machvec_dma_sync_sg
 #define platform_dma_sync_single_for_device	machvec_dma_sync_single
diff --git a/include/asm-ia64/machvec_hpzx1_swiotlb.h b/include/asm-ia64/machvec_hpzx1_swiotlb.h
index 9924b1b00a6..f00a34a148f 100644
--- a/include/asm-ia64/machvec_hpzx1_swiotlb.h
+++ b/include/asm-ia64/machvec_hpzx1_swiotlb.h
@@ -2,7 +2,6 @@
 #define _ASM_IA64_MACHVEC_HPZX1_SWIOTLB_h
 
 extern ia64_mv_setup_t				dig_setup;
-extern ia64_mv_dma_init				hwsw_init;
 extern ia64_mv_dma_alloc_coherent		hwsw_alloc_coherent;
 extern ia64_mv_dma_free_coherent		hwsw_free_coherent;
 extern ia64_mv_dma_map_single			hwsw_map_single;
@@ -26,7 +25,7 @@ extern ia64_mv_dma_sync_sg_for_device		hwsw_sync_sg_for_device;
 #define platform_name				"hpzx1_swiotlb"
 
 #define platform_setup				dig_setup
-#define platform_dma_init			hwsw_init
+#define platform_dma_init			machvec_noop
 #define platform_dma_alloc_coherent		hwsw_alloc_coherent
 #define platform_dma_free_coherent		hwsw_free_coherent
 #define platform_dma_map_single			hwsw_map_single
-- 
cgit v1.2.3-70-g09d2


From 24ee0a6d7b0a52b140c880aae24c255de3b4a9a1 Mon Sep 17 00:00:00 2001
From: Jack Steiner <steiner@sgi.com>
Date: Mon, 12 Sep 2005 12:15:43 -0500
Subject: [IA64] Cleanup use of various #defines related to nodes

Some of the SN code & #defines related to compact nodes & IO discovery
have gotten stale over the years. This patch attempts to clean them up.
Some of the various SN MAX_xxx #defines were also unclear & misused.

The primary changes are:

	- use MAX_NUMNODES. This is the generic linux #define for the number
	  of nodes that are known to the generic kernel. Arrays & loops
	  for constructs that are 1:1 with linux-defined nodes should
	  use the linux #define - not an SN equivalent.

	- use MAX_COMPACT_NODES for MAX_NUMNODES + NUM_TIOS. This is the
	  number of nodes in the SSI system. Compact nodes are a hack to
	  get around the IA64 architectural limit of 256 nodes. Large SGI
	  systems have more than 256 nodes. When we upgrade to ACPI3.0,
	  I _hope_ that all nodes will be real nodes that are known to
	  the generic kernel. That will allow us to delete the notion
	  of "compact nodes".

	- add MAX_NUMALINK_NODES for the total number of nodes that
	  are in the numalink domain - all partitions.

	- simplified (understandable) scan_for_ionodes()

	- small amount of cleanup related to cnodes

Signed-off-by: Jack Steiner <steiner@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/sn/kernel/io_init.c       |   4 +-
 arch/ia64/sn/kernel/setup.c         | 160 ++++++++++++------------------------
 arch/ia64/sn/kernel/sn2/sn_hwperf.c |   4 +-
 arch/ia64/sn/kernel/tiocx.c         |   5 +-
 arch/ia64/sn/kernel/xpc_partition.c |   2 +-
 drivers/char/snsc.c                 |   4 +-
 include/asm-ia64/sn/arch.h          |  36 +++++---
 include/asm-ia64/sn/io.h            |   2 +-
 include/asm-ia64/sn/klconfig.h      |  34 +-------
 include/asm-ia64/sn/sn_cpuid.h      |   3 -
 include/asm-ia64/sn/sn_sal.h        |  12 +--
 include/asm-ia64/sn/xp.h            |   2 +-
 12 files changed, 92 insertions(+), 176 deletions(-)

(limited to 'include/asm-ia64')

diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c
index 906622d9f93..b4f5053f5e1 100644
--- a/arch/ia64/sn/kernel/io_init.c
+++ b/arch/ia64/sn/kernel/io_init.c
@@ -22,8 +22,6 @@
 #include "xtalk/hubdev.h"
 #include "xtalk/xwidgetdev.h"
 
-nasid_t master_nasid = INVALID_NASID;	/* Partition Master */
-
 static struct list_head sn_sysdata_list;
 
 /* sysdata list struct */
@@ -165,7 +163,7 @@ static void sn_fixup_ionodes(void)
 	 * Get SGI Specific HUB chipset information.
 	 * Inform Prom that this kernel can support domain bus numbering.
 	 */
-	for (i = 0; i < numionodes; i++) {
+	for (i = 0; i < num_cnodes; i++) {
 		hubdev = (struct hubdev_info *)(NODEPDA(i)->pdinfo);
 		nasid = cnodeid_to_nasid(i);
 		hubdev->max_segment_number = 0xffffffff;
diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c
index 6f8c5883716..0fb579ef18c 100644
--- a/arch/ia64/sn/kernel/setup.c
+++ b/arch/ia64/sn/kernel/setup.c
@@ -59,8 +59,6 @@ DEFINE_PER_CPU(struct pda_s, pda_percpu);
 
 #define MAX_PHYS_MEMORY		(1UL << IA64_MAX_PHYS_BITS)	/* Max physical address supported */
 
-lboard_t *root_lboard[MAX_COMPACT_NODES];
-
 extern void bte_init_node(nodepda_t *, cnodeid_t);
 
 extern void sn_timer_init(void);
@@ -97,15 +95,15 @@ u8 sn_region_size;
 EXPORT_SYMBOL(sn_region_size);
 int sn_prom_type;	/* 0=hardware, 1=medusa/realprom, 2=medusa/fakeprom */
 
-short physical_node_map[MAX_PHYSNODE_ID];
+short physical_node_map[MAX_NUMALINK_NODES];
 static unsigned long sn_prom_features[MAX_PROM_FEATURE_SETS];
 
 EXPORT_SYMBOL(physical_node_map);
 
-int numionodes;
+int num_cnodes;
 
 static void sn_init_pdas(char **);
-static void scan_for_ionodes(void);
+static void build_cnode_tables(void);
 
 static nodepda_t *nodepdaindr[MAX_COMPACT_NODES];
 
@@ -139,19 +137,6 @@ extern char drive_info[4 * 16];
 char drive_info[4 * 16];
 #endif
 
-/*
- * Get nasid of current cpu early in boot before nodepda is initialized
- */
-static int
-boot_get_nasid(void)
-{
-	int nasid;
-
-	if (ia64_sn_get_sapic_info(get_sapicid(), &nasid, NULL, NULL))
-		BUG();
-	return nasid;
-}
-
 /*
  * This routine can only be used during init, since
  * smp_boot_data is an init data structure.
@@ -223,7 +208,6 @@ void __init early_sn_setup(void)
 }
 
 extern int platform_intr_list[];
-extern nasid_t master_nasid;
 static int __initdata shub_1_1_found = 0;
 
 /*
@@ -269,7 +253,6 @@ static void __init sn_check_for_wars(void)
 void __init sn_setup(char **cmdline_p)
 {
 	long status, ticks_per_sec, drift;
-	int pxm;
 	u32 version = sn_sal_rev();
 	extern void sn_cpu_init(void);
 
@@ -300,11 +283,10 @@ void __init sn_setup(char **cmdline_p)
 
 	MAX_DMA_ADDRESS = PAGE_OFFSET + MAX_PHYS_MEMORY;
 
-	memset(physical_node_map, -1, sizeof(physical_node_map));
-	for (pxm = 0; pxm < MAX_PXM_DOMAINS; pxm++)
-		if (pxm_to_nid_map[pxm] != -1)
-			physical_node_map[pxm_to_nasid(pxm)] =
-			    pxm_to_nid_map[pxm];
+	/*
+	 * Build the tables for managing cnodes.
+	 */
+	build_cnode_tables();
 
 	/*
 	 * Old PROMs do not provide an ACPI FADT. Disable legacy keyboard
@@ -319,8 +301,6 @@ void __init sn_setup(char **cmdline_p)
 
 	printk("SGI SAL version %x.%02x\n", version >> 8, version & 0x00FF);
 
-	master_nasid = boot_get_nasid();
-
 	status =
 	    ia64_sal_freq_base(SAL_FREQ_BASE_REALTIME_CLOCK, &ticks_per_sec,
 			       &drift);
@@ -378,15 +358,6 @@ static void __init sn_init_pdas(char **cmdline_p)
 {
 	cnodeid_t cnode;
 
-	memset(sn_cnodeid_to_nasid, -1,
-			sizeof(__ia64_per_cpu_var(__sn_cnodeid_to_nasid)));
-	for_each_online_node(cnode)
-		sn_cnodeid_to_nasid[cnode] =
-				pxm_to_nasid(nid_to_pxm_map[cnode]);
-
-	numionodes = num_online_nodes();
-	scan_for_ionodes();
-
 	/*
 	 * Allocate & initalize the nodepda for each node.
 	 */
@@ -402,7 +373,7 @@ static void __init sn_init_pdas(char **cmdline_p)
 	/*
 	 * Allocate & initialize nodepda for TIOs.  For now, put them on node 0.
 	 */
-	for (cnode = num_online_nodes(); cnode < numionodes; cnode++) {
+	for (cnode = num_online_nodes(); cnode < num_cnodes; cnode++) {
 		nodepdaindr[cnode] =
 		    alloc_bootmem_node(NODE_DATA(0), sizeof(nodepda_t));
 		memset(nodepdaindr[cnode], 0, sizeof(nodepda_t));
@@ -411,7 +382,7 @@ static void __init sn_init_pdas(char **cmdline_p)
 	/*
 	 * Now copy the array of nodepda pointers to each nodepda.
 	 */
-	for (cnode = 0; cnode < numionodes; cnode++)
+	for (cnode = 0; cnode < num_cnodes; cnode++)
 		memcpy(nodepdaindr[cnode]->pernode_pdaindr, nodepdaindr,
 		       sizeof(nodepdaindr));
 
@@ -428,7 +399,7 @@ static void __init sn_init_pdas(char **cmdline_p)
 	 * Initialize the per node hubdev.  This includes IO Nodes and
 	 * headless/memless nodes.
 	 */
-	for (cnode = 0; cnode < numionodes; cnode++) {
+	for (cnode = 0; cnode < num_cnodes; cnode++) {
 		hubdev_init_node(nodepdaindr[cnode], cnode);
 	}
 }
@@ -553,87 +524,58 @@ void __init sn_cpu_init(void)
 }
 
 /*
- * Scan klconfig for ionodes.  Add the nasids to the
- * physical_node_map and the pda and increment numionodes.
+ * Build tables for converting between NASIDs and cnodes.
  */
+static inline int __init board_needs_cnode(int type)
+{
+	return (type == KLTYPE_SNIA || type == KLTYPE_TIO);
+}
 
-static void __init scan_for_ionodes(void)
+void __init build_cnode_tables(void)
 {
-	int nasid = 0;
+	int nasid;
+	int node;
 	lboard_t *brd;
 
-	/* fakeprom does not support klgraph */
-	if (IS_RUNNING_ON_FAKE_PROM())
-		return;
-
-	/* Setup ionodes with memory */
-	for (nasid = 0; nasid < MAX_PHYSNODE_ID; nasid += 2) {
-		char *klgraph_header;
-		cnodeid_t cnodeid;
-
-		if (physical_node_map[nasid] == -1)
-			continue;
+	memset(physical_node_map, -1, sizeof(physical_node_map));
+	memset(sn_cnodeid_to_nasid, -1,
+			sizeof(__ia64_per_cpu_var(__sn_cnodeid_to_nasid)));
 
-		cnodeid = -1;
-		klgraph_header = __va(ia64_sn_get_klconfig_addr(nasid));
-		if (!klgraph_header) {
-			BUG();	/* All nodes must have klconfig tables! */
-		}
-		cnodeid = nasid_to_cnodeid(nasid);
-		root_lboard[cnodeid] = (lboard_t *)
-		    NODE_OFFSET_TO_LBOARD((nasid),
-					  ((kl_config_hdr_t
-					    *) (klgraph_header))->
-					  ch_board_info);
+	/*
+	 * First populate the tables with C/M bricks. This ensures that
+	 * cnode == node for all C & M bricks.
+	 */
+	for_each_online_node(node) {
+		nasid = pxm_to_nasid(nid_to_pxm_map[node]);
+		sn_cnodeid_to_nasid[node] = nasid;
+		physical_node_map[nasid] = node;
 	}
 
-	/* Scan headless/memless IO Nodes. */
-	for (nasid = 0; nasid < MAX_PHYSNODE_ID; nasid += 2) {
-		/* if there's no nasid, don't try to read the klconfig on the node */
-		if (physical_node_map[nasid] == -1)
-			continue;
-		brd = find_lboard_any((lboard_t *)
-				      root_lboard[nasid_to_cnodeid(nasid)],
-				      KLTYPE_SNIA);
-		if (brd) {
-			brd = KLCF_NEXT_ANY(brd);	/* Skip this node's lboard */
-			if (!brd)
-				continue;
-		}
-
-		brd = find_lboard_any(brd, KLTYPE_SNIA);
+	/*
+	 * num_cnodes is total number of C/M/TIO bricks. Because of the 256 node
+	 * limit on the number of nodes, we can't use the generic node numbers 
+	 * for this. Note that num_cnodes is incremented below as TIOs or
+	 * headless/memoryless nodes are discovered.
+	 */
+	num_cnodes = num_online_nodes();
 
-		while (brd) {
-			sn_cnodeid_to_nasid[numionodes] = brd->brd_nasid;
-			physical_node_map[brd->brd_nasid] = numionodes;
-			root_lboard[numionodes] = brd;
-			numionodes++;
-			brd = KLCF_NEXT_ANY(brd);
-			if (!brd)
-				break;
-
-			brd = find_lboard_any(brd, KLTYPE_SNIA);
-		}
-	}
+	/* fakeprom does not support klgraph */
+	if (IS_RUNNING_ON_FAKE_PROM())
+		return;
 
-	/* Scan for TIO nodes. */
-	for (nasid = 0; nasid < MAX_PHYSNODE_ID; nasid += 2) {
-		/* if there's no nasid, don't try to read the klconfig on the node */
-		if (physical_node_map[nasid] == -1)
-			continue;
-		brd = find_lboard_any((lboard_t *)
-				      root_lboard[nasid_to_cnodeid(nasid)],
-				      KLTYPE_TIO);
+	/* Find TIOs & headless/memoryless nodes and add them to the tables */
+	for_each_online_node(node) {
+		kl_config_hdr_t *klgraph_header;
+		nasid = cnodeid_to_nasid(node);
+		if ((klgraph_header = ia64_sn_get_klconfig_addr(nasid)) == NULL)
+			BUG();
+		brd = NODE_OFFSET_TO_LBOARD(nasid, klgraph_header->ch_board_info);
 		while (brd) {
-			sn_cnodeid_to_nasid[numionodes] = brd->brd_nasid;
-			physical_node_map[brd->brd_nasid] = numionodes;
-			root_lboard[numionodes] = brd;
-			numionodes++;
-			brd = KLCF_NEXT_ANY(brd);
-			if (!brd)
-				break;
-
-			brd = find_lboard_any(brd, KLTYPE_TIO);
+			if (board_needs_cnode(brd->brd_type) && physical_node_map[brd->brd_nasid] < 0) {
+				sn_cnodeid_to_nasid[num_cnodes] = brd->brd_nasid;
+				physical_node_map[brd->brd_nasid] = num_cnodes++;
+			}
+			brd = find_lboard_next(brd);
 		}
 	}
 }
diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
index 0513aacac8c..6c6fbca3229 100644
--- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c
+++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
@@ -476,8 +476,8 @@ static int sn_topology_show(struct seq_file *s, void *d)
 				for_each_online_cpu(j) {
 					seq_printf(s, j ? ":%d" : ", dist %d",
 						node_distance(
-						    cpuid_to_cnodeid(i),
-						    cpuid_to_cnodeid(j)));
+						    cpu_to_node(i),
+						    cpu_to_node(j)));
 				}
 				seq_putc(s, '\n');
 			}
diff --git a/arch/ia64/sn/kernel/tiocx.c b/arch/ia64/sn/kernel/tiocx.c
index b45db5133f5..7e9764a69dc 100644
--- a/arch/ia64/sn/kernel/tiocx.c
+++ b/arch/ia64/sn/kernel/tiocx.c
@@ -486,11 +486,10 @@ static int __init tiocx_init(void)
 
 	bus_register(&tiocx_bus_type);
 
-	for (cnodeid = 0; cnodeid < MAX_COMPACT_NODES; cnodeid++) {
+	for (cnodeid = 0; cnodeid < num_cnodes; cnodeid++) {
 		nasid_t nasid;
 
-		if ((nasid = cnodeid_to_nasid(cnodeid)) < 0)
-			break;	/* No more nasids .. bail out of loop */
+		nasid = cnodeid_to_nasid(cnodeid);
 
 		if ((nasid & 0x1) && is_fpga_brick(nasid)) {
 			struct hubdev_info *hubdev;
diff --git a/arch/ia64/sn/kernel/xpc_partition.c b/arch/ia64/sn/kernel/xpc_partition.c
index 578265ea9e6..72ef330fb78 100644
--- a/arch/ia64/sn/kernel/xpc_partition.c
+++ b/arch/ia64/sn/kernel/xpc_partition.c
@@ -44,7 +44,7 @@ static u64 xpc_sh2_IPI_access3;
 
 
 /* original protection values for each node */
-u64 xpc_prot_vec[MAX_COMPACT_NODES];
+u64 xpc_prot_vec[MAX_NUMNODES];
 
 
 /* this partition's reserved page */
diff --git a/drivers/char/snsc.c b/drivers/char/snsc.c
index 261a41bf6d0..a025a89ea70 100644
--- a/drivers/char/snsc.c
+++ b/drivers/char/snsc.c
@@ -377,7 +377,7 @@ scdrv_init(void)
 	dev_t first_dev, dev;
 	nasid_t event_nasid = ia64_sn_get_console_nasid();
 
-	if (alloc_chrdev_region(&first_dev, 0, numionodes,
+	if (alloc_chrdev_region(&first_dev, 0, num_cnodes,
 				SYSCTL_BASENAME) < 0) {
 		printk("%s: failed to register SN system controller device\n",
 		       __FUNCTION__);
@@ -385,7 +385,7 @@ scdrv_init(void)
 	}
 	snsc_class = class_create(THIS_MODULE, SYSCTL_BASENAME);
 
-	for (cnode = 0; cnode < numionodes; cnode++) {
+	for (cnode = 0; cnode < num_cnodes; cnode++) {
 			geoid = cnodeid_get_geoid(cnode);
 			devnamep = devname;
 			format_module_id(devnamep, geo_module(geoid),
diff --git a/include/asm-ia64/sn/arch.h b/include/asm-ia64/sn/arch.h
index ab827d29856..8fce5a6db95 100644
--- a/include/asm-ia64/sn/arch.h
+++ b/include/asm-ia64/sn/arch.h
@@ -17,6 +17,32 @@
 #include <asm/sn/types.h>
 #include <asm/sn/sn_cpuid.h>
 
+/*
+ * This is the maximum number of NUMALINK nodes that can be part of a single
+ * SSI kernel. This number includes C-brick, M-bricks, and TIOs. Nodes in
+ * remote partitions are NOT included in this number.
+ * The number of compact nodes cannot exceed size of a coherency domain.
+ * The purpose of this define is to specify a node count that includes
+ * all C/M/TIO nodes in an SSI system.
+ *
+ * SGI system can currently support up to 256 C/M nodes plus additional TIO nodes.
+ *
+ * 	Note: ACPI20 has an architectural limit of 256 nodes. When we upgrade
+ * 	to ACPI3.0, this limit will be removed. The notion of "compact nodes"
+ * 	should be deleted and TIOs should be included in MAX_NUMNODES.
+ */
+#define MAX_COMPACT_NODES	512
+
+/*
+ * Maximum number of nodes in all partitions and in all coherency domains.
+ * This is the total number of nodes accessible in the numalink fabric. It
+ * includes all C & M bricks, plus all TIOs.
+ *
+ * This value is also the value of the maximum number of NASIDs in the numalink
+ * fabric.
+ */
+#define MAX_NUMALINK_NODES	2048
+
 /*
  * The following defines attributes of the HUB chip. These attributes are
  * frequently referenced. They are kept in the per-cpu data areas of each cpu.
@@ -40,15 +66,6 @@ DECLARE_PER_CPU(struct sn_hub_info_s, __sn_hub_info);
 #define enable_shub_wars_1_1()	(sn_hub_info->shub_1_1_found)
 
 
-/*
- * This is the maximum number of nodes that can be part of a kernel.
- * Effectively, it's the maximum number of compact node ids (cnodeid_t).
- * This is not necessarily the same as MAX_NASIDS.
- */
-#define MAX_COMPACT_NODES	2048
-#define CPUS_PER_NODE		4
-
-
 /*
  * Compact node ID to nasid mappings kept in the per-cpu data areas of each
  * cpu.
@@ -57,7 +74,6 @@ DECLARE_PER_CPU(short, __sn_cnodeid_to_nasid[MAX_NUMNODES]);
 #define sn_cnodeid_to_nasid	(&__get_cpu_var(__sn_cnodeid_to_nasid[0]))
 
 
-
 extern u8 sn_partition_id;
 extern u8 sn_system_size;
 extern u8 sn_sharing_domain_size;
diff --git a/include/asm-ia64/sn/io.h b/include/asm-ia64/sn/io.h
index 42209733f6b..ac30c747c5a 100644
--- a/include/asm-ia64/sn/io.h
+++ b/include/asm-ia64/sn/io.h
@@ -14,7 +14,7 @@
 extern void * sn_io_addr(unsigned long port) __attribute_const__; /* Forward definition */
 extern void __sn_mmiowb(void); /* Forward definition */
 
-extern int numionodes;
+extern int num_cnodes;
 
 #define __sn_mf_a()   ia64_mfa()
 
diff --git a/include/asm-ia64/sn/klconfig.h b/include/asm-ia64/sn/klconfig.h
index 9f920c70a62..bcbf209d63b 100644
--- a/include/asm-ia64/sn/klconfig.h
+++ b/include/asm-ia64/sn/klconfig.h
@@ -208,19 +208,6 @@ typedef struct lboard_s {
 	klconf_off_t	brd_next_same;    /* Next BOARD with same nasid */
 } lboard_t;
 
-#define KLCF_NUM_COMPS(_brd)	((_brd)->brd_numcompts)
-#define NODE_OFFSET_TO_KLINFO(n,off)    ((klinfo_t*) TO_NODE_CAC(n,off))
-#define KLCF_NEXT(_brd)         \
-        ((_brd)->brd_next_same ?     \
-         (NODE_OFFSET_TO_LBOARD((_brd)->brd_next_same_host, (_brd)->brd_next_same)): NULL)
-#define KLCF_NEXT_ANY(_brd)         \
-        ((_brd)->brd_next_any ?     \
-         (NODE_OFFSET_TO_LBOARD(NASID_GET(_brd), (_brd)->brd_next_any)): NULL)
-#define KLCF_COMP(_brd, _ndx)   \
-                ((((_brd)->brd_compts[(_ndx)]) == 0) ? 0 : \
-			(NODE_OFFSET_TO_KLINFO(NASID_GET(_brd), (_brd)->brd_compts[(_ndx)])))
-
-
 /*
  * Generic info structure. This stores common info about a 
  * component.
@@ -249,24 +236,11 @@ typedef struct klinfo_s {                  /* Generic info */
 } klinfo_t ;
 
 
-static inline lboard_t *find_lboard_any(lboard_t * start, unsigned char brd_type)
+static inline lboard_t *find_lboard_next(lboard_t * brd)
 {
-        /* Search all boards stored on this node. */
-
-        while (start) {
-                if (start->brd_type == brd_type)
-                        return start;
-                start = KLCF_NEXT_ANY(start);
-        }
-        /* Didn't find it. */
-        return (lboard_t *) NULL;
+	if (brd && brd->brd_next_any)
+		return NODE_OFFSET_TO_LBOARD(NASID_GET(brd), brd->brd_next_any);
+        return NULL;
 }
 
-
-/* external declarations of Linux kernel functions. */
-
-extern lboard_t *root_lboard[];
-extern klinfo_t *find_component(lboard_t *brd, klinfo_t *kli, unsigned char type);
-extern klinfo_t *find_first_component(lboard_t *brd, unsigned char type);
-
 #endif /* _ASM_IA64_SN_KLCONFIG_H */
diff --git a/include/asm-ia64/sn/sn_cpuid.h b/include/asm-ia64/sn/sn_cpuid.h
index d2c1d34dcce..749deb2ca6c 100644
--- a/include/asm-ia64/sn/sn_cpuid.h
+++ b/include/asm-ia64/sn/sn_cpuid.h
@@ -105,7 +105,6 @@ extern short physical_node_map[];	/* indexed by nasid to get cnode */
 #define cpuid_to_nasid(cpuid)		(sn_nodepda->phys_cpuid[cpuid].nasid)
 #define cpuid_to_subnode(cpuid)		(sn_nodepda->phys_cpuid[cpuid].subnode)
 #define cpuid_to_slice(cpuid)		(sn_nodepda->phys_cpuid[cpuid].slice)
-#define cpuid_to_cnodeid(cpuid)		(physical_node_map[cpuid_to_nasid(cpuid)])
 
 
 /*
@@ -113,8 +112,6 @@ extern short physical_node_map[];	/* indexed by nasid to get cnode */
  * of potentially large tables.
  */
 extern int nasid_slice_to_cpuid(int, int);
-#define nasid_slice_to_cpu_physical_id(nasid, slice)			\
-	cpu_physical_id(nasid_slice_to_cpuid(nasid, slice))
 
 /*
  * cnodeid_to_nasid - convert a cnodeid to a NASID
diff --git a/include/asm-ia64/sn/sn_sal.h b/include/asm-ia64/sn/sn_sal.h
index fea35b33d4e..5ad855db846 100644
--- a/include/asm-ia64/sn/sn_sal.h
+++ b/include/asm-ia64/sn/sn_sal.h
@@ -198,26 +198,16 @@ ia64_sn_get_master_baseio_nasid(void)
 	return ret_stuff.v0;
 }
 
-static inline char *
+static inline void *
 ia64_sn_get_klconfig_addr(nasid_t nasid)
 {
 	struct ia64_sal_retval ret_stuff;
-	int cnodeid;
 
-	cnodeid = nasid_to_cnodeid(nasid);
 	ret_stuff.status = 0;
 	ret_stuff.v0 = 0;
 	ret_stuff.v1 = 0;
 	ret_stuff.v2 = 0;
 	SAL_CALL(ret_stuff, SN_SAL_GET_KLCONFIG_ADDR, (u64)nasid, 0, 0, 0, 0, 0, 0);
-
-	/*
-	 * We should panic if a valid cnode nasid does not produce
-	 * a klconfig address.
-	 */
-	if (ret_stuff.status != 0) {
-		panic("ia64_sn_get_klconfig_addr: Returned error %lx\n", ret_stuff.status);
-	}
 	return ret_stuff.v0 ? __va(ret_stuff.v0) : NULL;
 }
 
diff --git a/include/asm-ia64/sn/xp.h b/include/asm-ia64/sn/xp.h
index 1df1c9f61a6..75a2f39c6ac 100644
--- a/include/asm-ia64/sn/xp.h
+++ b/include/asm-ia64/sn/xp.h
@@ -49,7 +49,7 @@
  * C-brick nasids, thus the need for bitmaps which don't account for
  * odd-numbered (non C-brick) nasids.
  */
-#define XP_MAX_PHYSNODE_ID	(MAX_PHYSNODE_ID / 2)
+#define XP_MAX_PHYSNODE_ID	(MAX_NUMALINK_NODES / 2)
 #define XP_NASID_MASK_BYTES	((XP_MAX_PHYSNODE_ID + 7) / 8)
 #define XP_NASID_MASK_WORDS	((XP_MAX_PHYSNODE_ID + 63) / 64)
 
-- 
cgit v1.2.3-70-g09d2


From 61b9cf7c6cf5077c40ad37480fa56f6574af3db5 Mon Sep 17 00:00:00 2001
From: Mark Maule <maule@sgi.com>
Date: Fri, 23 Sep 2005 12:31:53 -0500
Subject: [IA64-SGI] fix sn_pci_legacy_read/fix sn_pci_legacy_write

This patch adds a #define for SN_SAL_IOIF_PCI_SAFE and makes that the
preferred method of implementing sn_pci_legacy_read() and
sn_pci_legacy_write().

This SAL call has been present in SGI proms since version 4.10.  If the
SN_SAL_IOIF_PCI_SAFE call fails, revert to the previous code for compatibility
with older proms.

Signed-off-by: Mark Maule <maule@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/sn/pci/pci_dma.c   | 46 ++++++++++++++++++++++++++++++++++++++++++++
 include/asm-ia64/sn/sn_sal.h |  2 +-
 2 files changed, 47 insertions(+), 1 deletion(-)

(limited to 'include/asm-ia64')

diff --git a/arch/ia64/sn/pci/pci_dma.c b/arch/ia64/sn/pci/pci_dma.c
index 0e4b9ad9ef0..abdf6eea6ac 100644
--- a/arch/ia64/sn/pci/pci_dma.c
+++ b/arch/ia64/sn/pci/pci_dma.c
@@ -326,6 +326,29 @@ int sn_pci_legacy_read(struct pci_bus *bus, u16 port, u32 *val, u8 size)
 {
 	unsigned long addr;
 	int ret;
+	struct ia64_sal_retval isrv;
+
+	/*
+	 * First, try the SN_SAL_IOIF_PCI_SAFE SAL call which can work
+	 * around hw issues at the pci bus level.  SGI proms older than
+	 * 4.10 don't implment this.
+	 */
+
+	SAL_CALL(isrv, SN_SAL_IOIF_PCI_SAFE,
+		pci_domain_nr(bus), bus->number,
+		0, /* io */
+		0, /* read */
+		port, size, __pa(val));
+
+	if (isrv.status == 0)
+		return size;
+
+	/*
+	 * If the above failed, retry using the SAL_PROBE call which should
+	 * be present in all proms (but which cannot work round PCI chipset
+	 * bugs).  This code is retained for compatability with old
+	 * pre-4.10 proms, and should be removed at some point in the future.
+	 */
 
 	if (!SN_PCIBUS_BUSSOFT(bus))
 		return -ENODEV;
@@ -349,6 +372,29 @@ int sn_pci_legacy_write(struct pci_bus *bus, u16 port, u32 val, u8 size)
 	int ret = size;
 	unsigned long paddr;
 	unsigned long *addr;
+	struct ia64_sal_retval isrv;
+
+	/*
+	 * First, try the SN_SAL_IOIF_PCI_SAFE SAL call which can work
+	 * around hw issues at the pci bus level.  SGI proms older than
+	 * 4.10 don't implment this.
+	 */
+
+	SAL_CALL(isrv, SN_SAL_IOIF_PCI_SAFE,
+		pci_domain_nr(bus), bus->number,
+		0, /* io */
+		1, /* write */
+		port, size, __pa(&val));
+
+	if (isrv.status == 0)
+		return size;
+
+	/*
+	 * If the above failed, retry using the SAL_PROBE call which should
+	 * be present in all proms (but which cannot work round PCI chipset
+	 * bugs).  This code is retained for compatability with old
+	 * pre-4.10 proms, and should be removed at some point in the future.
+	 */
 
 	if (!SN_PCIBUS_BUSSOFT(bus)) {
 		ret = -ENODEV;
diff --git a/include/asm-ia64/sn/sn_sal.h b/include/asm-ia64/sn/sn_sal.h
index fea35b33d4e..b2d32837723 100644
--- a/include/asm-ia64/sn/sn_sal.h
+++ b/include/asm-ia64/sn/sn_sal.h
@@ -67,7 +67,7 @@
 #define  SN_SAL_IOIF_INTERRUPT			   0x0200004a
 #define  SN_SAL_HWPERF_OP			   0x02000050   // lock
 #define  SN_SAL_IOIF_ERROR_INTERRUPT		   0x02000051
-
+#define  SN_SAL_IOIF_PCI_SAFE			   0x02000052
 #define  SN_SAL_IOIF_SLOT_ENABLE		   0x02000053
 #define  SN_SAL_IOIF_SLOT_DISABLE		   0x02000054
 #define  SN_SAL_IOIF_GET_HUBDEV_INFO		   0x02000055
-- 
cgit v1.2.3-70-g09d2


From 59c422358d6573716f2bf2e78e5b12c20eff5a31 Mon Sep 17 00:00:00 2001
From: Jack Steiner <steiner@sgi.com>
Date: Tue, 27 Sep 2005 08:25:32 -0500
Subject: [IA64-SGI] Increase max system size of SGI SN systems

Increase the maximum system size of SGI SN systems. Note that
this is not the maximum SSI size. The maximum system size is
the number of nodes in the numalink domain.

Signed-off-by: Jack Steiner <steiner@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 include/asm-ia64/sn/arch.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/asm-ia64')

diff --git a/include/asm-ia64/sn/arch.h b/include/asm-ia64/sn/arch.h
index 8fce5a6db95..1a3831c04af 100644
--- a/include/asm-ia64/sn/arch.h
+++ b/include/asm-ia64/sn/arch.h
@@ -41,7 +41,7 @@
  * This value is also the value of the maximum number of NASIDs in the numalink
  * fabric.
  */
-#define MAX_NUMALINK_NODES	2048
+#define MAX_NUMALINK_NODES	16384
 
 /*
  * The following defines attributes of the HUB chip. These attributes are
-- 
cgit v1.2.3-70-g09d2


From 36735554571ea619e38210a20f429798de90adc1 Mon Sep 17 00:00:00 2001
From: Dean Roe <roe@sgi.com>
Date: Mon, 3 Oct 2005 11:51:45 -0500
Subject: [IA64-SGI] Remove references to the SN bist_lock

Remove all references to the bist_lock in the SN code as it
is not used for anything.

Signed-off-by: Dean Roe <roe@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 include/asm-ia64/sn/nodepda.h | 1 -
 include/asm-ia64/sn/sn_sal.h  | 2 --
 2 files changed, 3 deletions(-)

(limited to 'include/asm-ia64')

diff --git a/include/asm-ia64/sn/nodepda.h b/include/asm-ia64/sn/nodepda.h
index 47bb8100fd0..6f6d69e39ff 100644
--- a/include/asm-ia64/sn/nodepda.h
+++ b/include/asm-ia64/sn/nodepda.h
@@ -55,7 +55,6 @@ struct nodepda_s {
 	 */
 	struct phys_cpuid	phys_cpuid[NR_CPUS];
 	spinlock_t		ptc_lock ____cacheline_aligned_in_smp;
-	spinlock_t		bist_lock;
 };
 
 typedef struct nodepda_s nodepda_t;
diff --git a/include/asm-ia64/sn/sn_sal.h b/include/asm-ia64/sn/sn_sal.h
index fea35b33d4e..33aa4043f87 100644
--- a/include/asm-ia64/sn/sn_sal.h
+++ b/include/asm-ia64/sn/sn_sal.h
@@ -694,12 +694,10 @@ sn_change_memprotect(u64 paddr, u64 len, u64 perms, u64 *nasid_array)
 	unsigned long irq_flags;
 
 	cnodeid = nasid_to_cnodeid(get_node_number(paddr));
-	// spin_lock(&NODEPDA(cnodeid)->bist_lock);
 	local_irq_save(irq_flags);
 	ia64_sal_oemcall_nolock(&ret_stuff, SN_SAL_MEMPROTECT, paddr, len,
 				(u64)nasid_array, perms, 0, 0, 0);
 	local_irq_restore(irq_flags);
-	// spin_unlock(&NODEPDA(cnodeid)->bist_lock);
 	return ret_stuff.status;
 }
 #define SN_MEMPROT_ACCESS_CLASS_0		0x14a080
-- 
cgit v1.2.3-70-g09d2


From 1be7d9935b9c7fb9bd5964bfaf3ac543381277db Mon Sep 17 00:00:00 2001
From: Bob Picco <bob.picco@hp.com>
Date: Tue, 4 Oct 2005 15:13:50 -0400
Subject: [PATCH] V5 ia64 SPARSEMEM - conditional changes for SPARSEMEM

This patch introduces the conditional changes required for the three
memory models.  With [patch 1/4] there are three memory models; FLATMEM,
DISCONTIG and SPARSEMEM.  Also a new arch include file sparsemem.h is
introduced for defining SPARSEMEM parameters.

Signed-off-by: Bob Picco <bob.picco@hp.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 include/asm-ia64/meminit.h   |  2 +-
 include/asm-ia64/mmzone.h    | 10 +++++++---
 include/asm-ia64/nodedata.h  |  4 ++--
 include/asm-ia64/page.h      |  6 +++---
 include/asm-ia64/sparsemem.h | 20 ++++++++++++++++++++
 5 files changed, 33 insertions(+), 9 deletions(-)
 create mode 100644 include/asm-ia64/sparsemem.h

(limited to 'include/asm-ia64')

diff --git a/include/asm-ia64/meminit.h b/include/asm-ia64/meminit.h
index 1590dc65b30..74477fc31d5 100644
--- a/include/asm-ia64/meminit.h
+++ b/include/asm-ia64/meminit.h
@@ -41,7 +41,7 @@ extern int filter_rsvd_memory (unsigned long start, unsigned long end, void *arg
 #define GRANULEROUNDUP(n)	(((n)+IA64_GRANULE_SIZE-1) & ~(IA64_GRANULE_SIZE-1))
 #define ORDERROUNDDOWN(n)	((n) & ~((PAGE_SIZE<<MAX_ORDER)-1))
 
-#ifdef CONFIG_DISCONTIGMEM
+#ifdef CONFIG_NUMA
   extern void call_pernode_memory (unsigned long start, unsigned long len, void *func);
 #else
 # define call_pernode_memory(start, len, func)	(*func)(start, len, 0)
diff --git a/include/asm-ia64/mmzone.h b/include/asm-ia64/mmzone.h
index d32f51e3d6c..34efe88eb84 100644
--- a/include/asm-ia64/mmzone.h
+++ b/include/asm-ia64/mmzone.h
@@ -15,7 +15,7 @@
 #include <asm/page.h>
 #include <asm/meminit.h>
 
-#ifdef CONFIG_DISCONTIGMEM
+#ifdef CONFIG_NUMA
 
 static inline int pfn_to_nid(unsigned long pfn)
 {
@@ -31,6 +31,10 @@ static inline int pfn_to_nid(unsigned long pfn)
 #endif
 }
 
+#ifdef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
+extern int early_pfn_to_nid(unsigned long pfn);
+#endif
+
 #ifdef CONFIG_IA64_DIG /* DIG systems are small */
 # define MAX_PHYSNODE_ID	8
 # define NR_NODE_MEMBLKS	(MAX_NUMNODES * 8)
@@ -39,8 +43,8 @@ static inline int pfn_to_nid(unsigned long pfn)
 # define NR_NODE_MEMBLKS	(MAX_NUMNODES * 4)
 #endif
 
-#else /* CONFIG_DISCONTIGMEM */
+#else /* CONFIG_NUMA */
 # define NR_NODE_MEMBLKS	(MAX_NUMNODES * 4)
-#endif /* CONFIG_DISCONTIGMEM */
+#endif /* CONFIG_NUMA */
 
 #endif /* _ASM_IA64_MMZONE_H */
diff --git a/include/asm-ia64/nodedata.h b/include/asm-ia64/nodedata.h
index 6b0f3ed89b7..9978c7ce754 100644
--- a/include/asm-ia64/nodedata.h
+++ b/include/asm-ia64/nodedata.h
@@ -17,7 +17,7 @@
 #include <asm/percpu.h>
 #include <asm/mmzone.h>
 
-#ifdef CONFIG_DISCONTIGMEM
+#ifdef CONFIG_NUMA
 
 /*
  * Node Data. One of these structures is located on each node of a NUMA system.
@@ -47,6 +47,6 @@ struct ia64_node_data {
  */
 #define NODE_DATA(nid)		(local_node_data->pg_data_ptrs[nid])
 
-#endif /* CONFIG_DISCONTIGMEM */
+#endif /* CONFIG_NUMA */
 
 #endif /* _ASM_IA64_NODEDATA_H */
diff --git a/include/asm-ia64/page.h b/include/asm-ia64/page.h
index 9edffad8c28..ef436b9d06a 100644
--- a/include/asm-ia64/page.h
+++ b/include/asm-ia64/page.h
@@ -102,15 +102,15 @@ do {						\
 
 #ifdef CONFIG_VIRTUAL_MEM_MAP
 extern int ia64_pfn_valid (unsigned long pfn);
-#else
+#elif defined(CONFIG_FLATMEM)
 # define ia64_pfn_valid(pfn) 1
 #endif
 
-#ifndef CONFIG_DISCONTIGMEM
+#ifdef CONFIG_FLATMEM
 # define pfn_valid(pfn)		(((pfn) < max_mapnr) && ia64_pfn_valid(pfn))
 # define page_to_pfn(page)	((unsigned long) (page - mem_map))
 # define pfn_to_page(pfn)	(mem_map + (pfn))
-#else
+#elif defined(CONFIG_DISCONTIGMEM)
 extern struct page *vmem_map;
 extern unsigned long max_low_pfn;
 # define pfn_valid(pfn)		(((pfn) < max_low_pfn) && ia64_pfn_valid(pfn))
diff --git a/include/asm-ia64/sparsemem.h b/include/asm-ia64/sparsemem.h
new file mode 100644
index 00000000000..67a7c40ec27
--- /dev/null
+++ b/include/asm-ia64/sparsemem.h
@@ -0,0 +1,20 @@
+#ifndef _ASM_IA64_SPARSEMEM_H
+#define _ASM_IA64_SPARSEMEM_H
+
+#ifdef CONFIG_SPARSEMEM
+/*
+ * SECTION_SIZE_BITS            2^N: how big each section will be
+ * MAX_PHYSMEM_BITS             2^N: how much memory we can have in that space
+ */
+
+#define SECTION_SIZE_BITS	(30)
+#define MAX_PHYSMEM_BITS	(50)
+#ifdef CONFIG_FORCE_MAX_ZONEORDER
+#if ((CONFIG_FORCE_MAX_ZONEORDER - 1 + PAGE_SHIFT) > SECTION_SIZE_BITS)
+#undef SECTION_SIZE_BITS
+#define SECTION_SIZE_BITS (CONFIG_FORCE_MAX_ZONEORDER - 1 + PAGE_SHIFT)
+#endif
+#endif
+
+#endif /* CONFIG_SPARSEMEM */
+#endif /* _ASM_IA64_SPARSEMEM_H */
-- 
cgit v1.2.3-70-g09d2


From e54af724c1ae3530c95135157776c9be65cdb747 Mon Sep 17 00:00:00 2001
From: Dean Nelson <dcn@sgi.com>
Date: Tue, 25 Oct 2005 14:07:43 -0500
Subject: [IA64-SGI] fixes for XPC disengage and open/close protocol

This patch addresses a few issues with the open/close protocol that
were revealed by the newly added disengage functionality combined
with more extensive testing.

Signed-off-by: Dean Nelson <dcn@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/sn/kernel/xpc.h           |  21 +++---
 arch/ia64/sn/kernel/xpc_channel.c   | 117 ++++++++++++++++++++---------
 arch/ia64/sn/kernel/xpc_main.c      | 146 ++++++++++++++++++++++++++----------
 arch/ia64/sn/kernel/xpc_partition.c |   8 +-
 include/asm-ia64/sn/xp.h            |   6 +-
 5 files changed, 208 insertions(+), 90 deletions(-)

(limited to 'include/asm-ia64')

diff --git a/arch/ia64/sn/kernel/xpc.h b/arch/ia64/sn/kernel/xpc.h
index 565822ab3d0..ae51d7b4c42 100644
--- a/arch/ia64/sn/kernel/xpc.h
+++ b/arch/ia64/sn/kernel/xpc.h
@@ -417,6 +417,9 @@ struct xpc_channel {
 	atomic_t n_on_msg_allocate_wq;   /* #on msg allocation wait queue */
 	wait_queue_head_t msg_allocate_wq; /* msg allocation wait queue */
 
+	u8 delayed_IPI_flags;		/* IPI flags received, but delayed */
+					/* action until channel disconnected */
+
 	/* queue of msg senders who want to be notified when msg received */
 
 	atomic_t n_to_notify;		/* #of msg senders to notify */
@@ -478,7 +481,8 @@ struct xpc_channel {
 
 #define	XPC_C_DISCONNECTED	0x00002000 /* channel is disconnected */
 #define	XPC_C_DISCONNECTING	0x00004000 /* channel is being disconnected */
-#define	XPC_C_WDISCONNECT	0x00008000 /* waiting for channel disconnect */
+#define	XPC_C_DISCONNECTCALLOUT	0x00008000 /* chan disconnected callout made */
+#define	XPC_C_WDISCONNECT	0x00010000 /* waiting for channel disconnect */
 
 
@@ -508,13 +512,13 @@ struct xpc_partition {
 	int reason_line;		/* line# deactivation initiated from */
 	int reactivate_nasid;		/* nasid in partition to reactivate */
 
-	unsigned long disengage_request_timeout; /* timeout in XPC_TICKS */
+	unsigned long disengage_request_timeout; /* timeout in jiffies */
 	struct timer_list disengage_request_timer;
 
 
 	/* XPC infrastructure referencing and teardown control */
 
-	volatile u8 setup_state;			/* infrastructure setup state */
+	volatile u8 setup_state;	/* infrastructure setup state */
 	wait_queue_head_t teardown_wq;	/* kthread waiting to teardown infra */
 	atomic_t references;		/* #of references to infrastructure */
 
@@ -604,7 +608,7 @@ struct xpc_partition {
 
 
 /* number of seconds to wait for other partitions to disengage */
-#define XPC_DISENGAGE_REQUEST_TIMELIMIT 90
+#define XPC_DISENGAGE_REQUEST_DEFAULT_TIMELIMIT	90
 
 /* interval in seconds to print 'waiting disengagement' messages */
 #define XPC_DISENGAGE_PRINTMSG_INTERVAL		10
@@ -618,20 +622,18 @@ struct xpc_partition {
 extern struct xpc_registration xpc_registrations[];
 
 
-/* >>> found in xpc_main.c only */
+/* found in xpc_main.c */
 extern struct device *xpc_part;
 extern struct device *xpc_chan;
+extern int xpc_disengage_request_timelimit;
 extern irqreturn_t xpc_notify_IRQ_handler(int, void *, struct pt_regs *);
 extern void xpc_dropped_IPI_check(struct xpc_partition *);
+extern void xpc_activate_partition(struct xpc_partition *);
 extern void xpc_activate_kthreads(struct xpc_channel *, int);
 extern void xpc_create_kthreads(struct xpc_channel *, int);
 extern void xpc_disconnect_wait(int);
 
 
-/* found in xpc_main.c and efi-xpc.c */
-extern void xpc_activate_partition(struct xpc_partition *);
-
-
 /* found in xpc_partition.c */
 extern int xpc_exiting;
 extern struct xpc_vars *xpc_vars;
@@ -1077,6 +1079,7 @@ xpc_notify_IRQ_send_local(struct xpc_channel *ch, u8 ipi_flag,
 
 /* given an AMO variable and a channel#, get its associated IPI flags */
 #define XPC_GET_IPI_FLAGS(_amo, _c)	((u8) (((_amo) >> ((_c) * 8)) & 0xff))
+#define XPC_SET_IPI_FLAGS(_amo, _c, _f)	(_amo) |= ((u64) (_f) << ((_c) * 8))
 
 #define	XPC_ANY_OPENCLOSE_IPI_FLAGS_SET(_amo) ((_amo) & 0x0f0f0f0f0f0f0f0f)
 #define XPC_ANY_MSG_IPI_FLAGS_SET(_amo)       ((_amo) & 0x1010101010101010)
diff --git a/arch/ia64/sn/kernel/xpc_channel.c b/arch/ia64/sn/kernel/xpc_channel.c
index 195ac1b8e26..abf4fc2a87b 100644
--- a/arch/ia64/sn/kernel/xpc_channel.c
+++ b/arch/ia64/sn/kernel/xpc_channel.c
@@ -792,11 +792,20 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
 			"reason=%d\n", ch->number, ch->partid, ch->reason);
 	}
 
-	/* wake the thread that is waiting for this channel to disconnect */
 	if (ch->flags & XPC_C_WDISCONNECT) {
 		spin_unlock_irqrestore(&ch->lock, *irq_flags);
 		up(&ch->wdisconnect_sema);
 		spin_lock_irqsave(&ch->lock, *irq_flags);
+
+	} else if (ch->delayed_IPI_flags) {
+		if (part->act_state != XPC_P_DEACTIVATING) {
+			/* time to take action on any delayed IPI flags */
+			spin_lock(&part->IPI_lock);
+			XPC_SET_IPI_FLAGS(part->local_IPI_amo, ch->number,
+							ch->delayed_IPI_flags);
+			spin_unlock(&part->IPI_lock);
+		}
+		ch->delayed_IPI_flags = 0;
 	}
 }
 
@@ -818,6 +827,19 @@ xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number,
 
 	spin_lock_irqsave(&ch->lock, irq_flags);
 
+again:
+
+	if ((ch->flags & XPC_C_DISCONNECTED) &&
+					(ch->flags & XPC_C_WDISCONNECT)) {
+		/*
+		 * Delay processing IPI flags until thread waiting disconnect
+		 * has had a chance to see that the channel is disconnected.
+		 */
+		ch->delayed_IPI_flags |= IPI_flags;
+		spin_unlock_irqrestore(&ch->lock, irq_flags);
+		return;
+	}
+
 
 	if (IPI_flags & XPC_IPI_CLOSEREQUEST) {
 
@@ -843,14 +865,22 @@ xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number,
 
 			/* both sides have finished disconnecting */
 			xpc_process_disconnect(ch, &irq_flags);
+			DBUG_ON(!(ch->flags & XPC_C_DISCONNECTED));
+			goto again;
 		}
 
 		if (ch->flags & XPC_C_DISCONNECTED) {
-			// >>> explain this section
-
 			if (!(IPI_flags & XPC_IPI_OPENREQUEST)) {
-				DBUG_ON(part->act_state !=
-							XPC_P_DEACTIVATING);
+				if ((XPC_GET_IPI_FLAGS(part->local_IPI_amo,
+					 ch_number) & XPC_IPI_OPENREQUEST)) {
+
+					DBUG_ON(ch->delayed_IPI_flags != 0);
+					spin_lock(&part->IPI_lock);
+					XPC_SET_IPI_FLAGS(part->local_IPI_amo,
+							ch_number,
+							XPC_IPI_CLOSEREQUEST);
+					spin_unlock(&part->IPI_lock);
+				}
 				spin_unlock_irqrestore(&ch->lock, irq_flags);
 				return;
 			}
@@ -880,9 +910,13 @@ xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number,
 			}
 
 			XPC_DISCONNECT_CHANNEL(ch, reason, &irq_flags);
-		} else {
-			xpc_process_disconnect(ch, &irq_flags);
+
+			DBUG_ON(IPI_flags & XPC_IPI_CLOSEREPLY);
+			spin_unlock_irqrestore(&ch->lock, irq_flags);
+			return;
 		}
+
+		xpc_process_disconnect(ch, &irq_flags);
 	}
 
 
@@ -898,7 +932,20 @@ xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number,
 		}
 
 		DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST));
-		DBUG_ON(!(ch->flags & XPC_C_RCLOSEREQUEST));
+
+		if (!(ch->flags & XPC_C_RCLOSEREQUEST)) {
+			if ((XPC_GET_IPI_FLAGS(part->local_IPI_amo, ch_number)
+						& XPC_IPI_CLOSEREQUEST)) {
+
+				DBUG_ON(ch->delayed_IPI_flags != 0);
+				spin_lock(&part->IPI_lock);
+				XPC_SET_IPI_FLAGS(part->local_IPI_amo,
+						ch_number, XPC_IPI_CLOSEREPLY);
+				spin_unlock(&part->IPI_lock);
+			}
+			spin_unlock_irqrestore(&ch->lock, irq_flags);
+			return;
+		}
 
 		ch->flags |= XPC_C_RCLOSEREPLY;
 
@@ -916,8 +963,14 @@ xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number,
 			"channel=%d\n", args->msg_size, args->local_nentries,
 			ch->partid, ch->number);
 
-		if ((ch->flags & (XPC_C_DISCONNECTING | XPC_C_WDISCONNECT)) ||
-					part->act_state == XPC_P_DEACTIVATING) {
+		if (part->act_state == XPC_P_DEACTIVATING ||
+					(ch->flags & XPC_C_ROPENREQUEST)) {
+			spin_unlock_irqrestore(&ch->lock, irq_flags);
+			return;
+		}
+
+		if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_WDISCONNECT)) {
+			ch->delayed_IPI_flags |= XPC_IPI_OPENREQUEST;
 			spin_unlock_irqrestore(&ch->lock, irq_flags);
 			return;
 		}
@@ -931,8 +984,11 @@ xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number,
 		 *      msg_size = size of channel's messages in bytes
 		 *      local_nentries = remote partition's local_nentries
 		 */
-		DBUG_ON(args->msg_size == 0);
-		DBUG_ON(args->local_nentries == 0);
+		if (args->msg_size == 0 || args->local_nentries == 0) {
+			/* assume OPENREQUEST was delayed by mistake */
+			spin_unlock_irqrestore(&ch->lock, irq_flags);
+			return;
+		}
 
 		ch->flags |= (XPC_C_ROPENREQUEST | XPC_C_CONNECTING);
 		ch->remote_nentries = args->local_nentries;
@@ -970,7 +1026,13 @@ xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number,
 			spin_unlock_irqrestore(&ch->lock, irq_flags);
 			return;
 		}
-		DBUG_ON(!(ch->flags & XPC_C_OPENREQUEST));
+		if (!(ch->flags & XPC_C_OPENREQUEST)) {
+			XPC_DISCONNECT_CHANNEL(ch, xpcOpenCloseError,
+								&irq_flags);
+			spin_unlock_irqrestore(&ch->lock, irq_flags);
+			return;
+		}
+
 		DBUG_ON(!(ch->flags & XPC_C_ROPENREQUEST));
 		DBUG_ON(ch->flags & XPC_C_CONNECTED);
 
@@ -1024,8 +1086,8 @@ xpc_connect_channel(struct xpc_channel *ch)
 	struct xpc_registration *registration = &xpc_registrations[ch->number];
 
 
-	if (down_interruptible(&registration->sema) != 0) {
-		return xpcInterrupted;
+	if (down_trylock(&registration->sema) != 0) {
+		return xpcRetry;
 	}
 
 	if (!XPC_CHANNEL_REGISTERED(ch->number)) {
@@ -1445,19 +1507,11 @@ xpc_initiate_connect(int ch_number)
 		if (xpc_part_ref(part)) {
 			ch = &part->channels[ch_number];
 
-			if (!(ch->flags & XPC_C_DISCONNECTING)) {
-				DBUG_ON(ch->flags & XPC_C_OPENREQUEST);
-				DBUG_ON(ch->flags & XPC_C_CONNECTED);
-				DBUG_ON(ch->flags & XPC_C_SETUP);
-
-				/*
-				 * Initiate the establishment of a connection
-				 * on the newly registered channel to the
-				 * remote partition.
-				 */
-				xpc_wakeup_channel_mgr(part);
-			}
-
+			/*
+			 * Initiate the establishment of a connection on the
+			 * newly registered channel to the remote partition.
+			 */
+			xpc_wakeup_channel_mgr(part);
 			xpc_part_deref(part);
 		}
 	}
@@ -1467,9 +1521,6 @@ xpc_initiate_connect(int ch_number)
 void
 xpc_connected_callout(struct xpc_channel *ch)
 {
-	unsigned long irq_flags;
-
-
 	/* let the registerer know that a connection has been established */
 
 	if (ch->func != NULL) {
@@ -1482,10 +1533,6 @@ xpc_connected_callout(struct xpc_channel *ch)
 		dev_dbg(xpc_chan, "ch->func() returned, reason=xpcConnected, "
 			"partid=%d, channel=%d\n", ch->partid, ch->number);
 	}
-
-	spin_lock_irqsave(&ch->lock, irq_flags);
-	ch->flags |= XPC_C_CONNECTCALLOUT;
-	spin_unlock_irqrestore(&ch->lock, irq_flags);
 }
 
 
diff --git a/arch/ia64/sn/kernel/xpc_main.c b/arch/ia64/sn/kernel/xpc_main.c
index feece200b3c..db349c6d4c5 100644
--- a/arch/ia64/sn/kernel/xpc_main.c
+++ b/arch/ia64/sn/kernel/xpc_main.c
@@ -91,6 +91,10 @@ static int xpc_hb_check_interval = XPC_HB_CHECK_DEFAULT_INTERVAL;
 static int xpc_hb_check_min_interval = 10;
 static int xpc_hb_check_max_interval = 120;
 
+int xpc_disengage_request_timelimit = XPC_DISENGAGE_REQUEST_DEFAULT_TIMELIMIT;
+static int xpc_disengage_request_min_timelimit = 0;
+static int xpc_disengage_request_max_timelimit = 120;
+
 static ctl_table xpc_sys_xpc_hb_dir[] = {
 	{
 		1,
@@ -129,6 +133,19 @@ static ctl_table xpc_sys_xpc_dir[] = {
 		0555,
 		xpc_sys_xpc_hb_dir
 	},
+	{
+		2,
+		"disengage_request_timelimit",
+		&xpc_disengage_request_timelimit,
+		sizeof(int),
+		0644,
+		NULL,
+		&proc_dointvec_minmax,
+		&sysctl_intvec,
+		NULL,
+		&xpc_disengage_request_min_timelimit,
+		&xpc_disengage_request_max_timelimit
+	},
 	{0}
 };
 static ctl_table xpc_sys_dir[] = {
@@ -153,11 +170,11 @@ static DECLARE_WAIT_QUEUE_HEAD(xpc_act_IRQ_wq);
 
 static unsigned long xpc_hb_check_timeout;
 
-/* used as an indication of when the xpc_hb_checker thread is inactive */
-static DECLARE_MUTEX_LOCKED(xpc_hb_checker_inactive);
+/* notification that the xpc_hb_checker thread has exited */
+static DECLARE_MUTEX_LOCKED(xpc_hb_checker_exited);
 
-/* used as an indication of when the xpc_discovery thread is inactive */
-static DECLARE_MUTEX_LOCKED(xpc_discovery_inactive);
+/* notification that the xpc_discovery thread has exited */
+static DECLARE_MUTEX_LOCKED(xpc_discovery_exited);
 
 
 static struct timer_list xpc_hb_timer;
@@ -181,7 +198,7 @@ xpc_timeout_partition_disengage_request(unsigned long data)
 	struct xpc_partition *part = (struct xpc_partition *) data;
 
 
-	DBUG_ON(XPC_TICKS < part->disengage_request_timeout);
+	DBUG_ON(jiffies < part->disengage_request_timeout);
 
 	(void) xpc_partition_disengaged(part);
 
@@ -292,8 +309,8 @@ xpc_hb_checker(void *ignore)
 	dev_dbg(xpc_part, "heartbeat checker is exiting\n");
 
 
-	/* mark this thread as inactive */
-	up(&xpc_hb_checker_inactive);
+	/* mark this thread as having exited */
+	up(&xpc_hb_checker_exited);
 	return 0;
 }
 
@@ -312,8 +329,8 @@ xpc_initiate_discovery(void *ignore)
 
 	dev_dbg(xpc_part, "discovery thread is exiting\n");
 
-	/* mark this thread as inactive */
-	up(&xpc_discovery_inactive);
+	/* mark this thread as having exited */
+	up(&xpc_discovery_exited);
 	return 0;
 }
 
@@ -703,6 +720,7 @@ xpc_daemonize_kthread(void *args)
 	struct xpc_partition *part = &xpc_partitions[partid];
 	struct xpc_channel *ch;
 	int n_needed;
+	unsigned long irq_flags;
 
 
 	daemonize("xpc%02dc%d", partid, ch_number);
@@ -713,11 +731,14 @@ xpc_daemonize_kthread(void *args)
 	ch = &part->channels[ch_number];
 
 	if (!(ch->flags & XPC_C_DISCONNECTING)) {
-		DBUG_ON(!(ch->flags & XPC_C_CONNECTED));
 
 		/* let registerer know that connection has been established */
 
-		if (atomic_read(&ch->kthreads_assigned) == 1) {
+		spin_lock_irqsave(&ch->lock, irq_flags);
+		if (!(ch->flags & XPC_C_CONNECTCALLOUT)) {
+			ch->flags |= XPC_C_CONNECTCALLOUT;
+			spin_unlock_irqrestore(&ch->lock, irq_flags);
+
 			xpc_connected_callout(ch);
 
 			/*
@@ -732,14 +753,23 @@ xpc_daemonize_kthread(void *args)
 					!(ch->flags & XPC_C_DISCONNECTING)) {
 				xpc_activate_kthreads(ch, n_needed);
 			}
+		} else {
+			spin_unlock_irqrestore(&ch->lock, irq_flags);
 		}
 
 		xpc_kthread_waitmsgs(part, ch);
 	}
 
 	if (atomic_dec_return(&ch->kthreads_assigned) == 0) {
-		if (ch->flags & XPC_C_CONNECTCALLOUT) {
+		spin_lock_irqsave(&ch->lock, irq_flags);
+		if ((ch->flags & XPC_C_CONNECTCALLOUT) &&
+				!(ch->flags & XPC_C_DISCONNECTCALLOUT)) {
+			ch->flags |= XPC_C_DISCONNECTCALLOUT;
+			spin_unlock_irqrestore(&ch->lock, irq_flags);
+
 			xpc_disconnecting_callout(ch);
+		} else {
+			spin_unlock_irqrestore(&ch->lock, irq_flags);
 		}
 		if (atomic_dec_return(&part->nchannels_engaged) == 0) {
 			xpc_mark_partition_disengaged(part);
@@ -780,9 +810,29 @@ xpc_create_kthreads(struct xpc_channel *ch, int needed)
 
 
 	while (needed-- > 0) {
+
+		/*
+		 * The following is done on behalf of the newly created
+		 * kthread. That kthread is responsible for doing the
+		 * counterpart to the following before it exits.
+		 */
+		(void) xpc_part_ref(part);
+		xpc_msgqueue_ref(ch);
+		if (atomic_inc_return(&ch->kthreads_assigned) == 1 &&
+		    atomic_inc_return(&part->nchannels_engaged) == 1) {
+			xpc_mark_partition_engaged(part);
+		}
+
 		pid = kernel_thread(xpc_daemonize_kthread, (void *) args, 0);
 		if (pid < 0) {
 			/* the fork failed */
+			if (atomic_dec_return(&ch->kthreads_assigned) == 0 &&
+			    atomic_dec_return(&part->nchannels_engaged) == 0) {
+				xpc_mark_partition_disengaged(part);
+				xpc_IPI_send_disengage(part);
+			}
+			xpc_msgqueue_deref(ch);
+			xpc_part_deref(part);
 
 			if (atomic_read(&ch->kthreads_assigned) <
 						ch->kthreads_idle_limit) {
@@ -802,18 +852,6 @@ xpc_create_kthreads(struct xpc_channel *ch, int needed)
 			break;
 		}
 
-		/*
-		 * The following is done on behalf of the newly created
-		 * kthread. That kthread is responsible for doing the
-		 * counterpart to the following before it exits.
-		 */
-		(void) xpc_part_ref(part);
-		xpc_msgqueue_ref(ch);
-		if (atomic_inc_return(&ch->kthreads_assigned) == 1) {
-			if (atomic_inc_return(&part->nchannels_engaged) == 1) {
-				xpc_mark_partition_engaged(part);
-			}
-		}
 		ch->kthreads_created++;	// >>> temporary debug only!!!
 	}
 }
@@ -826,26 +864,49 @@ xpc_disconnect_wait(int ch_number)
 	partid_t partid;
 	struct xpc_partition *part;
 	struct xpc_channel *ch;
+	int wakeup_channel_mgr;
 
 
 	/* now wait for all callouts to the caller's function to cease */
 	for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
 		part = &xpc_partitions[partid];
 
-		if (xpc_part_ref(part)) {
-			ch = &part->channels[ch_number];
+		if (!xpc_part_ref(part)) {
+			continue;
+		}
 
-			if (ch->flags & XPC_C_WDISCONNECT) {
-				if (!(ch->flags & XPC_C_DISCONNECTED)) {
-					(void) down(&ch->wdisconnect_sema);
-				}
-				spin_lock_irqsave(&ch->lock, irq_flags);
-				ch->flags &= ~XPC_C_WDISCONNECT;
-				spin_unlock_irqrestore(&ch->lock, irq_flags);
-			}
+		ch = &part->channels[ch_number];
 
+		if (!(ch->flags & XPC_C_WDISCONNECT)) {
 			xpc_part_deref(part);
+			continue;
 		}
+
+		(void) down(&ch->wdisconnect_sema);
+
+		spin_lock_irqsave(&ch->lock, irq_flags);
+		DBUG_ON(!(ch->flags & XPC_C_DISCONNECTED));
+		wakeup_channel_mgr = 0;
+
+		if (ch->delayed_IPI_flags) {
+			if (part->act_state != XPC_P_DEACTIVATING) {
+				spin_lock(&part->IPI_lock);
+				XPC_SET_IPI_FLAGS(part->local_IPI_amo,
+					ch->number, ch->delayed_IPI_flags);
+				spin_unlock(&part->IPI_lock);
+				wakeup_channel_mgr = 1;
+			}
+			ch->delayed_IPI_flags = 0;
+		}
+
+		ch->flags &= ~XPC_C_WDISCONNECT;
+		spin_unlock_irqrestore(&ch->lock, irq_flags);
+
+		if (wakeup_channel_mgr) {
+			xpc_wakeup_channel_mgr(part);
+		}
+
+		xpc_part_deref(part);
 	}
 }
 
@@ -873,11 +934,11 @@ xpc_do_exit(enum xpc_retval reason)
 	/* ignore all incoming interrupts */
 	free_irq(SGI_XPC_ACTIVATE, NULL);
 
-	/* wait for the discovery thread to mark itself inactive */
-	down(&xpc_discovery_inactive);
+	/* wait for the discovery thread to exit */
+	down(&xpc_discovery_exited);
 
-	/* wait for the heartbeat checker thread to mark itself inactive */
-	down(&xpc_hb_checker_inactive);
+	/* wait for the heartbeat checker thread to exit */
+	down(&xpc_hb_checker_exited);
 
 
 	/* sleep for a 1/3 of a second or so */
@@ -893,6 +954,7 @@ xpc_do_exit(enum xpc_retval reason)
 
 		for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
 			part = &xpc_partitions[partid];
+
 			if (xpc_partition_disengaged(part) &&
 					part->act_state == XPC_P_INACTIVE) {
 				continue;
@@ -930,7 +992,7 @@ xpc_do_exit(enum xpc_retval reason)
 
 	/* now it's time to eliminate our heartbeat */
 	del_timer_sync(&xpc_hb_timer);
-	DBUG_ON(xpc_vars->heartbeating_to_mask == 0);
+	DBUG_ON(xpc_vars->heartbeating_to_mask != 0);
 
 	/* take ourselves off of the reboot_notifier_list */
 	(void) unregister_reboot_notifier(&xpc_reboot_notifier);
@@ -1134,7 +1196,7 @@ xpc_init(void)
 		dev_err(xpc_part, "failed while forking discovery thread\n");
 
 		/* mark this new thread as a non-starter */
-		up(&xpc_discovery_inactive);
+		up(&xpc_discovery_exited);
 
 		xpc_do_exit(xpcUnloading);
 		return -EBUSY;
@@ -1172,3 +1234,7 @@ module_param(xpc_hb_check_interval, int, 0);
 MODULE_PARM_DESC(xpc_hb_check_interval, "Number of seconds between "
 		"heartbeat checks.");
 
+module_param(xpc_disengage_request_timelimit, int, 0);
+MODULE_PARM_DESC(xpc_disengage_request_timelimit, "Number of seconds to wait "
+		"for disengage request to complete.");
+
diff --git a/arch/ia64/sn/kernel/xpc_partition.c b/arch/ia64/sn/kernel/xpc_partition.c
index 79a0fc4c860..958488f5569 100644
--- a/arch/ia64/sn/kernel/xpc_partition.c
+++ b/arch/ia64/sn/kernel/xpc_partition.c
@@ -578,7 +578,7 @@ xpc_update_partition_info(struct xpc_partition *part, u8 remote_rp_version,
 
 
 /*
- * Prior code has determine the nasid which generated an IPI.  Inspect
+ * Prior code has determined the nasid which generated an IPI.  Inspect
  * that nasid to determine if its partition needs to be activated or
  * deactivated.
  *
@@ -942,14 +942,14 @@ xpc_deactivate_partition(const int line, struct xpc_partition *part,
 
 		/* set a timelimit on the disengage request */
 		part->disengage_request_timeout = jiffies +
-					(XPC_DISENGAGE_REQUEST_TIMELIMIT * HZ);
+					(xpc_disengage_request_timelimit * HZ);
 		part->disengage_request_timer.expires =
 					part->disengage_request_timeout;
 		add_timer(&part->disengage_request_timer);
 	}
 
-	dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n", partid,
-		reason);
+	dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n",
+		XPC_PARTID(part), reason);
 
 	xpc_partition_going_down(part, reason);
 }
diff --git a/include/asm-ia64/sn/xp.h b/include/asm-ia64/sn/xp.h
index f3052a54932..30312be3120 100644
--- a/include/asm-ia64/sn/xp.h
+++ b/include/asm-ia64/sn/xp.h
@@ -225,7 +225,9 @@ enum xpc_retval {
 
 	xpcDisconnecting,	/* 49: channel disconnecting (closing) */
 
-	xpcUnknownReason	/* 50: unknown reason -- must be last in list */
+	xpcOpenCloseError,	/* 50: channel open/close protocol error */
+
+	xpcUnknownReason	/* 51: unknown reason -- must be last in list */
 };
 
 
@@ -350,7 +352,7 @@ typedef void (*xpc_notify_func)(enum xpc_retval reason, partid_t partid,
  *
  * The 'func' field points to the function to call when aynchronous
  * notification is required for such events as: a connection established/lost,
- * or an incomming message received, or an error condition encountered. A
+ * or an incoming message received, or an error condition encountered. A
  * non-NULL 'func' field indicates that there is an active registration for
  * the channel.
  */
-- 
cgit v1.2.3-70-g09d2


From c1902aae322952f8726469a6657df7b9d5c794fe Mon Sep 17 00:00:00 2001
From: Dean Roe <roe@sgi.com>
Date: Thu, 27 Oct 2005 15:41:04 -0500
Subject: [IA64] - Avoid slow TLB purges on SGI Altix systems

flush_tlb_all() can be a scaling issue on large SGI Altix systems
since it uses the global call_lock and always executes on all cpus.
When a process enters flush_tlb_range() to purge TLBs for another
process, it is possible to avoid flush_tlb_all() and instead allow
sn2_global_tlb_purge() to purge TLBs only where necessary.

This patch modifies flush_tlb_range() so that this case can be handled
by platform TLB purge functions and updates ia64_global_tlb_purge()
accordingly.  sn2_global_tlb_purge() now calculates the region register
value from the mm argument introduced with this patch.

Signed-off-by: Dean Roe <roe@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/mm/tlb.c                | 16 +++++++++-------
 arch/ia64/sn/kernel/sn2/sn2_smp.c | 31 +++++++++++++++++++------------
 include/asm-ia64/machvec.h        |  2 +-
 3 files changed, 29 insertions(+), 20 deletions(-)

(limited to 'include/asm-ia64')

diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c
index 464557e4ed8..99ea8c70f40 100644
--- a/arch/ia64/mm/tlb.c
+++ b/arch/ia64/mm/tlb.c
@@ -86,10 +86,15 @@ wrap_mmu_context (struct mm_struct *mm)
 }
 
 void
-ia64_global_tlb_purge (unsigned long start, unsigned long end, unsigned long nbits)
+ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start, unsigned long end, unsigned long nbits)
 {
 	static DEFINE_SPINLOCK(ptcg_lock);
 
+	if (mm != current->active_mm) {
+		flush_tlb_all();
+		return;
+	}
+
 	/* HW requires global serialization of ptc.ga.  */
 	spin_lock(&ptcg_lock);
 	{
@@ -135,15 +140,12 @@ flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long
 	unsigned long size = end - start;
 	unsigned long nbits;
 
+#ifndef CONFIG_SMP
 	if (mm != current->active_mm) {
-		/* this does happen, but perhaps it's not worth optimizing for? */
-#ifdef CONFIG_SMP
-		flush_tlb_all();
-#else
 		mm->context = 0;
-#endif
 		return;
 	}
+#endif
 
 	nbits = ia64_fls(size + 0xfff);
 	while (unlikely (((1UL << nbits) & purge.mask) == 0) && (nbits < purge.max_bits))
@@ -153,7 +155,7 @@ flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long
 	start &= ~((1UL << nbits) - 1);
 
 # ifdef CONFIG_SMP
-	platform_global_tlb_purge(start, end, nbits);
+	platform_global_tlb_purge(mm, start, end, nbits);
 # else
 	do {
 		ia64_ptcl(start, (nbits<<2));
diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c
index 0a4ee50c302..49b530c39a4 100644
--- a/arch/ia64/sn/kernel/sn2/sn2_smp.c
+++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c
@@ -177,6 +177,7 @@ void sn_tlb_migrate_finish(struct mm_struct *mm)
 
 /**
  * sn2_global_tlb_purge - globally purge translation cache of virtual address range
+ * @mm: mm_struct containing virtual address range
  * @start: start of virtual address range
  * @end: end of virtual address range
  * @nbits: specifies number of bytes to purge per instruction (num = 1<<(nbits & 0xfc))
@@ -188,21 +189,22 @@ void sn_tlb_migrate_finish(struct mm_struct *mm)
  * 	- cpu_vm_mask is a bit mask that indicates which cpus have loaded the context.
  * 	- cpu_vm_mask is converted into a nodemask of the nodes containing the
  * 	  cpus in cpu_vm_mask.
- *	- if only one bit is set in cpu_vm_mask & it is the current cpu,
- *	  then only the local TLB needs to be flushed. This flushing can be done
- *	  using ptc.l. This is the common case & avoids the global spinlock.
+ *	- if only one bit is set in cpu_vm_mask & it is the current cpu & the
+ *	  process is purging its own virtual address range, then only the
+ *	  local TLB needs to be flushed. This flushing can be done using
+ *	  ptc.l. This is the common case & avoids the global spinlock.
  *	- if multiple cpus have loaded the context, then flushing has to be
  *	  done with ptc.g/MMRs under protection of the global ptc_lock.
  */
 
 void
-sn2_global_tlb_purge(unsigned long start, unsigned long end,
-		     unsigned long nbits)
+sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start,
+		     unsigned long end, unsigned long nbits)
 {
 	int i, opt, shub1, cnode, mynasid, cpu, lcpu = 0, nasid, flushed = 0;
+	int mymm = (mm == current->active_mm);
 	volatile unsigned long *ptc0, *ptc1;
-	unsigned long itc, itc2, flags, data0 = 0, data1 = 0;
-	struct mm_struct *mm = current->active_mm;
+	unsigned long itc, itc2, flags, data0 = 0, data1 = 0, rr_value;
 	short nasids[MAX_NUMNODES], nix;
 	nodemask_t nodes_flushed;
 
@@ -216,9 +218,12 @@ sn2_global_tlb_purge(unsigned long start, unsigned long end,
 		i++;
 	}
 
+	if (i == 0)
+		return;
+
 	preempt_disable();
 
-	if (likely(i == 1 && lcpu == smp_processor_id())) {
+	if (likely(i == 1 && lcpu == smp_processor_id() && mymm)) {
 		do {
 			ia64_ptcl(start, nbits << 2);
 			start += (1UL << nbits);
@@ -229,7 +234,7 @@ sn2_global_tlb_purge(unsigned long start, unsigned long end,
 		return;
 	}
 
-	if (atomic_read(&mm->mm_users) == 1) {
+	if (atomic_read(&mm->mm_users) == 1 && mymm) {
 		flush_tlb_mm(mm);
 		__get_cpu_var(ptcstats).change_rid++;
 		preempt_enable();
@@ -241,11 +246,13 @@ sn2_global_tlb_purge(unsigned long start, unsigned long end,
 	for_each_node_mask(cnode, nodes_flushed)
 		nasids[nix++] = cnodeid_to_nasid(cnode);
 
+	rr_value = (mm->context << 3) | REGION_NUMBER(start);
+
 	shub1 = is_shub1();
 	if (shub1) {
 		data0 = (1UL << SH1_PTC_0_A_SHFT) |
 		    	(nbits << SH1_PTC_0_PS_SHFT) |
-		    	((ia64_get_rr(start) >> 8) << SH1_PTC_0_RID_SHFT) |
+			(rr_value << SH1_PTC_0_RID_SHFT) |
 		    	(1UL << SH1_PTC_0_START_SHFT);
 		ptc0 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_0);
 		ptc1 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_1);
@@ -254,7 +261,7 @@ sn2_global_tlb_purge(unsigned long start, unsigned long end,
 			(nbits << SH2_PTC_PS_SHFT) |
 		    	(1UL << SH2_PTC_START_SHFT);
 		ptc0 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH2_PTC + 
-			((ia64_get_rr(start) >> 8) << SH2_PTC_RID_SHFT) );
+			(rr_value << SH2_PTC_RID_SHFT));
 		ptc1 = NULL;
 	}
 	
@@ -275,7 +282,7 @@ sn2_global_tlb_purge(unsigned long start, unsigned long end,
 			data0 = (data0 & ~SH2_PTC_ADDR_MASK) | (start & SH2_PTC_ADDR_MASK);
 		for (i = 0; i < nix; i++) {
 			nasid = nasids[i];
-			if ((!(sn2_ptctest & 3)) && unlikely(nasid == mynasid)) {
+			if ((!(sn2_ptctest & 3)) && unlikely(nasid == mynasid && mymm)) {
 				ia64_ptcga(start, nbits << 2);
 				ia64_srlz_i();
 			} else {
diff --git a/include/asm-ia64/machvec.h b/include/asm-ia64/machvec.h
index 79e89a7db56..522c7f5ba8c 100644
--- a/include/asm-ia64/machvec.h
+++ b/include/asm-ia64/machvec.h
@@ -26,7 +26,7 @@ typedef void ia64_mv_cpu_init_t (void);
 typedef void ia64_mv_irq_init_t (void);
 typedef void ia64_mv_send_ipi_t (int, int, int, int);
 typedef void ia64_mv_timer_interrupt_t (int, void *, struct pt_regs *);
-typedef void ia64_mv_global_tlb_purge_t (unsigned long, unsigned long, unsigned long);
+typedef void ia64_mv_global_tlb_purge_t (struct mm_struct *, unsigned long, unsigned long, unsigned long);
 typedef void ia64_mv_tlb_migrate_finish_t (struct mm_struct *);
 typedef unsigned int ia64_mv_local_vector_to_irq (u8);
 typedef char *ia64_mv_pci_get_legacy_mem_t (struct pci_bus *);
-- 
cgit v1.2.3-70-g09d2


From 06a544971fad0992fe8b92c5647538d573089dd4 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 21 Oct 2005 03:21:03 -0400
Subject: [PATCH] gfp_t: dma-mapping (ia64)

... and related annotations for amd64 - swiotlb code is shared, but
prototypes are not.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/ia64/hp/common/hwsw_iommu.c | 2 +-
 arch/ia64/hp/common/sba_iommu.c  | 2 +-
 arch/ia64/lib/swiotlb.c          | 2 +-
 arch/ia64/sn/pci/pci_dma.c       | 2 +-
 include/asm-ia64/machvec.h       | 2 +-
 include/asm-x86_64/swiotlb.h     | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/asm-ia64')

diff --git a/arch/ia64/hp/common/hwsw_iommu.c b/arch/ia64/hp/common/hwsw_iommu.c
index 80f8ef01393..1ba02baf2f9 100644
--- a/arch/ia64/hp/common/hwsw_iommu.c
+++ b/arch/ia64/hp/common/hwsw_iommu.c
@@ -71,7 +71,7 @@ hwsw_init (void)
 }
 
 void *
-hwsw_alloc_coherent (struct device *dev, size_t size, dma_addr_t *dma_handle, int flags)
+hwsw_alloc_coherent (struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flags)
 {
 	if (use_swiotlb(dev))
 		return swiotlb_alloc_coherent(dev, size, dma_handle, flags);
diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c
index 11957598a8b..21bffba78b6 100644
--- a/arch/ia64/hp/common/sba_iommu.c
+++ b/arch/ia64/hp/common/sba_iommu.c
@@ -1076,7 +1076,7 @@ void sba_unmap_single(struct device *dev, dma_addr_t iova, size_t size, int dir)
  * See Documentation/DMA-mapping.txt
  */
 void *
-sba_alloc_coherent (struct device *dev, size_t size, dma_addr_t *dma_handle, int flags)
+sba_alloc_coherent (struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flags)
 {
 	struct ioc *ioc;
 	void *addr;
diff --git a/arch/ia64/lib/swiotlb.c b/arch/ia64/lib/swiotlb.c
index a604efc7f6c..3ebbb3c8ba3 100644
--- a/arch/ia64/lib/swiotlb.c
+++ b/arch/ia64/lib/swiotlb.c
@@ -314,7 +314,7 @@ sync_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
 
 void *
 swiotlb_alloc_coherent(struct device *hwdev, size_t size,
-		       dma_addr_t *dma_handle, int flags)
+		       dma_addr_t *dma_handle, gfp_t flags)
 {
 	unsigned long dev_addr;
 	void *ret;
diff --git a/arch/ia64/sn/pci/pci_dma.c b/arch/ia64/sn/pci/pci_dma.c
index 0e4b9ad9ef0..75e6e874beb 100644
--- a/arch/ia64/sn/pci/pci_dma.c
+++ b/arch/ia64/sn/pci/pci_dma.c
@@ -75,7 +75,7 @@ EXPORT_SYMBOL(sn_dma_set_mask);
  * more information.
  */
 void *sn_dma_alloc_coherent(struct device *dev, size_t size,
-			    dma_addr_t * dma_handle, int flags)
+			    dma_addr_t * dma_handle, gfp_t flags)
 {
 	void *cpuaddr;
 	unsigned long phys_addr;
diff --git a/include/asm-ia64/machvec.h b/include/asm-ia64/machvec.h
index 79e89a7db56..a2f6ac5aef7 100644
--- a/include/asm-ia64/machvec.h
+++ b/include/asm-ia64/machvec.h
@@ -37,7 +37,7 @@ typedef int ia64_mv_pci_legacy_write_t (struct pci_bus *, u16 port, u32 val,
 
 /* DMA-mapping interface: */
 typedef void ia64_mv_dma_init (void);
-typedef void *ia64_mv_dma_alloc_coherent (struct device *, size_t, dma_addr_t *, int);
+typedef void *ia64_mv_dma_alloc_coherent (struct device *, size_t, dma_addr_t *, gfp_t);
 typedef void ia64_mv_dma_free_coherent (struct device *, size_t, void *, dma_addr_t);
 typedef dma_addr_t ia64_mv_dma_map_single (struct device *, void *, size_t, int);
 typedef void ia64_mv_dma_unmap_single (struct device *, dma_addr_t, size_t, int);
diff --git a/include/asm-x86_64/swiotlb.h b/include/asm-x86_64/swiotlb.h
index 36293061f4e..7cbfd10ecc3 100644
--- a/include/asm-x86_64/swiotlb.h
+++ b/include/asm-x86_64/swiotlb.h
@@ -27,7 +27,7 @@ extern void swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg,
 			 int nents, int direction);
 extern int swiotlb_dma_mapping_error(dma_addr_t dma_addr);
 extern void *swiotlb_alloc_coherent (struct device *hwdev, size_t size,
-				     dma_addr_t *dma_handle, int flags);
+				     dma_addr_t *dma_handle, gfp_t flags);
 extern void swiotlb_free_coherent (struct device *hwdev, size_t size,
 				   void *vaddr, dma_addr_t dma_handle);
 
-- 
cgit v1.2.3-70-g09d2


From eb92f4ef320b738e41ad43476a5d05c8a20d5cc7 Mon Sep 17 00:00:00 2001
From: Rik Van Riel <riel@redhat.com>
Date: Sat, 29 Oct 2005 18:15:44 -0700
Subject: [PATCH] add sem_is_read/write_locked()

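Add sem_is_read/write_locked functions to the read/write semaphores, along the
same lines as the *_is_locked spinlock functions.  The swap token tuning patch
uses sem_is_read_locked; sem_is_write_locked is added for completeness.

(Note: in the diff below, the helper added to each rwsem header is a single
rwsem_is_locked() function rather than separate read/write variants.)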

Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-alpha/rwsem.h      | 5 +++++
 include/asm-i386/rwsem.h       | 5 +++++
 include/asm-ia64/rwsem.h       | 5 +++++
 include/asm-ppc/rwsem.h        | 5 +++++
 include/asm-ppc64/rwsem.h      | 5 +++++
 include/asm-s390/rwsem.h       | 5 +++++
 include/asm-sh/rwsem.h         | 5 +++++
 include/asm-sparc64/rwsem.h    | 5 +++++
 include/asm-x86_64/rwsem.h     | 5 +++++
 include/linux/rwsem-spinlock.h | 5 +++++
 10 files changed, 50 insertions(+)

(limited to 'include/asm-ia64')

diff --git a/include/asm-alpha/rwsem.h b/include/asm-alpha/rwsem.h
index 8e058a67c9a..fafdd4f7010 100644
--- a/include/asm-alpha/rwsem.h
+++ b/include/asm-alpha/rwsem.h
@@ -262,5 +262,10 @@ static inline long rwsem_atomic_update(long val, struct rw_semaphore *sem)
 #endif
 }
 
+static inline int rwsem_is_locked(struct rw_semaphore *sem)
+{
+	return (sem->count != 0);
+}
+
 #endif /* __KERNEL__ */
 #endif /* _ALPHA_RWSEM_H */
diff --git a/include/asm-i386/rwsem.h b/include/asm-i386/rwsem.h
index 7625a675852..be4ab859238 100644
--- a/include/asm-i386/rwsem.h
+++ b/include/asm-i386/rwsem.h
@@ -284,5 +284,10 @@ LOCK_PREFIX	"xadd %0,(%2)"
 	return tmp+delta;
 }
 
+static inline int rwsem_is_locked(struct rw_semaphore *sem)
+{
+	return (sem->count != 0);
+}
+
 #endif /* __KERNEL__ */
 #endif /* _I386_RWSEM_H */
diff --git a/include/asm-ia64/rwsem.h b/include/asm-ia64/rwsem.h
index e18b5ab0cb7..1327c91ea39 100644
--- a/include/asm-ia64/rwsem.h
+++ b/include/asm-ia64/rwsem.h
@@ -186,4 +186,9 @@ __downgrade_write (struct rw_semaphore *sem)
 #define rwsem_atomic_add(delta, sem)	atomic64_add(delta, (atomic64_t *)(&(sem)->count))
 #define rwsem_atomic_update(delta, sem)	atomic64_add_return(delta, (atomic64_t *)(&(sem)->count))
 
+static inline int rwsem_is_locked(struct rw_semaphore *sem)
+{
+	return (sem->count != 0);
+}
+
 #endif /* _ASM_IA64_RWSEM_H */
diff --git a/include/asm-ppc/rwsem.h b/include/asm-ppc/rwsem.h
index 3e738f483c1..3501ea72f88 100644
--- a/include/asm-ppc/rwsem.h
+++ b/include/asm-ppc/rwsem.h
@@ -168,5 +168,10 @@ static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem)
 	return atomic_add_return(delta, (atomic_t *)(&sem->count));
 }
 
+static inline int rwsem_is_locked(struct rw_semaphore *sem)
+{
+	return (sem->count != 0);
+}
+
 #endif /* __KERNEL__ */
 #endif /* _PPC_RWSEM_XADD_H */
diff --git a/include/asm-ppc64/rwsem.h b/include/asm-ppc64/rwsem.h
index bd5c2f09357..7a647fae376 100644
--- a/include/asm-ppc64/rwsem.h
+++ b/include/asm-ppc64/rwsem.h
@@ -163,5 +163,10 @@ static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem)
 	return atomic_add_return(delta, (atomic_t *)(&sem->count));
 }
 
+static inline int rwsem_is_locked(struct rw_semaphore *sem)
+{
+	return (sem->count != 0);
+}
+
 #endif /* __KERNEL__ */
 #endif /* _PPC_RWSEM_XADD_H */
diff --git a/include/asm-s390/rwsem.h b/include/asm-s390/rwsem.h
index 8c0cebbfc03..0422a085dd5 100644
--- a/include/asm-s390/rwsem.h
+++ b/include/asm-s390/rwsem.h
@@ -351,5 +351,10 @@ static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
 	return new;
 }
 
+static inline int rwsem_is_locked(struct rw_semaphore *sem)
+{
+	return (sem->count != 0);
+}
+
 #endif /* __KERNEL__ */
 #endif /* _S390_RWSEM_H */
diff --git a/include/asm-sh/rwsem.h b/include/asm-sh/rwsem.h
index 1be4337f525..0262d3d1e5e 100644
--- a/include/asm-sh/rwsem.h
+++ b/include/asm-sh/rwsem.h
@@ -166,5 +166,10 @@ static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem)
 	return atomic_add_return(delta, (atomic_t *)(&sem->count));
 }
 
+static inline int rwsem_is_locked(struct rw_semaphore *sem)
+{
+	return (sem->count != 0);
+}
+
 #endif /* __KERNEL__ */
 #endif /* _ASM_SH_RWSEM_H */
diff --git a/include/asm-sparc64/rwsem.h b/include/asm-sparc64/rwsem.h
index 4568ee4022d..cef5e827042 100644
--- a/include/asm-sparc64/rwsem.h
+++ b/include/asm-sparc64/rwsem.h
@@ -56,6 +56,11 @@ static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem)
 	atomic_add(delta, (atomic_t *)(&sem->count));
 }
 
+static inline int rwsem_is_locked(struct rw_semaphore *sem)
+{
+	return (sem->count != 0);
+}
+
 #endif /* __KERNEL__ */
 
 #endif /* _SPARC64_RWSEM_H */
diff --git a/include/asm-x86_64/rwsem.h b/include/asm-x86_64/rwsem.h
index c002175b6e8..46077e9c191 100644
--- a/include/asm-x86_64/rwsem.h
+++ b/include/asm-x86_64/rwsem.h
@@ -274,5 +274,10 @@ LOCK_PREFIX	"xaddl %0,(%2)"
 	return tmp+delta;
 }
 
+static inline int rwsem_is_locked(struct rw_semaphore *sem)
+{
+	return (sem->count != 0);
+}
+
 #endif /* __KERNEL__ */
 #endif /* _X8664_RWSEM_H */
diff --git a/include/linux/rwsem-spinlock.h b/include/linux/rwsem-spinlock.h
index b52a2af25f1..f30f805080a 100644
--- a/include/linux/rwsem-spinlock.h
+++ b/include/linux/rwsem-spinlock.h
@@ -61,5 +61,10 @@ extern void FASTCALL(__up_read(struct rw_semaphore *sem));
 extern void FASTCALL(__up_write(struct rw_semaphore *sem));
 extern void FASTCALL(__downgrade_write(struct rw_semaphore *sem));
 
+static inline int rwsem_is_locked(struct rw_semaphore *sem)
+{
+	return (sem->activity != 0);
+}
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_RWSEM_SPINLOCK_H */
-- 
cgit v1.2.3-70-g09d2


From 15a23ffa2fc91cebdac44d4aee994f59d5c28dc0 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Sat, 29 Oct 2005 18:16:01 -0700
Subject: [PATCH] mm: tlb_gather_mmu get_cpu_var

tlb_gather_mmu dates from before kernel preemption was allowed, and uses
smp_processor_id or __get_cpu_var to find its per-cpu mmu_gather.  That works
because it's currently only called after getting page_table_lock, which is not
dropped until after the matching tlb_finish_mmu.  But don't rely on that, it
will soon change: now disable preemption internally by proper get_cpu_var in
tlb_gather_mmu, put_cpu_var in tlb_finish_mmu.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-arm/tlb.h     |  5 +++--
 include/asm-arm26/tlb.h   |  7 ++++---
 include/asm-generic/tlb.h | 10 +++++-----
 include/asm-ia64/tlb.h    |  6 ++++--
 include/asm-sparc64/tlb.h |  4 +++-
 5 files changed, 19 insertions(+), 13 deletions(-)

(limited to 'include/asm-ia64')

diff --git a/include/asm-arm/tlb.h b/include/asm-arm/tlb.h
index 9bb325c5464..da41df20928 100644
--- a/include/asm-arm/tlb.h
+++ b/include/asm-arm/tlb.h
@@ -39,8 +39,7 @@ DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
 static inline struct mmu_gather *
 tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
 {
-	int cpu = smp_processor_id();
-	struct mmu_gather *tlb = &per_cpu(mmu_gathers, cpu);
+	struct mmu_gather *tlb = &get_cpu_var(mmu_gathers);
 
 	tlb->mm = mm;
 	tlb->freed = 0;
@@ -65,6 +64,8 @@ tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
 
 	/* keep the page table cache within bounds */
 	check_pgt_cache();
+
+	put_cpu_var(mmu_gathers);
 }
 
 static inline unsigned int tlb_is_full_mm(struct mmu_gather *tlb)
diff --git a/include/asm-arm26/tlb.h b/include/asm-arm26/tlb.h
index 1316352a58f..8486b00a679 100644
--- a/include/asm-arm26/tlb.h
+++ b/include/asm-arm26/tlb.h
@@ -17,13 +17,12 @@ struct mmu_gather {
         unsigned int            avoided_flushes;
 };
 
-extern struct mmu_gather mmu_gathers[NR_CPUS];
+DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
 
 static inline struct mmu_gather *
 tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
 {
-        int cpu = smp_processor_id();
-        struct mmu_gather *tlb = &mmu_gathers[cpu];
+        struct mmu_gather *tlb = &get_cpu_var(mmu_gathers);
 
         tlb->mm = mm;
         tlb->freed = 0;
@@ -52,6 +51,8 @@ tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
 
         /* keep the page table cache within bounds */
         check_pgt_cache();
+
+        put_cpu_var(mmu_gathers);
 }
 
 
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 7d0298347ee..c8232622c8d 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -35,9 +35,7 @@
 #endif
 
 /* struct mmu_gather is an opaque type used by the mm code for passing around
- * any data needed by arch specific code for tlb_remove_page.  This structure
- * can be per-CPU or per-MM as the page table lock is held for the duration of
- * TLB shootdown.
+ * any data needed by arch specific code for tlb_remove_page.
  */
 struct mmu_gather {
 	struct mm_struct	*mm;
@@ -57,7 +55,7 @@ DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
 static inline struct mmu_gather *
 tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
 {
-	struct mmu_gather *tlb = &per_cpu(mmu_gathers, smp_processor_id());
+	struct mmu_gather *tlb = &get_cpu_var(mmu_gathers);
 
 	tlb->mm = mm;
 
@@ -85,7 +83,7 @@ tlb_flush_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
 
 /* tlb_finish_mmu
  *	Called at the end of the shootdown operation to free up any resources
- *	that were required.  The page table lock is still held at this point.
+ *	that were required.
  */
 static inline void
 tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
@@ -101,6 +99,8 @@ tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
 
 	/* keep the page table cache within bounds */
 	check_pgt_cache();
+
+	put_cpu_var(mmu_gathers);
 }
 
 static inline unsigned int
diff --git a/include/asm-ia64/tlb.h b/include/asm-ia64/tlb.h
index 3a9a6d1be75..1b82299d7c1 100644
--- a/include/asm-ia64/tlb.h
+++ b/include/asm-ia64/tlb.h
@@ -129,7 +129,7 @@ ia64_tlb_flush_mmu (struct mmu_gather *tlb, unsigned long start, unsigned long e
 static inline struct mmu_gather *
 tlb_gather_mmu (struct mm_struct *mm, unsigned int full_mm_flush)
 {
-	struct mmu_gather *tlb = &__get_cpu_var(mmu_gathers);
+	struct mmu_gather *tlb = &get_cpu_var(mmu_gathers);
 
 	tlb->mm = mm;
 	/*
@@ -154,7 +154,7 @@ tlb_gather_mmu (struct mm_struct *mm, unsigned int full_mm_flush)
 
 /*
  * Called at the end of the shootdown operation to free up any resources that were
- * collected.  The page table lock is still held at this point.
+ * collected.
  */
 static inline void
 tlb_finish_mmu (struct mmu_gather *tlb, unsigned long start, unsigned long end)
@@ -174,6 +174,8 @@ tlb_finish_mmu (struct mmu_gather *tlb, unsigned long start, unsigned long end)
 
 	/* keep the page table cache within bounds */
 	check_pgt_cache();
+
+	put_cpu_var(mmu_gathers);
 }
 
 static inline unsigned int
diff --git a/include/asm-sparc64/tlb.h b/include/asm-sparc64/tlb.h
index 9baf57db01d..169309bdbf8 100644
--- a/include/asm-sparc64/tlb.h
+++ b/include/asm-sparc64/tlb.h
@@ -44,7 +44,7 @@ extern void flush_tlb_pending(void);
 
 static inline struct mmu_gather *tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
 {
-	struct mmu_gather *mp = &__get_cpu_var(mmu_gathers);
+	struct mmu_gather *mp = &get_cpu_var(mmu_gathers);
 
 	BUG_ON(mp->tlb_nr);
 
@@ -97,6 +97,8 @@ static inline void tlb_finish_mmu(struct mmu_gather *mp, unsigned long start, un
 
 	/* keep the page table cache within bounds */
 	check_pgt_cache();
+
+	put_cpu_var(mmu_gathers);
 }
 
 static inline unsigned int tlb_is_full_mm(struct mmu_gather *mp)
-- 
cgit v1.2.3-70-g09d2


From 4d6ddfa9242bc3d27fb0f7248f6fdee0299c731f Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Sat, 29 Oct 2005 18:16:02 -0700
Subject: [PATCH] mm: tlb_is_full_mm was obscure

tlb_is_full_mm?  What does that mean?  The TLB is full?  No, it means that the
mm's last user has gone and the whole mm is being torn down.  And it's an
inline function because sparc64 uses a different (slightly better)
"tlb_frozen" name for the flag others call "fullmm".

And now the ptep_get_and_clear_full macro used in zap_pte_range refers
directly to tlb->fullmm, which would be wrong for sparc64.  Rather than
correct that, I'd prefer to scrap tlb_is_full_mm altogether, and change
sparc64 to just use the same poor name as everyone else - is that okay?

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/sparc64/mm/tlb.c     |  4 ++--
 include/asm-arm/tlb.h     |  5 -----
 include/asm-arm26/tlb.h   |  7 -------
 include/asm-generic/tlb.h |  6 ------
 include/asm-ia64/tlb.h    |  6 ------
 include/asm-sparc64/tlb.h | 13 ++++---------
 mm/memory.c               |  4 ++--
 7 files changed, 8 insertions(+), 37 deletions(-)

(limited to 'include/asm-ia64')

diff --git a/arch/sparc64/mm/tlb.c b/arch/sparc64/mm/tlb.c
index 90ca99d0b89..6a43f7cd090 100644
--- a/arch/sparc64/mm/tlb.c
+++ b/arch/sparc64/mm/tlb.c
@@ -72,7 +72,7 @@ void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr, pte_t *ptep, pte_t
 
 no_cache_flush:
 
-	if (mp->tlb_frozen)
+	if (mp->fullmm)
 		return;
 
 	nr = mp->tlb_nr;
@@ -97,7 +97,7 @@ void flush_tlb_pgtables(struct mm_struct *mm, unsigned long start, unsigned long
 	unsigned long nr = mp->tlb_nr;
 	long s = start, e = end, vpte_base;
 
-	if (mp->tlb_frozen)
+	if (mp->fullmm)
 		return;
 
 	/* If start is greater than end, that is a real problem.  */
diff --git a/include/asm-arm/tlb.h b/include/asm-arm/tlb.h
index da41df20928..a35ab0f2e25 100644
--- a/include/asm-arm/tlb.h
+++ b/include/asm-arm/tlb.h
@@ -68,11 +68,6 @@ tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
 	put_cpu_var(mmu_gathers);
 }
 
-static inline unsigned int tlb_is_full_mm(struct mmu_gather *tlb)
-{
-	return tlb->fullmm;
-}
-
 #define tlb_remove_tlb_entry(tlb,ptep,address)	do { } while (0)
 
 /*
diff --git a/include/asm-arm26/tlb.h b/include/asm-arm26/tlb.h
index 8486b00a679..c7d54ca0a23 100644
--- a/include/asm-arm26/tlb.h
+++ b/include/asm-arm26/tlb.h
@@ -55,13 +55,6 @@ tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
         put_cpu_var(mmu_gathers);
 }
 
-
-static inline unsigned int
-tlb_is_full_mm(struct mmu_gather *tlb)
-{
-     return tlb->fullmm;
-}
-
 #define tlb_remove_tlb_entry(tlb,ptep,address)  do { } while (0)
 //#define tlb_start_vma(tlb,vma)                  do { } while (0)
 //FIXME - ARM32 uses this now that things changed in the kernel. seems like it may be pointless on arm26, however to get things compiling...
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index c8232622c8d..5d352a70f00 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -103,12 +103,6 @@ tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
 	put_cpu_var(mmu_gathers);
 }
 
-static inline unsigned int
-tlb_is_full_mm(struct mmu_gather *tlb)
-{
-	return tlb->fullmm;
-}
-
 /* tlb_remove_page
  *	Must perform the equivalent to __free_pte(pte_get_and_clear(ptep)), while
  *	handling the additional races in SMP caused by other CPUs caching valid
diff --git a/include/asm-ia64/tlb.h b/include/asm-ia64/tlb.h
index 1b82299d7c1..0bbd79f6a79 100644
--- a/include/asm-ia64/tlb.h
+++ b/include/asm-ia64/tlb.h
@@ -178,12 +178,6 @@ tlb_finish_mmu (struct mmu_gather *tlb, unsigned long start, unsigned long end)
 	put_cpu_var(mmu_gathers);
 }
 
-static inline unsigned int
-tlb_is_full_mm(struct mmu_gather *tlb)
-{
-     return tlb->fullmm;
-}
-
 /*
  * Logically, this routine frees PAGE.  On MP machines, the actual freeing of the page
  * must be delayed until after the TLB has been flushed (see comments at the beginning of
diff --git a/include/asm-sparc64/tlb.h b/include/asm-sparc64/tlb.h
index 169309bdbf8..5d194eae870 100644
--- a/include/asm-sparc64/tlb.h
+++ b/include/asm-sparc64/tlb.h
@@ -25,7 +25,7 @@ struct mmu_gather {
 	struct mm_struct *mm;
 	unsigned int pages_nr;
 	unsigned int need_flush;
-	unsigned int tlb_frozen;
+	unsigned int fullmm;
 	unsigned int tlb_nr;
 	unsigned long freed;
 	unsigned long vaddrs[TLB_BATCH_NR];
@@ -50,7 +50,7 @@ static inline struct mmu_gather *tlb_gather_mmu(struct mm_struct *mm, unsigned i
 
 	mp->mm = mm;
 	mp->pages_nr = num_online_cpus() > 1 ? 0U : ~0U;
-	mp->tlb_frozen = full_mm_flush;
+	mp->fullmm = full_mm_flush;
 	mp->freed = 0;
 
 	return mp;
@@ -88,10 +88,10 @@ static inline void tlb_finish_mmu(struct mmu_gather *mp, unsigned long start, un
 
 	tlb_flush_mmu(mp);
 
-	if (mp->tlb_frozen) {
+	if (mp->fullmm) {
 		if (CTX_VALID(mm->context))
 			do_flush_tlb_mm(mm);
-		mp->tlb_frozen = 0;
+		mp->fullmm = 0;
 	} else
 		flush_tlb_pending();
 
@@ -101,11 +101,6 @@ static inline void tlb_finish_mmu(struct mmu_gather *mp, unsigned long start, un
 	put_cpu_var(mmu_gathers);
 }
 
-static inline unsigned int tlb_is_full_mm(struct mmu_gather *mp)
-{
-	return mp->tlb_frozen;
-}
-
 static inline void tlb_remove_page(struct mmu_gather *mp, struct page *page)
 {
 	mp->need_flush = 1;
diff --git a/mm/memory.c b/mm/memory.c
index eaf79031f57..585bb4e0b97 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -249,7 +249,7 @@ void free_pgd_range(struct mmu_gather **tlb,
 		free_pud_range(*tlb, pgd, addr, next, floor, ceiling);
 	} while (pgd++, addr = next, addr != end);
 
-	if (!tlb_is_full_mm(*tlb))
+	if (!(*tlb)->fullmm)
 		flush_tlb_pgtables((*tlb)->mm, start, end);
 }
 
@@ -698,7 +698,7 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm,
 	int tlb_start_valid = 0;
 	unsigned long start = start_addr;
 	spinlock_t *i_mmap_lock = details? details->i_mmap_lock: NULL;
-	int fullmm = tlb_is_full_mm(*tlbp);
+	int fullmm = (*tlbp)->fullmm;
 
 	for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next) {
 		unsigned long end;
-- 
cgit v1.2.3-70-g09d2


From fc2acab31be8e869b2d5f6de12f557f6f054f19c Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Sat, 29 Oct 2005 18:16:03 -0700
Subject: [PATCH] mm: tlb_finish_mmu forget rss

zap_pte_range has been counting the pages it frees in tlb->freed, then
tlb_finish_mmu has used that to update the mm's rss.  That got stranger when I
added anon_rss, yet updated it by a different route; and stranger when rss and
anon_rss became mm_counters with special access macros.  And it would no
longer be viable if we're relying on page_table_lock to stabilize the
mm_counter, but calling tlb_finish_mmu outside that lock.

Remove the mmu_gather's freed field, let tlb_finish_mmu stick to its own
business, just decrement the rss mm_counter in zap_pte_range (yes, there was
some point to batching the update, and a subsequent patch restores that).  And
forget the anal paranoia of first reading the counter to avoid going negative
- if rss does go negative, just fix that bug.

Remove the mmu_gather's flushes and avoided_flushes from arm and arm26: no use
was being made of them.  But arm26 alone was actually using the freed, in the
way some others use need_flush: give it a need_flush.  arm26 seems to prefer
spaces to tabs here: respect that.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/sparc64/mm/tlb.c     |  3 +--
 include/asm-arm/tlb.h     | 15 +--------------
 include/asm-arm26/tlb.h   | 35 +++++++++++++----------------------
 include/asm-generic/tlb.h |  9 ---------
 include/asm-ia64/tlb.h    |  9 ---------
 include/asm-sparc64/tlb.h | 14 ++------------
 mm/memory.c               |  2 +-
 7 files changed, 18 insertions(+), 69 deletions(-)

(limited to 'include/asm-ia64')

diff --git a/arch/sparc64/mm/tlb.c b/arch/sparc64/mm/tlb.c
index 6a43f7cd090..8b104be4662 100644
--- a/arch/sparc64/mm/tlb.c
+++ b/arch/sparc64/mm/tlb.c
@@ -18,8 +18,7 @@
 
 /* Heavily inspired by the ppc64 code.  */
 
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers) =
-	{ NULL, 0, 0, 0, 0, 0, { 0 }, { NULL }, };
+DEFINE_PER_CPU(struct mmu_gather, mmu_gathers) = { 0, };
 
 void flush_tlb_pending(void)
 {
diff --git a/include/asm-arm/tlb.h b/include/asm-arm/tlb.h
index a35ab0f2e25..f49bfb78c22 100644
--- a/include/asm-arm/tlb.h
+++ b/include/asm-arm/tlb.h
@@ -27,11 +27,7 @@
  */
 struct mmu_gather {
 	struct mm_struct	*mm;
-	unsigned int		freed;
 	unsigned int		fullmm;
-
-	unsigned int		flushes;
-	unsigned int		avoided_flushes;
 };
 
 DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
@@ -42,7 +38,6 @@ tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
 	struct mmu_gather *tlb = &get_cpu_var(mmu_gathers);
 
 	tlb->mm = mm;
-	tlb->freed = 0;
 	tlb->fullmm = full_mm_flush;
 
 	return tlb;
@@ -51,16 +46,8 @@ tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
 static inline void
 tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
 {
-	struct mm_struct *mm = tlb->mm;
-	unsigned long freed = tlb->freed;
-	int rss = get_mm_counter(mm, rss);
-
-	if (rss < freed)
-		freed = rss;
-	add_mm_counter(mm, rss, -freed);
-
 	if (tlb->fullmm)
-		flush_tlb_mm(mm);
+		flush_tlb_mm(tlb->mm);
 
 	/* keep the page table cache within bounds */
 	check_pgt_cache();
diff --git a/include/asm-arm26/tlb.h b/include/asm-arm26/tlb.h
index c7d54ca0a23..08ddd85b8d3 100644
--- a/include/asm-arm26/tlb.h
+++ b/include/asm-arm26/tlb.h
@@ -10,11 +10,8 @@
  */
 struct mmu_gather {
         struct mm_struct        *mm;
-        unsigned int            freed;
-	unsigned int            fullmm;
-
-        unsigned int            flushes;
-        unsigned int            avoided_flushes;
+        unsigned int            need_flush;
+        unsigned int            fullmm;
 };
 
 DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
@@ -25,8 +22,8 @@ tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
         struct mmu_gather *tlb = &get_cpu_var(mmu_gathers);
 
         tlb->mm = mm;
-        tlb->freed = 0;
-	tlb->fullmm = full_mm_flush;
+        tlb->need_flush = 0;
+        tlb->fullmm = full_mm_flush;
 
         return tlb;
 }
@@ -34,20 +31,8 @@ tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
 static inline void
 tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
 {
-        struct mm_struct *mm = tlb->mm;
-        unsigned long freed = tlb->freed;
-        int rss = get_mm_counter(mm, rss);
-
-        if (rss < freed)
-                freed = rss;
-        add_mm_counter(mm, rss, -freed);
-
-        if (freed) {
-                flush_tlb_mm(mm);
-                tlb->flushes++;
-        } else {
-                tlb->avoided_flushes++;
-        }
+        if (tlb->need_flush)
+                flush_tlb_mm(tlb->mm);
 
         /* keep the page table cache within bounds */
         check_pgt_cache();
@@ -65,7 +50,13 @@ tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
         } while (0)
 #define tlb_end_vma(tlb,vma)                    do { } while (0)
 
-#define tlb_remove_page(tlb,page)       free_page_and_swap_cache(page)
+static inline void
+tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+{
+        tlb->need_flush = 1;
+        free_page_and_swap_cache(page);
+}
+
 #define pte_free_tlb(tlb,ptep)          pte_free(ptep)
 #define pmd_free_tlb(tlb,pmdp)          pmd_free(pmdp)
 
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 5d352a70f00..cdd4145243c 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -42,7 +42,6 @@ struct mmu_gather {
 	unsigned int		nr;	/* set to ~0U means fast mode */
 	unsigned int		need_flush;/* Really unmapped some ptes? */
 	unsigned int		fullmm; /* non-zero means full mm flush */
-	unsigned long		freed;
 	struct page *		pages[FREE_PTE_NR];
 };
 
@@ -63,7 +62,6 @@ tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
 	tlb->nr = num_online_cpus() > 1 ? 0U : ~0U;
 
 	tlb->fullmm = full_mm_flush;
-	tlb->freed = 0;
 
 	return tlb;
 }
@@ -88,13 +86,6 @@ tlb_flush_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
 static inline void
 tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
 {
-	int freed = tlb->freed;
-	struct mm_struct *mm = tlb->mm;
-	int rss = get_mm_counter(mm, rss);
-
-	if (rss < freed)
-		freed = rss;
-	add_mm_counter(mm, rss, -freed);
 	tlb_flush_mmu(tlb, start, end);
 
 	/* keep the page table cache within bounds */
diff --git a/include/asm-ia64/tlb.h b/include/asm-ia64/tlb.h
index 0bbd79f6a79..834370b9dea 100644
--- a/include/asm-ia64/tlb.h
+++ b/include/asm-ia64/tlb.h
@@ -60,7 +60,6 @@ struct mmu_gather {
 	unsigned int		nr;		/* == ~0U => fast mode */
 	unsigned char		fullmm;		/* non-zero means full mm flush */
 	unsigned char		need_flush;	/* really unmapped some PTEs? */
-	unsigned long		freed;		/* number of pages freed */
 	unsigned long		start_addr;
 	unsigned long		end_addr;
 	struct page 		*pages[FREE_PTE_NR];
@@ -147,7 +146,6 @@ tlb_gather_mmu (struct mm_struct *mm, unsigned int full_mm_flush)
 	 */
 	tlb->nr = (num_online_cpus() == 1) ? ~0U : 0;
 	tlb->fullmm = full_mm_flush;
-	tlb->freed = 0;
 	tlb->start_addr = ~0UL;
 	return tlb;
 }
@@ -159,13 +157,6 @@ tlb_gather_mmu (struct mm_struct *mm, unsigned int full_mm_flush)
 static inline void
 tlb_finish_mmu (struct mmu_gather *tlb, unsigned long start, unsigned long end)
 {
-	unsigned long freed = tlb->freed;
-	struct mm_struct *mm = tlb->mm;
-	unsigned long rss = get_mm_counter(mm, rss);
-
-	if (rss < freed)
-		freed = rss;
-	add_mm_counter(mm, rss, -freed);
 	/*
 	 * Note: tlb->nr may be 0 at this point, so we can't rely on tlb->start_addr and
 	 * tlb->end_addr.
diff --git a/include/asm-sparc64/tlb.h b/include/asm-sparc64/tlb.h
index 5d194eae870..66138d959df 100644
--- a/include/asm-sparc64/tlb.h
+++ b/include/asm-sparc64/tlb.h
@@ -27,7 +27,6 @@ struct mmu_gather {
 	unsigned int need_flush;
 	unsigned int fullmm;
 	unsigned int tlb_nr;
-	unsigned long freed;
 	unsigned long vaddrs[TLB_BATCH_NR];
 	struct page *pages[FREE_PTE_NR];
 };
@@ -51,7 +50,6 @@ static inline struct mmu_gather *tlb_gather_mmu(struct mm_struct *mm, unsigned i
 	mp->mm = mm;
 	mp->pages_nr = num_online_cpus() > 1 ? 0U : ~0U;
 	mp->fullmm = full_mm_flush;
-	mp->freed = 0;
 
 	return mp;
 }
@@ -78,19 +76,11 @@ extern void smp_flush_tlb_mm(struct mm_struct *mm);
 
 static inline void tlb_finish_mmu(struct mmu_gather *mp, unsigned long start, unsigned long end)
 {
-	unsigned long freed = mp->freed;
-	struct mm_struct *mm = mp->mm;
-	unsigned long rss = get_mm_counter(mm, rss);
-
-	if (rss < freed)
-		freed = rss;
-	add_mm_counter(mm, rss, -freed);
-
 	tlb_flush_mmu(mp);
 
 	if (mp->fullmm) {
-		if (CTX_VALID(mm->context))
-			do_flush_tlb_mm(mm);
+		if (CTX_VALID(mp->mm->context))
+			do_flush_tlb_mm(mp->mm);
 		mp->fullmm = 0;
 	} else
 		flush_tlb_pending();
diff --git a/mm/memory.c b/mm/memory.c
index 585bb4e0b97..51eb3857483 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -582,7 +582,7 @@ static void zap_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
 				if (pte_young(ptent))
 					mark_page_accessed(page);
 			}
-			tlb->freed++;
+			dec_mm_counter(tlb->mm, rss);
 			page_remove_rmap(page);
 			tlb_remove_page(tlb, page);
 			continue;
-- 
cgit v1.2.3-70-g09d2


From dfb7dac3af623a68262536437af008ed6aba4d88 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sun, 30 Oct 2005 15:02:22 -0800
Subject: [PATCH] unify sys_ptrace prototype

Make sure we always return long, as all syscalls should.  Also move the
common prototype to <linux/syscalls.h>.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/arm/kernel/ptrace.c               | 2 +-
 arch/arm26/kernel/ptrace.c             | 2 +-
 arch/frv/kernel/ptrace.c               | 2 +-
 arch/h8300/kernel/ptrace.c             | 2 +-
 arch/i386/kernel/ptrace.c              | 2 +-
 arch/ia64/ia32/sys_ia32.c              | 1 +
 arch/m32r/kernel/ptrace.c              | 2 +-
 arch/m68k/kernel/ptrace.c              | 2 +-
 arch/m68knommu/kernel/ptrace.c         | 2 +-
 arch/mips/kernel/ptrace.c              | 2 +-
 arch/ppc/kernel/ptrace.c               | 2 +-
 arch/ppc64/kernel/ptrace.c             | 2 +-
 arch/sh/kernel/ptrace.c                | 2 +-
 arch/sh64/kernel/ptrace.c              | 2 +-
 arch/um/include/sysdep-i386/syscalls.h | 1 -
 arch/v850/kernel/ptrace.c              | 2 +-
 arch/xtensa/kernel/ptrace.c            | 2 +-
 include/asm-arm/unistd.h               | 1 -
 include/asm-arm26/unistd.h             | 1 -
 include/asm-cris/unistd.h              | 1 -
 include/asm-h8300/unistd.h             | 1 -
 include/asm-i386/unistd.h              | 1 -
 include/asm-ia64/unistd.h              | 2 --
 include/asm-m32r/unistd.h              | 1 -
 include/asm-m68k/unistd.h              | 1 -
 include/asm-m68knommu/unistd.h         | 1 -
 include/asm-mips/unistd.h              | 1 -
 include/asm-ppc/unistd.h               | 1 -
 include/asm-ppc64/unistd.h             | 1 -
 include/asm-s390/unistd.h              | 1 -
 include/asm-sh/unistd.h                | 1 -
 include/asm-v850/unistd.h              | 1 -
 include/asm-x86_64/unistd.h            | 2 --
 include/linux/syscalls.h               | 1 +
 34 files changed, 17 insertions(+), 34 deletions(-)

(limited to 'include/asm-ia64')

diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index cd99b83f14c..9bd8609a292 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -782,7 +782,7 @@ static int do_ptrace(int request, struct task_struct *child, long addr, long dat
 	return ret;
 }
 
-asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
+asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
 {
 	struct task_struct *child;
 	int ret;
diff --git a/arch/arm26/kernel/ptrace.c b/arch/arm26/kernel/ptrace.c
index 8a52124de0e..cf7e977d18c 100644
--- a/arch/arm26/kernel/ptrace.c
+++ b/arch/arm26/kernel/ptrace.c
@@ -665,7 +665,7 @@ static int do_ptrace(int request, struct task_struct *child, long addr, long dat
 	return ret;
 }
 
-asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
+asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
 {
 	struct task_struct *child;
 	int ret;
diff --git a/arch/frv/kernel/ptrace.c b/arch/frv/kernel/ptrace.c
index cbe03cba9f0..cb335a14a31 100644
--- a/arch/frv/kernel/ptrace.c
+++ b/arch/frv/kernel/ptrace.c
@@ -106,7 +106,7 @@ void ptrace_enable(struct task_struct *child)
 	child->thread.frame0->__status |= REG__STATUS_STEP;
 }
 
-asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
+asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
 {
 	struct task_struct *child;
 	unsigned long tmp;
diff --git a/arch/h8300/kernel/ptrace.c b/arch/h8300/kernel/ptrace.c
index 05c15e86977..a569fe4aa28 100644
--- a/arch/h8300/kernel/ptrace.c
+++ b/arch/h8300/kernel/ptrace.c
@@ -57,7 +57,7 @@ void ptrace_disable(struct task_struct *child)
 	h8300_disable_trace(child);
 }
 
-asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
+asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
 {
 	struct task_struct *child;
 	int ret;
diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c
index 7b6368bf897..efd11f09c99 100644
--- a/arch/i386/kernel/ptrace.c
+++ b/arch/i386/kernel/ptrace.c
@@ -354,7 +354,7 @@ ptrace_set_thread_area(struct task_struct *child,
 	return 0;
 }
 
-asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
+asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
 {
 	struct task_struct *child;
 	struct user * dummy = NULL;
diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c
index 3fa67ecebc8..dc282710421 100644
--- a/arch/ia64/ia32/sys_ia32.c
+++ b/arch/ia64/ia32/sys_ia32.c
@@ -36,6 +36,7 @@
 #include <linux/uio.h>
 #include <linux/nfs_fs.h>
 #include <linux/quota.h>
+#include <linux/syscalls.h>
 #include <linux/sunrpc/svc.h>
 #include <linux/nfsd/nfsd.h>
 #include <linux/nfsd/cache.h>
diff --git a/arch/m32r/kernel/ptrace.c b/arch/m32r/kernel/ptrace.c
index 124f7c1b775..078d2a0e71c 100644
--- a/arch/m32r/kernel/ptrace.c
+++ b/arch/m32r/kernel/ptrace.c
@@ -756,7 +756,7 @@ do_ptrace(long request, struct task_struct *child, long addr, long data)
 	return ret;
 }
 
-asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
+asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
 {
 	struct task_struct *child;
 	int ret;
diff --git a/arch/m68k/kernel/ptrace.c b/arch/m68k/kernel/ptrace.c
index 8ed1b01a6a8..f7f1d2e5b90 100644
--- a/arch/m68k/kernel/ptrace.c
+++ b/arch/m68k/kernel/ptrace.c
@@ -121,7 +121,7 @@ void ptrace_disable(struct task_struct *child)
 	child->thread.work.syscall_trace = 0;
 }
 
-asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
+asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
 {
 	struct task_struct *child;
 	unsigned long tmp;
diff --git a/arch/m68knommu/kernel/ptrace.c b/arch/m68knommu/kernel/ptrace.c
index 9724e1cd82e..621d7b91ccf 100644
--- a/arch/m68knommu/kernel/ptrace.c
+++ b/arch/m68knommu/kernel/ptrace.c
@@ -101,7 +101,7 @@ void ptrace_disable(struct task_struct *child)
 	put_reg(child, PT_SR, tmp);
 }
 
-asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
+asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
 {
 	struct task_struct *child;
 	int ret;
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index fcceab8f2e0..f1b0f3e1f95 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -174,7 +174,7 @@ int ptrace_setfpregs (struct task_struct *child, __u32 __user *data)
 	return 0;
 }
 
-asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
+asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
 {
 	struct task_struct *child;
 	int ret;
diff --git a/arch/ppc/kernel/ptrace.c b/arch/ppc/kernel/ptrace.c
index e7aee4108de..e2744b6879d 100644
--- a/arch/ppc/kernel/ptrace.c
+++ b/arch/ppc/kernel/ptrace.c
@@ -240,7 +240,7 @@ void ptrace_disable(struct task_struct *child)
 	clear_single_step(child);
 }
 
-int sys_ptrace(long request, long pid, long addr, long data)
+long sys_ptrace(long request, long pid, long addr, long data)
 {
 	struct task_struct *child;
 	int ret = -EPERM;
diff --git a/arch/ppc64/kernel/ptrace.c b/arch/ppc64/kernel/ptrace.c
index b1c044ca575..b33073c3172 100644
--- a/arch/ppc64/kernel/ptrace.c
+++ b/arch/ppc64/kernel/ptrace.c
@@ -53,7 +53,7 @@ void ptrace_disable(struct task_struct *child)
 	clear_single_step(child);
 }
 
-int sys_ptrace(long request, long pid, long addr, long data)
+long sys_ptrace(long request, long pid, long addr, long data)
 {
 	struct task_struct *child;
 	int ret = -EPERM;
diff --git a/arch/sh/kernel/ptrace.c b/arch/sh/kernel/ptrace.c
index b28919b6568..1fbe5a428e3 100644
--- a/arch/sh/kernel/ptrace.c
+++ b/arch/sh/kernel/ptrace.c
@@ -80,7 +80,7 @@ void ptrace_disable(struct task_struct *child)
 	/* nothing to do.. */
 }
 
-asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
+asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
 {
 	struct task_struct *child;
 	struct user * dummy = NULL;
diff --git a/arch/sh64/kernel/ptrace.c b/arch/sh64/kernel/ptrace.c
index fd2000956da..71f2eec00b9 100644
--- a/arch/sh64/kernel/ptrace.c
+++ b/arch/sh64/kernel/ptrace.c
@@ -121,7 +121,7 @@ put_fpu_long(struct task_struct *task, unsigned long addr, unsigned long data)
 	return 0;
 }
 
-asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
+asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
 {
 	struct task_struct *child;
 	extern void poke_real_address_q(unsigned long long addr, unsigned long long data);
diff --git a/arch/um/include/sysdep-i386/syscalls.h b/arch/um/include/sysdep-i386/syscalls.h
index a0d5b74d373..57bd79efbee 100644
--- a/arch/um/include/sysdep-i386/syscalls.h
+++ b/arch/um/include/sysdep-i386/syscalls.h
@@ -11,7 +11,6 @@ typedef long syscall_handler_t(struct pt_regs);
 /* Not declared on x86, incompatible declarations on x86_64, so these have
  * to go here rather than in sys_call_table.c
  */
-extern syscall_handler_t sys_ptrace;
 extern syscall_handler_t sys_rt_sigaction;
 
 extern syscall_handler_t old_mmap_i386;
diff --git a/arch/v850/kernel/ptrace.c b/arch/v850/kernel/ptrace.c
index 4726b87f5e5..d6077ff47d2 100644
--- a/arch/v850/kernel/ptrace.c
+++ b/arch/v850/kernel/ptrace.c
@@ -113,7 +113,7 @@ static int set_single_step (struct task_struct *t, int val)
 	return 1;
 }
 
-int sys_ptrace(long request, long pid, long addr, long data)
+long sys_ptrace(long request, long pid, long addr, long data)
 {
 	struct task_struct *child;
 	int rval;
diff --git a/arch/xtensa/kernel/ptrace.c b/arch/xtensa/kernel/ptrace.c
index 2659efdd4e9..14460743de0 100644
--- a/arch/xtensa/kernel/ptrace.c
+++ b/arch/xtensa/kernel/ptrace.c
@@ -45,7 +45,7 @@ void ptrace_disable(struct task_struct *child)
 	/* Nothing to do.. */
 }
 
-int sys_ptrace(long request, long pid, long addr, long data)
+long sys_ptrace(long request, long pid, long addr, long data)
 {
 	struct task_struct *child;
 	int ret = -EPERM;
diff --git a/include/asm-arm/unistd.h b/include/asm-arm/unistd.h
index c49df635a80..d626e70fade 100644
--- a/include/asm-arm/unistd.h
+++ b/include/asm-arm/unistd.h
@@ -544,7 +544,6 @@ asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp,
 asmlinkage int sys_fork(struct pt_regs *regs);
 asmlinkage int sys_vfork(struct pt_regs *regs);
 asmlinkage int sys_pipe(unsigned long *fildes);
-asmlinkage int sys_ptrace(long request, long pid, long addr, long data);
 struct sigaction;
 asmlinkage long sys_rt_sigaction(int sig,
 				const struct sigaction __user *act,
diff --git a/include/asm-arm26/unistd.h b/include/asm-arm26/unistd.h
index dfa0b0c30aa..be4c2fb9c04 100644
--- a/include/asm-arm26/unistd.h
+++ b/include/asm-arm26/unistd.h
@@ -480,7 +480,6 @@ asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp,
 asmlinkage int sys_fork(struct pt_regs *regs);
 asmlinkage int sys_vfork(struct pt_regs *regs);
 asmlinkage int sys_pipe(unsigned long *fildes);
-asmlinkage int sys_ptrace(long request, long pid, long addr, long data);
 struct sigaction;
 asmlinkage long sys_rt_sigaction(int sig,
 				const struct sigaction __user *act,
diff --git a/include/asm-cris/unistd.h b/include/asm-cris/unistd.h
index 28232ad2ff3..156a34bfc58 100644
--- a/include/asm-cris/unistd.h
+++ b/include/asm-cris/unistd.h
@@ -367,7 +367,6 @@ asmlinkage int sys_fork(long r10, long r11, long r12, long r13,
 asmlinkage int sys_vfork(long r10, long r11, long r12, long r13,
 			long mof, long srp, struct pt_regs *regs);
 asmlinkage int sys_pipe(unsigned long __user *fildes);
-asmlinkage int sys_ptrace(long request, long pid, long addr, long data);
 struct sigaction;
 asmlinkage long sys_rt_sigaction(int sig,
 				const struct sigaction __user *act,
diff --git a/include/asm-h8300/unistd.h b/include/asm-h8300/unistd.h
index 56a6401886f..56a4a5686c8 100644
--- a/include/asm-h8300/unistd.h
+++ b/include/asm-h8300/unistd.h
@@ -528,7 +528,6 @@ asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
 asmlinkage int sys_execve(char *name, char **argv, char **envp,
 			int dummy, ...);
 asmlinkage int sys_pipe(unsigned long *fildes);
-asmlinkage int sys_ptrace(long request, long pid, long addr, long data);
 struct sigaction;
 asmlinkage long sys_rt_sigaction(int sig,
 				const struct sigaction __user *act,
diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h
index fbaf90a3968..0f92e78dfea 100644
--- a/include/asm-i386/unistd.h
+++ b/include/asm-i386/unistd.h
@@ -448,7 +448,6 @@ asmlinkage int sys_clone(struct pt_regs regs);
 asmlinkage int sys_fork(struct pt_regs regs);
 asmlinkage int sys_vfork(struct pt_regs regs);
 asmlinkage int sys_pipe(unsigned long __user *fildes);
-asmlinkage int sys_ptrace(long request, long pid, long addr, long data);
 asmlinkage long sys_iopl(unsigned long unused);
 struct sigaction;
 asmlinkage long sys_rt_sigaction(int sig,
diff --git a/include/asm-ia64/unistd.h b/include/asm-ia64/unistd.h
index 3a0c6952465..6d96a67439b 100644
--- a/include/asm-ia64/unistd.h
+++ b/include/asm-ia64/unistd.h
@@ -383,8 +383,6 @@ struct sigaction;
 long sys_execve(char __user *filename, char __user * __user *argv,
 			   char __user * __user *envp, struct pt_regs *regs);
 asmlinkage long sys_pipe(void);
-asmlinkage long sys_ptrace(long request, pid_t pid,
-			   unsigned long addr, unsigned long data);
 asmlinkage long sys_rt_sigaction(int sig,
 				 const struct sigaction __user *act,
 				 struct sigaction __user *oact,
diff --git a/include/asm-m32r/unistd.h b/include/asm-m32r/unistd.h
index 8552d8f45ab..ac399e1f7bc 100644
--- a/include/asm-m32r/unistd.h
+++ b/include/asm-m32r/unistd.h
@@ -452,7 +452,6 @@ asmlinkage int sys_clone(struct pt_regs regs);
 asmlinkage int sys_fork(struct pt_regs regs);
 asmlinkage int sys_vfork(struct pt_regs regs);
 asmlinkage int sys_pipe(unsigned long __user *fildes);
-asmlinkage int sys_ptrace(long request, long pid, long addr, long data);
 struct sigaction;
 asmlinkage long sys_rt_sigaction(int sig,
 				 const struct sigaction __user *act,
diff --git a/include/asm-m68k/unistd.h b/include/asm-m68k/unistd.h
index cbabde4f8a4..c2554bcd174 100644
--- a/include/asm-m68k/unistd.h
+++ b/include/asm-m68k/unistd.h
@@ -444,7 +444,6 @@ asmlinkage long sys_mmap2(
 			unsigned long fd, unsigned long pgoff);
 asmlinkage int sys_execve(char *name, char **argv, char **envp);
 asmlinkage int sys_pipe(unsigned long *fildes);
-asmlinkage int sys_ptrace(long request, long pid, long addr, long data);
 struct pt_regs;
 struct sigaction;
 asmlinkage long sys_rt_sigaction(int sig,
diff --git a/include/asm-m68knommu/unistd.h b/include/asm-m68knommu/unistd.h
index 84b6fa14459..5373988a7e5 100644
--- a/include/asm-m68knommu/unistd.h
+++ b/include/asm-m68knommu/unistd.h
@@ -504,7 +504,6 @@ asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
 			unsigned long fd, unsigned long pgoff);
 asmlinkage int sys_execve(char *name, char **argv, char **envp);
 asmlinkage int sys_pipe(unsigned long *fildes);
-asmlinkage int sys_ptrace(long request, long pid, long addr, long data);
 struct pt_regs;
 int sys_request_irq(unsigned int,
 			irqreturn_t (*)(int, void *, struct pt_regs *),
diff --git a/include/asm-mips/unistd.h b/include/asm-mips/unistd.h
index c9eaf4c104d..89ea8b60e94 100644
--- a/include/asm-mips/unistd.h
+++ b/include/asm-mips/unistd.h
@@ -1177,7 +1177,6 @@ asmlinkage long sys_mmap2(
 			unsigned long fd, unsigned long pgoff);
 asmlinkage int sys_execve(nabi_no_regargs struct pt_regs regs);
 asmlinkage int sys_pipe(nabi_no_regargs struct pt_regs regs);
-asmlinkage int sys_ptrace(long request, long pid, long addr, long data);
 struct sigaction;
 asmlinkage long sys_rt_sigaction(int sig,
 				const struct sigaction __user *act,
diff --git a/include/asm-ppc/unistd.h b/include/asm-ppc/unistd.h
index 3173ab3d2eb..404c143e643 100644
--- a/include/asm-ppc/unistd.h
+++ b/include/asm-ppc/unistd.h
@@ -469,7 +469,6 @@ int sys_fork(int p1, int p2, int p3, int p4, int p5, int p6,
 int sys_vfork(int p1, int p2, int p3, int p4, int p5, int p6,
 		struct pt_regs *regs);
 int sys_pipe(int __user *fildes);
-int sys_ptrace(long request, long pid, long addr, long data);
 struct sigaction;
 long sys_rt_sigaction(int sig,
 		      const struct sigaction __user *act,
diff --git a/include/asm-ppc64/unistd.h b/include/asm-ppc64/unistd.h
index 977bc980c1a..6b5fcbadbc5 100644
--- a/include/asm-ppc64/unistd.h
+++ b/include/asm-ppc64/unistd.h
@@ -467,7 +467,6 @@ int sys_vfork(unsigned long p1, unsigned long p2, unsigned long p3,
 		unsigned long p4, unsigned long p5, unsigned long p6,
 		struct pt_regs *regs);
 int sys_pipe(int __user *fildes);
-int sys_ptrace(long request, long pid, long addr, long data);
 struct sigaction;
 long sys_rt_sigaction(int sig, const struct sigaction __user *act,
 		      struct sigaction __user *oact, size_t sigsetsize);
diff --git a/include/asm-s390/unistd.h b/include/asm-s390/unistd.h
index 221e965da92..f97d92691f1 100644
--- a/include/asm-s390/unistd.h
+++ b/include/asm-s390/unistd.h
@@ -590,7 +590,6 @@ asmlinkage long sys_clone(struct pt_regs regs);
 asmlinkage long sys_fork(struct pt_regs regs);
 asmlinkage long sys_vfork(struct pt_regs regs);
 asmlinkage long sys_pipe(unsigned long __user *fildes);
-asmlinkage long sys_ptrace(long request, long pid, long addr, long data);
 struct sigaction;
 asmlinkage long sys_rt_sigaction(int sig,
 				const struct sigaction __user *act,
diff --git a/include/asm-sh/unistd.h b/include/asm-sh/unistd.h
index ea89e8f223e..f2c8e14d1fd 100644
--- a/include/asm-sh/unistd.h
+++ b/include/asm-sh/unistd.h
@@ -503,7 +503,6 @@ asmlinkage int sys_vfork(unsigned long r4, unsigned long r5,
 asmlinkage int sys_pipe(unsigned long r4, unsigned long r5,
 			unsigned long r6, unsigned long r7,
 			struct pt_regs regs);
-asmlinkage int sys_ptrace(long request, long pid, long addr, long data);
 asmlinkage ssize_t sys_pread_wrapper(unsigned int fd, char *buf,
 				size_t count, long dummy, loff_t pos);
 asmlinkage ssize_t sys_pwrite_wrapper(unsigned int fd, const char *buf,
diff --git a/include/asm-v850/unistd.h b/include/asm-v850/unistd.h
index 3b552096c0e..5a86f8e976e 100644
--- a/include/asm-v850/unistd.h
+++ b/include/asm-v850/unistd.h
@@ -452,7 +452,6 @@ unsigned long sys_mmap2(unsigned long addr, size_t len,
 struct pt_regs;
 int sys_execve (char *name, char **argv, char **envp, struct pt_regs *regs);
 int sys_pipe (int *fildes);
-int sys_ptrace(long request, long pid, long addr, long data);
 struct sigaction;
 asmlinkage long sys_rt_sigaction(int sig,
 				const struct sigaction __user *act,
diff --git a/include/asm-x86_64/unistd.h b/include/asm-x86_64/unistd.h
index 11ba931cf82..3c494b65d33 100644
--- a/include/asm-x86_64/unistd.h
+++ b/include/asm-x86_64/unistd.h
@@ -780,8 +780,6 @@ asmlinkage long sys_pipe(int *fildes);
 #include <linux/types.h>
 #include <asm/ptrace.h>
 
-asmlinkage long sys_ptrace(long request, long pid,
-				unsigned long addr, long data);
 asmlinkage long sys_iopl(unsigned int level, struct pt_regs *regs);
 asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on);
 struct sigaction;
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index a6f03e47373..c7007b1db91 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -491,6 +491,7 @@ asmlinkage long sys_nfsservctl(int cmd,
 asmlinkage long sys_syslog(int type, char __user *buf, int len);
 asmlinkage long sys_uselib(const char __user *library);
 asmlinkage long sys_ni_syscall(void);
+asmlinkage long sys_ptrace(long request, long pid, long addr, long data);
 
 asmlinkage long sys_add_key(const char __user *_type,
 			    const char __user *_description,
-- 
cgit v1.2.3-70-g09d2


From 1426d7a81dea8e9d85f9d69de85ab04ba37018ab Mon Sep 17 00:00:00 2001
From: Tejun Heo <htejun@gmail.com>
Date: Sun, 30 Oct 2005 15:02:42 -0800
Subject: [PATCH] vm: remove unused/broken page_pte[_prot] macros

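This patch removes the page_pte_prot and page_pte macros from all
architectures.  Some architectures define both, some define only page_pte
(broken, because it expands to page_pte_prot, which those architectures do
not define), and others define neither.  These macros are not used anywhere.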

page_pte_prot(page, prot) is identical to mk_pte(page, prot) and
page_pte(page) is identical to page_pte_prot(page, __pgprot(0)).

* The following architectures define both page_pte_prot and page_pte

  arm, arm26, ia64, sh64, sparc, sparc64

* The following architectures define only page_pte (broken)

  frv, i386, m32r, mips, sh, x86-64

* All other architectures define neither

Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-arm/pgtable.h     | 3 ---
 include/asm-arm26/pgtable.h   | 2 --
 include/asm-frv/pgtable.h     | 2 --
 include/asm-i386/pgtable.h    | 2 --
 include/asm-ia64/pgtable.h    | 3 ---
 include/asm-m32r/pgtable.h    | 2 --
 include/asm-mips/pgtable-64.h | 1 -
 include/asm-mips/pgtable.h    | 1 -
 include/asm-sh/pgtable.h      | 2 --
 include/asm-sh64/pgtable.h    | 3 ---
 include/asm-sparc/pgtable.h   | 2 --
 include/asm-sparc64/pgtable.h | 3 ---
 include/asm-x86_64/pgtable.h  | 2 --
 13 files changed, 28 deletions(-)

(limited to 'include/asm-ia64')

diff --git a/include/asm-arm/pgtable.h b/include/asm-arm/pgtable.h
index 366bafbdfbb..5a0d19b466b 100644
--- a/include/asm-arm/pgtable.h
+++ b/include/asm-arm/pgtable.h
@@ -397,9 +397,6 @@ static inline pte_t *pmd_page_kernel(pmd_t pmd)
 #define pgd_clear(pgdp)		do { } while (0)
 #define set_pgd(pgd,pgdp)	do { } while (0)
 
-#define page_pte_prot(page,prot)	mk_pte(page, prot)
-#define page_pte(page)		mk_pte(page, __pgprot(0))
-
 /* to find an entry in a page-table-directory */
 #define pgd_index(addr)		((addr) >> PGDIR_SHIFT)
 
diff --git a/include/asm-arm26/pgtable.h b/include/asm-arm26/pgtable.h
index f602cf57241..a590250277f 100644
--- a/include/asm-arm26/pgtable.h
+++ b/include/asm-arm26/pgtable.h
@@ -98,8 +98,6 @@ extern struct page *empty_zero_page;
 #define pfn_pte(pfn,prot)	(__pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot)))
 #define pages_to_mb(x)		((x) >> (20 - PAGE_SHIFT))
 #define mk_pte(page,prot)	pfn_pte(page_to_pfn(page),prot)
-#define page_pte_prot(page,prot)	mk_pte(page, prot)
-#define page_pte(page)		mk_pte(page, __pgprot(0))
 
 /*
  * Terminology: PGD = Page Directory, PMD = Page Middle Directory,
diff --git a/include/asm-frv/pgtable.h b/include/asm-frv/pgtable.h
index 473fb4bb632..b247e99dff4 100644
--- a/include/asm-frv/pgtable.h
+++ b/include/asm-frv/pgtable.h
@@ -436,8 +436,6 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 	return pte;
 }
 
-#define page_pte(page)	page_pte_prot((page), __pgprot(0))
-
 /* to find an entry in a page-table-directory. */
 #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
 #define pgd_index_k(addr) pgd_index(addr)
diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h
index 38ec33eb6f1..03f3c8ac638 100644
--- a/include/asm-i386/pgtable.h
+++ b/include/asm-i386/pgtable.h
@@ -323,8 +323,6 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 	return pte;
 }
 
-#define page_pte(page) page_pte_prot(page, __pgprot(0))
-
 #define pmd_large(pmd) \
 ((pmd_val(pmd) & (_PAGE_PSE|_PAGE_PRESENT)) == (_PAGE_PSE|_PAGE_PRESENT))
 
diff --git a/include/asm-ia64/pgtable.h b/include/asm-ia64/pgtable.h
index 3339c7b55a6..21e32a06bc8 100644
--- a/include/asm-ia64/pgtable.h
+++ b/include/asm-ia64/pgtable.h
@@ -236,9 +236,6 @@ ia64_phys_addr_valid (unsigned long addr)
 #define pte_modify(_pte, newprot) \
 	(__pte((pte_val(_pte) & ~_PAGE_CHG_MASK) | (pgprot_val(newprot) & _PAGE_CHG_MASK)))
 
-#define page_pte_prot(page,prot)	mk_pte(page, prot)
-#define page_pte(page)			page_pte_prot(page, __pgprot(0))
-
 #define pte_none(pte) 			(!pte_val(pte))
 #define pte_present(pte)		(pte_val(pte) & (_PAGE_P | _PAGE_PROTNONE))
 #define pte_clear(mm,addr,pte)		(pte_val(*(pte)) = 0UL)
diff --git a/include/asm-m32r/pgtable.h b/include/asm-m32r/pgtable.h
index 388e5ee9fa2..1cd5fd4a5b2 100644
--- a/include/asm-m32r/pgtable.h
+++ b/include/asm-m32r/pgtable.h
@@ -324,8 +324,6 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 	return pte;
 }
 
-#define page_pte(page)	page_pte_prot(page, __pgprot(0))
-
 /*
  * Conversion functions: convert a page and protection to a page entry,
  * and a page entry and page directory to the page they refer to.
diff --git a/include/asm-mips/pgtable-64.h b/include/asm-mips/pgtable-64.h
index 3e0a522c0f0..82166b254b2 100644
--- a/include/asm-mips/pgtable-64.h
+++ b/include/asm-mips/pgtable-64.h
@@ -169,7 +169,6 @@ static inline void pud_clear(pud_t *pudp)
 #define __pgd_offset(address)	pgd_index(address)
 #define __pud_offset(address)	(((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
 #define __pmd_offset(address)	pmd_index(address)
-#define page_pte(page) page_pte_prot(page, __pgprot(0))
 
 /* to find an entry in a kernel page-table-directory */
 #define pgd_offset_k(address) pgd_offset(&init_mm, 0)
diff --git a/include/asm-mips/pgtable.h b/include/asm-mips/pgtable.h
index 1e8ae2723be..34facd99650 100644
--- a/include/asm-mips/pgtable.h
+++ b/include/asm-mips/pgtable.h
@@ -75,7 +75,6 @@ extern void paging_init(void);
  * Conversion functions: convert a page and protection to a page entry,
  * and a page entry and page directory to the page they refer to.
  */
-#define page_pte(page)		page_pte_prot(page, __pgprot(0))
 #define pmd_phys(pmd)		(pmd_val(pmd) - PAGE_OFFSET)
 #define pmd_page(pmd)		(pfn_to_page(pmd_phys(pmd) >> PAGE_SHIFT))
 #define pmd_page_kernel(pmd)	pmd_val(pmd)
diff --git a/include/asm-sh/pgtable.h b/include/asm-sh/pgtable.h
index 0f4bcaae61b..aef8ae43de1 100644
--- a/include/asm-sh/pgtable.h
+++ b/include/asm-sh/pgtable.h
@@ -224,8 +224,6 @@ static inline pgprot_t pgprot_noncached(pgprot_t _prot)
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 { set_pte(&pte, __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot))); return pte; }
 
-#define page_pte(page) page_pte_prot(page, __pgprot(0))
-
 #define pmd_page_kernel(pmd) \
 ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
 
diff --git a/include/asm-sh64/pgtable.h b/include/asm-sh64/pgtable.h
index 51db4307bfa..51b05818e4e 100644
--- a/include/asm-sh64/pgtable.h
+++ b/include/asm-sh64/pgtable.h
@@ -457,9 +457,6 @@ extern inline pte_t pte_mkhuge(pte_t pte)	{ set_pte(&pte, __pte(pte_val(pte) | _
 extern inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 { set_pte(&pte, __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot))); return pte; }
 
-#define page_pte_prot(page, prot) mk_pte(page, prot)
-#define page_pte(page) page_pte_prot(page, __pgprot(0))
-
 typedef pte_t *pte_addr_t;
 #define pgtable_cache_init()	do { } while (0)
 
diff --git a/include/asm-sparc/pgtable.h b/include/asm-sparc/pgtable.h
index a14e9867750..b33c35411e8 100644
--- a/include/asm-sparc/pgtable.h
+++ b/include/asm-sparc/pgtable.h
@@ -255,8 +255,6 @@ BTFIXUPDEF_CALL_CONST(pte_t, pte_mkyoung, pte_t)
 #define pte_mkdirty(pte) BTFIXUP_CALL(pte_mkdirty)(pte)
 #define pte_mkyoung(pte) BTFIXUP_CALL(pte_mkyoung)(pte)
 
-#define page_pte_prot(page, prot)	mk_pte(page, prot)
-#define page_pte(page)			mk_pte(page, __pgprot(0))
 #define pfn_pte(pfn, prot)		mk_pte(pfn_to_page(pfn), prot)
 
 BTFIXUPDEF_CALL(unsigned long,	 pte_pfn, pte_t)
diff --git a/include/asm-sparc64/pgtable.h b/include/asm-sparc64/pgtable.h
index 8c6dfc6c7af..9a02879b235 100644
--- a/include/asm-sparc64/pgtable.h
+++ b/include/asm-sparc64/pgtable.h
@@ -231,9 +231,6 @@ extern struct page *mem_map_zero;
 #define pte_pfn(x)		((pte_val(x) & _PAGE_PADDR)>>PAGE_SHIFT)
 #define pte_page(x)		pfn_to_page(pte_pfn(x))
 
-#define page_pte_prot(page, prot)	mk_pte(page, prot)
-#define page_pte(page)			page_pte_prot(page, __pgprot(0))
-
 static inline pte_t pte_modify(pte_t orig_pte, pgprot_t new_prot)
 {
 	pte_t __pte;
diff --git a/include/asm-x86_64/pgtable.h b/include/asm-x86_64/pgtable.h
index dd8711ecaf2..7a07196a720 100644
--- a/include/asm-x86_64/pgtable.h
+++ b/include/asm-x86_64/pgtable.h
@@ -318,8 +318,6 @@ static inline int pmd_large(pmd_t pte) {
  * and a page entry and page directory to the page they refer to.
  */
 
-#define page_pte(page) page_pte_prot(page, __pgprot(0))
-
 /*
  * Level 4 access.
  */
-- 
cgit v1.2.3-70-g09d2


From 727a53bd535fe3bde644ac346db27456ad964083 Mon Sep 17 00:00:00 2001
From: Arthur Othieno <a.othieno@bluewin.ch>
Date: Sun, 30 Oct 2005 15:03:14 -0800
Subject: [PATCH] semaphore: Remove __MUTEX_INITIALIZER()

__MUTEX_INITIALIZER() has no users, and equates to the more commonly used
DECLARE_MUTEX(), thus making it pretty much redundant.  Remove it for good.

Signed-off-by: Arthur Othieno <a.othieno@bluewin.ch>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-alpha/semaphore.h     | 3 ---
 include/asm-arm/semaphore.h       | 2 --
 include/asm-arm26/semaphore.h     | 3 ---
 include/asm-cris/semaphore.h      | 3 ---
 include/asm-frv/semaphore.h       | 3 ---
 include/asm-h8300/semaphore.h     | 3 ---
 include/asm-i386/semaphore.h      | 3 ---
 include/asm-ia64/semaphore.h      | 2 --
 include/asm-m32r/semaphore.h      | 3 ---
 include/asm-m68k/semaphore.h      | 3 ---
 include/asm-m68knommu/semaphore.h | 3 ---
 include/asm-mips/semaphore.h      | 3 ---
 include/asm-parisc/semaphore.h    | 3 ---
 include/asm-ppc/semaphore.h       | 3 ---
 include/asm-ppc64/semaphore.h     | 3 ---
 include/asm-s390/semaphore.h      | 3 ---
 include/asm-sh/semaphore.h        | 3 ---
 include/asm-sh64/semaphore.h      | 3 ---
 include/asm-sparc/semaphore.h     | 3 ---
 include/asm-sparc64/semaphore.h   | 3 ---
 include/asm-v850/semaphore.h      | 3 ---
 include/asm-x86_64/semaphore.h    | 3 ---
 include/asm-xtensa/semaphore.h    | 3 ---
 23 files changed, 67 deletions(-)

(limited to 'include/asm-ia64')

diff --git a/include/asm-alpha/semaphore.h b/include/asm-alpha/semaphore.h
index eb2cbd97d40..1a6295f2c2d 100644
--- a/include/asm-alpha/semaphore.h
+++ b/include/asm-alpha/semaphore.h
@@ -26,9 +26,6 @@ struct semaphore {
   	.wait	= __WAIT_QUEUE_HEAD_INITIALIZER((name).wait),	\
 }
 
-#define __MUTEX_INITIALIZER(name)			\
-	__SEMAPHORE_INITIALIZER(name,1)
-
 #define __DECLARE_SEMAPHORE_GENERIC(name,count)		\
 	struct semaphore name = __SEMAPHORE_INITIALIZER(name,count)
 
diff --git a/include/asm-arm/semaphore.h b/include/asm-arm/semaphore.h
index 60f33e6eb80..71ca7d41268 100644
--- a/include/asm-arm/semaphore.h
+++ b/include/asm-arm/semaphore.h
@@ -24,8 +24,6 @@ struct semaphore {
 	.wait	= __WAIT_QUEUE_HEAD_INITIALIZER((name).wait),	\
 }
 
-#define __MUTEX_INITIALIZER(name) __SEMAPHORE_INIT(name,1)
-
 #define __DECLARE_SEMAPHORE_GENERIC(name,count)	\
 	struct semaphore name = __SEMAPHORE_INIT(name,count)
 
diff --git a/include/asm-arm26/semaphore.h b/include/asm-arm26/semaphore.h
index c1b6a1edad9..ccf15e70410 100644
--- a/include/asm-arm26/semaphore.h
+++ b/include/asm-arm26/semaphore.h
@@ -25,9 +25,6 @@ struct semaphore {
 	.wait		= __WAIT_QUEUE_HEAD_INITIALIZER((name).wait),	\
 }
 
-#define __MUTEX_INITIALIZER(name) \
-	__SEMAPHORE_INIT(name,1)
-
 #define __DECLARE_SEMAPHORE_GENERIC(name,count)	\
 	struct semaphore name = __SEMAPHORE_INIT(name,count)
 
diff --git a/include/asm-cris/semaphore.h b/include/asm-cris/semaphore.h
index 8ed7636ab31..39faf69bcf7 100644
--- a/include/asm-cris/semaphore.h
+++ b/include/asm-cris/semaphore.h
@@ -33,9 +33,6 @@ struct semaphore {
 	.wait		= __WAIT_QUEUE_HEAD_INITIALIZER((name).wait)    \
 }
 
-#define __MUTEX_INITIALIZER(name) \
-        __SEMAPHORE_INITIALIZER(name,1)
-
 #define __DECLARE_SEMAPHORE_GENERIC(name,count) \
         struct semaphore name = __SEMAPHORE_INITIALIZER(name,count)
 
diff --git a/include/asm-frv/semaphore.h b/include/asm-frv/semaphore.h
index 39354563080..b18396288df 100644
--- a/include/asm-frv/semaphore.h
+++ b/include/asm-frv/semaphore.h
@@ -47,9 +47,6 @@ struct semaphore {
 #define __SEMAPHORE_INITIALIZER(name,count) \
 { count, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) __SEM_DEBUG_INIT(name) }
 
-#define __MUTEX_INITIALIZER(name) \
-	__SEMAPHORE_INITIALIZER(name,1)
-
 #define __DECLARE_SEMAPHORE_GENERIC(name,count) \
 	struct semaphore name = __SEMAPHORE_INITIALIZER(name,count)
 
diff --git a/include/asm-h8300/semaphore.h b/include/asm-h8300/semaphore.h
index fe6ef377429..81bae2a9919 100644
--- a/include/asm-h8300/semaphore.h
+++ b/include/asm-h8300/semaphore.h
@@ -35,9 +35,6 @@ struct semaphore {
 	.wait		= __WAIT_QUEUE_HEAD_INITIALIZER((name).wait)	\
 }
 
-#define __MUTEX_INITIALIZER(name) \
-	__SEMAPHORE_INITIALIZER(name,1)
-
 #define __DECLARE_SEMAPHORE_GENERIC(name,count) \
 	struct semaphore name = __SEMAPHORE_INITIALIZER(name,count)
 
diff --git a/include/asm-i386/semaphore.h b/include/asm-i386/semaphore.h
index ea563da63e2..6a42b2142fd 100644
--- a/include/asm-i386/semaphore.h
+++ b/include/asm-i386/semaphore.h
@@ -55,9 +55,6 @@ struct semaphore {
 	.wait		= __WAIT_QUEUE_HEAD_INITIALIZER((name).wait)	\
 }
 
-#define __MUTEX_INITIALIZER(name) \
-	__SEMAPHORE_INITIALIZER(name,1)
-
 #define __DECLARE_SEMAPHORE_GENERIC(name,count) \
 	struct semaphore name = __SEMAPHORE_INITIALIZER(name,count)
 
diff --git a/include/asm-ia64/semaphore.h b/include/asm-ia64/semaphore.h
index 3a2f0f3f78f..bb8906285fa 100644
--- a/include/asm-ia64/semaphore.h
+++ b/include/asm-ia64/semaphore.h
@@ -24,8 +24,6 @@ struct semaphore {
 	.wait		= __WAIT_QUEUE_HEAD_INITIALIZER((name).wait)	\
 }
 
-#define __MUTEX_INITIALIZER(name)	__SEMAPHORE_INITIALIZER(name,1)
-
 #define __DECLARE_SEMAPHORE_GENERIC(name,count)					\
 	struct semaphore name = __SEMAPHORE_INITIALIZER(name, count)
 
diff --git a/include/asm-m32r/semaphore.h b/include/asm-m32r/semaphore.h
index 53e3c60f21e..bf447c52a0a 100644
--- a/include/asm-m32r/semaphore.h
+++ b/include/asm-m32r/semaphore.h
@@ -32,9 +32,6 @@ struct semaphore {
 	.wait		= __WAIT_QUEUE_HEAD_INITIALIZER((name).wait)	\
 }
 
-#define __MUTEX_INITIALIZER(name) \
-	__SEMAPHORE_INITIALIZER(name,1)
-
 #define __DECLARE_SEMAPHORE_GENERIC(name,count) \
 	struct semaphore name = __SEMAPHORE_INITIALIZER(name,count)
 
diff --git a/include/asm-m68k/semaphore.h b/include/asm-m68k/semaphore.h
index ab94cf3ed44..fd4c7cc3d3b 100644
--- a/include/asm-m68k/semaphore.h
+++ b/include/asm-m68k/semaphore.h
@@ -36,9 +36,6 @@ struct semaphore {
 	.wait		= __WAIT_QUEUE_HEAD_INITIALIZER((name).wait)	\
 }
 
-#define __MUTEX_INITIALIZER(name) \
-	__SEMAPHORE_INITIALIZER(name,1)
-
 #define __DECLARE_SEMAPHORE_GENERIC(name,count) \
 	struct semaphore name = __SEMAPHORE_INITIALIZER(name,count)
 
diff --git a/include/asm-m68knommu/semaphore.h b/include/asm-m68knommu/semaphore.h
index febe85add50..17aee15906a 100644
--- a/include/asm-m68knommu/semaphore.h
+++ b/include/asm-m68knommu/semaphore.h
@@ -35,9 +35,6 @@ struct semaphore {
 	.wait		= __WAIT_QUEUE_HEAD_INITIALIZER((name).wait)	\
 }
 
-#define __MUTEX_INITIALIZER(name) \
-	__SEMAPHORE_INITIALIZER(name,1)
-
 #define __DECLARE_SEMAPHORE_GENERIC(name,count) \
 	struct semaphore name = __SEMAPHORE_INITIALIZER(name,count)
 
diff --git a/include/asm-mips/semaphore.h b/include/asm-mips/semaphore.h
index c2c97dec661..3d6aa7c7ea8 100644
--- a/include/asm-mips/semaphore.h
+++ b/include/asm-mips/semaphore.h
@@ -45,9 +45,6 @@ struct semaphore {
 	.wait		= __WAIT_QUEUE_HEAD_INITIALIZER((name).wait)	\
 }
 
-#define __MUTEX_INITIALIZER(name) \
-	__SEMAPHORE_INITIALIZER(name, 1)
-
 #define __DECLARE_SEMAPHORE_GENERIC(name, count) \
 	struct semaphore name = __SEMAPHORE_INITIALIZER(name,count)
 
diff --git a/include/asm-parisc/semaphore.h b/include/asm-parisc/semaphore.h
index f78bb2e3453..c9ee41cd070 100644
--- a/include/asm-parisc/semaphore.h
+++ b/include/asm-parisc/semaphore.h
@@ -49,9 +49,6 @@ struct semaphore {
 	.wait		= __WAIT_QUEUE_HEAD_INITIALIZER((name).wait)	\
 }
 
-#define __MUTEX_INITIALIZER(name) \
-	__SEMAPHORE_INITIALIZER(name,1)
-
 #define __DECLARE_SEMAPHORE_GENERIC(name,count) \
 	struct semaphore name = __SEMAPHORE_INITIALIZER(name,count)
 
diff --git a/include/asm-ppc/semaphore.h b/include/asm-ppc/semaphore.h
index 89e6e73be08..d592937359c 100644
--- a/include/asm-ppc/semaphore.h
+++ b/include/asm-ppc/semaphore.h
@@ -37,9 +37,6 @@ struct semaphore {
 	.wait		= __WAIT_QUEUE_HEAD_INITIALIZER((name).wait)	\
 }
 
-#define __MUTEX_INITIALIZER(name) \
-	__SEMAPHORE_INITIALIZER(name, 1)
-
 #define __DECLARE_SEMAPHORE_GENERIC(name, count) \
 	struct semaphore name = __SEMAPHORE_INITIALIZER(name,count)
 
diff --git a/include/asm-ppc64/semaphore.h b/include/asm-ppc64/semaphore.h
index aefe7753ea4..d9ecb996923 100644
--- a/include/asm-ppc64/semaphore.h
+++ b/include/asm-ppc64/semaphore.h
@@ -31,9 +31,6 @@ struct semaphore {
 	.wait		= __WAIT_QUEUE_HEAD_INITIALIZER((name).wait)	\
 }
 
-#define __MUTEX_INITIALIZER(name) \
-	__SEMAPHORE_INITIALIZER(name, 1)
-
 #define __DECLARE_SEMAPHORE_GENERIC(name, count) \
 	struct semaphore name = __SEMAPHORE_INITIALIZER(name,count)
 
diff --git a/include/asm-s390/semaphore.h b/include/asm-s390/semaphore.h
index 873def6f363..702cf436698 100644
--- a/include/asm-s390/semaphore.h
+++ b/include/asm-s390/semaphore.h
@@ -29,9 +29,6 @@ struct semaphore {
 #define __SEMAPHORE_INITIALIZER(name,count) \
 	{ ATOMIC_INIT(count), __WAIT_QUEUE_HEAD_INITIALIZER((name).wait) }
 
-#define __MUTEX_INITIALIZER(name) \
-	__SEMAPHORE_INITIALIZER(name,1)
-
 #define __DECLARE_SEMAPHORE_GENERIC(name,count) \
 	struct semaphore name = __SEMAPHORE_INITIALIZER(name,count)
 
diff --git a/include/asm-sh/semaphore.h b/include/asm-sh/semaphore.h
index b923a77a8a7..489f7847c5d 100644
--- a/include/asm-sh/semaphore.h
+++ b/include/asm-sh/semaphore.h
@@ -33,9 +33,6 @@ struct semaphore {
 	.wait		= __WAIT_QUEUE_HEAD_INITIALIZER((name).wait)	\
 }
 
-#define __MUTEX_INITIALIZER(name) \
-	__SEMAPHORE_INITIALIZER(name,1)
-
 #define __DECLARE_SEMAPHORE_GENERIC(name,count) \
 	struct semaphore name = __SEMAPHORE_INITIALIZER(name,count)
 
diff --git a/include/asm-sh64/semaphore.h b/include/asm-sh64/semaphore.h
index fce22bb9a54..46952645914 100644
--- a/include/asm-sh64/semaphore.h
+++ b/include/asm-sh64/semaphore.h
@@ -40,9 +40,6 @@ struct semaphore {
 	.wait		= __WAIT_QUEUE_HEAD_INITIALIZER((name).wait)	\
 }
 
-#define __MUTEX_INITIALIZER(name) \
-	__SEMAPHORE_INITIALIZER(name,1)
-
 #define __DECLARE_SEMAPHORE_GENERIC(name,count) \
 	struct semaphore name = __SEMAPHORE_INITIALIZER(name,count)
 
diff --git a/include/asm-sparc/semaphore.h b/include/asm-sparc/semaphore.h
index 60ac5fd9eb4..f74ba31e265 100644
--- a/include/asm-sparc/semaphore.h
+++ b/include/asm-sparc/semaphore.h
@@ -22,9 +22,6 @@ struct semaphore {
 	.wait		= __WAIT_QUEUE_HEAD_INITIALIZER((name).wait)	\
 }
 
-#define __MUTEX_INITIALIZER(name) \
-	__SEMAPHORE_INITIALIZER(name,1)
-
 #define __DECLARE_SEMAPHORE_GENERIC(name,count) \
 	struct semaphore name = __SEMAPHORE_INITIALIZER(name,count)
 
diff --git a/include/asm-sparc64/semaphore.h b/include/asm-sparc64/semaphore.h
index 7419dd88b49..093dcc6788d 100644
--- a/include/asm-sparc64/semaphore.h
+++ b/include/asm-sparc64/semaphore.h
@@ -22,9 +22,6 @@ struct semaphore {
 	{ ATOMIC_INIT(count), \
 	  __WAIT_QUEUE_HEAD_INITIALIZER((name).wait) }
 
-#define __MUTEX_INITIALIZER(name) \
-	__SEMAPHORE_INITIALIZER(name, 1)
-
 #define __DECLARE_SEMAPHORE_GENERIC(name, count) \
 	struct semaphore name = __SEMAPHORE_INITIALIZER(name,count)
 
diff --git a/include/asm-v850/semaphore.h b/include/asm-v850/semaphore.h
index c514062bb69..df6cdecf6c1 100644
--- a/include/asm-v850/semaphore.h
+++ b/include/asm-v850/semaphore.h
@@ -18,9 +18,6 @@ struct semaphore {
 	{ ATOMIC_INIT (count), 0,					      \
 	  __WAIT_QUEUE_HEAD_INITIALIZER ((name).wait) }
 
-#define __MUTEX_INITIALIZER(name)					      \
-	__SEMAPHORE_INITIALIZER (name,1)
-
 #define __DECLARE_SEMAPHORE_GENERIC(name,count)	\
 	struct semaphore name = __SEMAPHORE_INITIALIZER (name,count)
 
diff --git a/include/asm-x86_64/semaphore.h b/include/asm-x86_64/semaphore.h
index f325e39bf3b..a389aa6fe80 100644
--- a/include/asm-x86_64/semaphore.h
+++ b/include/asm-x86_64/semaphore.h
@@ -56,9 +56,6 @@ struct semaphore {
 	.wait		= __WAIT_QUEUE_HEAD_INITIALIZER((name).wait)	\
 }
 
-#define __MUTEX_INITIALIZER(name) \
-	__SEMAPHORE_INITIALIZER(name,1)
-
 #define __DECLARE_SEMAPHORE_GENERIC(name,count) \
 	struct semaphore name = __SEMAPHORE_INITIALIZER(name,count)
 
diff --git a/include/asm-xtensa/semaphore.h b/include/asm-xtensa/semaphore.h
index 09e89ab3eb6..2a10e193b92 100644
--- a/include/asm-xtensa/semaphore.h
+++ b/include/asm-xtensa/semaphore.h
@@ -29,9 +29,6 @@ struct semaphore {
 	.wait		= __WAIT_QUEUE_HEAD_INITIALIZER((name).wait)	\
 }
 
-#define __MUTEX_INITIALIZER(name) 					\
-	__SEMAPHORE_INITIALIZER(name, 1)
-
 #define __DECLARE_SEMAPHORE_GENERIC(name,count) 			\
 	struct semaphore name = __SEMAPHORE_INITIALIZER(name,count)
 
-- 
cgit v1.2.3-70-g09d2