6 files changed, 546 insertions, 347 deletions
diff --git a/arch/sparc/kernel/central.c b/arch/sparc/kernel/central.c
index f3b5466c389..4589ca33220 100644
--- a/arch/sparc/kernel/central.c
+++ b/arch/sparc/kernel/central.c
@@ -99,7 +99,7 @@ static int __devinit clock_board_probe(struct of_device *op,
 
 	p->leds_resource.start = (unsigned long)
 		(p->clock_regs + CLOCK_CTRL);
-	p->leds_resource.end = p->leds_resource.end;
+	p->leds_resource.end = p->leds_resource.start;
 	p->leds_resource.name = "leds";
 
 	p->leds_pdev.name = "sunfire-clockboard-leds";
@@ -194,7 +194,7 @@ static int __devinit fhc_probe(struct of_device *op,
 	if (!p->central) {
 		p->leds_resource.start = (unsigned long)
 			(p->pregs + FHC_PREGS_CTRL);
-		p->leds_resource.end = p->leds_resource.end;
+		p->leds_resource.end = p->leds_resource.start;
 		p->leds_resource.name = "leds";
 
 		p->leds_pdev.name = "sunfire-fhc-leds";
diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c
index 8d6882bb480..f2179cce1e4 100644
--- a/arch/sparc/kernel/irq_64.c
+++ b/arch/sparc/kernel/irq_64.c
@@ -250,12 +250,12 @@ struct irq_handler_data {
 };
 
 #ifdef CONFIG_SMP
-static int irq_choose_cpu(unsigned int virt_irq)
+static int irq_choose_cpu(unsigned int virt_irq, const struct cpumask *affinity)
 {
 	cpumask_t mask;
 	int cpuid;
 
-	cpumask_copy(&mask, irq_desc[virt_irq].affinity);
+	cpumask_copy(&mask, affinity);
 	if (cpus_equal(mask, cpu_online_map)) {
 		cpuid = map_to_cpu(virt_irq);
 	} else {
@@ -268,7 +268,7 @@ static int irq_choose_cpu(unsigned int virt_irq)
 	return cpuid;
 }
 #else
-static int irq_choose_cpu(unsigned int virt_irq)
+static int irq_choose_cpu(unsigned int virt_irq, const struct cpumask *affinity)
 {
 	return real_hard_smp_processor_id();
 }
@@ -282,7 +282,8 @@ static void sun4u_irq_enable(unsigned int virt_irq)
 		unsigned long cpuid, imap, val;
 		unsigned int tid;
 
-		cpuid = irq_choose_cpu(virt_irq);
+		cpuid = irq_choose_cpu(virt_irq,
+				       irq_desc[virt_irq].affinity);
 		imap = data->imap;
 
 		tid = sun4u_compute_tid(imap, cpuid);
@@ -299,7 +300,24 @@ static void sun4u_irq_enable(unsigned int virt_irq)
 static int sun4u_set_affinity(unsigned int virt_irq,
 			       const struct cpumask *mask)
 {
-	sun4u_irq_enable(virt_irq);
+	struct irq_handler_data *data = get_irq_chip_data(virt_irq);
+
+	if (likely(data)) {
+		unsigned long cpuid, imap, val;
+		unsigned int tid;
+
+		cpuid = irq_choose_cpu(virt_irq, mask);
+		imap = data->imap;
+
+		tid = sun4u_compute_tid(imap, cpuid);
+
+		val = upa_readq(imap);
+		val &= ~(IMAP_TID_UPA | IMAP_TID_JBUS |
+			 IMAP_AID_SAFARI | IMAP_NID_SAFARI);
+		val |= tid | IMAP_VALID;
+		upa_writeq(val, imap);
+		upa_writeq(ICLR_IDLE, data->iclr);
+	}
 
 	return 0;
 }
@@ -340,7 +358,8 @@ static void sun4u_irq_eoi(unsigned int virt_irq)
 static void sun4v_irq_enable(unsigned int virt_irq)
 {
 	unsigned int ino = virt_irq_table[virt_irq].dev_ino;
-	unsigned long cpuid = irq_choose_cpu(virt_irq);
+	unsigned long cpuid = irq_choose_cpu(virt_irq,
+					     irq_desc[virt_irq].affinity);
 	int err;
 
 	err = sun4v_intr_settarget(ino, cpuid);
@@ -361,7 +380,7 @@ static int sun4v_set_affinity(unsigned int virt_irq,
 			       const struct cpumask *mask)
 {
 	unsigned int ino = virt_irq_table[virt_irq].dev_ino;
-	unsigned long cpuid = irq_choose_cpu(virt_irq);
+	unsigned long cpuid = irq_choose_cpu(virt_irq, mask);
 	int err;
 
 	err = sun4v_intr_settarget(ino, cpuid);
@@ -403,7 +422,7 @@ static void sun4v_virq_enable(unsigned int virt_irq)
 	unsigned long cpuid, dev_handle, dev_ino;
 	int err;
 
-	cpuid = irq_choose_cpu(virt_irq);
+	cpuid = irq_choose_cpu(virt_irq, irq_desc[virt_irq].affinity);
 
 	dev_handle = virt_irq_table[virt_irq].dev_handle;
 	dev_ino = virt_irq_table[virt_irq].dev_ino;
@@ -433,7 +452,7 @@ static int sun4v_virt_set_affinity(unsigned int virt_irq,
 	unsigned long cpuid, dev_handle, dev_ino;
 	int err;
 
-	cpuid = irq_choose_cpu(virt_irq);
+	cpuid = irq_choose_cpu(virt_irq, mask);
 
 	dev_handle = virt_irq_table[virt_irq].dev_handle;
 	dev_ino = virt_irq_table[virt_irq].dev_ino;
diff --git a/arch/sparc/kernel/pcic.c b/arch/sparc/kernel/pcic.c
index 85e7037429b..4e2724ec2bb 100644
--- a/arch/sparc/kernel/pcic.c
+++ b/arch/sparc/kernel/pcic.c
@@ -30,6 +30,7 @@
 #include <asm/oplib.h>
 #include <asm/prom.h>
 #include <asm/pcic.h>
+#include <asm/timex.h>
 #include <asm/timer.h>
 #include <asm/uaccess.h>
 #include <asm/irq_regs.h>
@@ -163,8 +164,6 @@ void __iomem *pcic_regs;
 volatile int pcic_speculative;
 volatile int pcic_trapped;
 
-static void pci_do_gettimeofday(struct timeval *tv);
-static int pci_do_settimeofday(struct timespec *tv);
 
 #define CONFIG_CMD(bus, device_fn, where) (0x80000000 | (((unsigned int)bus) << 16) | (((unsigned int)device_fn) << 8) | (where & ~3))
 
@@ -716,19 +715,27 @@ static irqreturn_t pcic_timer_handler (int irq, void *h)
 #define USECS_PER_JIFFY  10000  /* We have 100HZ "standard" timer for sparc */
 #define TICK_TIMER_LIMIT ((100*1000000/4)/100)
 
+u32 pci_gettimeoffset(void)
+{
+	/*
+	 * We divide all by 100
+	 * to have microsecond resolution and to avoid overflow
+	 */
+	unsigned long count =
+	    readl(pcic0.pcic_regs+PCI_SYS_COUNTER) & ~PCI_SYS_COUNTER_OVERFLOW;
+	count = ((count/100)*USECS_PER_JIFFY) / (TICK_TIMER_LIMIT/100);
+	return count * 1000;
+}
+
+
 void __init pci_time_init(void)
 {
 	struct linux_pcic *pcic = &pcic0;
 	unsigned long v;
 	int timer_irq, irq;
 
-	/* A hack until do_gettimeofday prototype is moved to arch specific headers
-	   and btfixupped. Patch do_gettimeofday with ba pci_do_gettimeofday; nop */
-	((unsigned int *)do_gettimeofday)[0] = 
-	    0x10800000 | ((((unsigned long)pci_do_gettimeofday -
-	     (unsigned long)do_gettimeofday) >> 2) & 0x003fffff);
-	((unsigned int *)do_gettimeofday)[1] = 0x01000000;
-	BTFIXUPSET_CALL(bus_do_settimeofday, pci_do_settimeofday, BTFIXUPCALL_NORM);
+	do_arch_gettimeoffset = pci_gettimeoffset;
+
 	btfixup();
 
 	writel (TICK_TIMER_LIMIT, pcic->pcic_regs+PCI_SYS_LIMIT);
@@ -746,84 +753,6 @@ void __init pci_time_init(void)
 	local_irq_enable();
 }
 
-static inline unsigned long do_gettimeoffset(void)
-{
-	/*
-	 * We divide all by 100
-	 * to have microsecond resolution and to avoid overflow
-	 */
-	unsigned long count =
-	    readl(pcic0.pcic_regs+PCI_SYS_COUNTER) & ~PCI_SYS_COUNTER_OVERFLOW;
-	count = ((count/100)*USECS_PER_JIFFY) / (TICK_TIMER_LIMIT/100);
-	return count;
-}
-
-static void pci_do_gettimeofday(struct timeval *tv)
-{
-	unsigned long flags;
-	unsigned long seq;
-	unsigned long usec, sec;
-	unsigned long max_ntp_tick = tick_usec - tickadj;
-
-	do {
-		seq = read_seqbegin_irqsave(&xtime_lock, flags);
-		usec = do_gettimeoffset();
-
-		/*
-		 * If time_adjust is negative then NTP is slowing the clock
-		 * so make sure not to go into next possible interval.
-		 * Better to lose some accuracy than have time go backwards..
-		 */
-		if (unlikely(time_adjust < 0))
-			usec = min(usec, max_ntp_tick);
-
-		sec = xtime.tv_sec;
-		usec += (xtime.tv_nsec / 1000);
-	} while (read_seqretry_irqrestore(&xtime_lock, seq, flags));
-
-	while (usec >= 1000000) {
-		usec -= 1000000;
-		sec++;
-	}
-
-	tv->tv_sec = sec;
-	tv->tv_usec = usec;
-}
-
-static int pci_do_settimeofday(struct timespec *tv)
-{
-	if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
-		return -EINVAL;
-
-	/*
-	 * This is revolting. We need to set "xtime" correctly. However, the
-	 * value in this location is the value at the most recent update of
-	 * wall time.  Discover what correction gettimeofday() would have
-	 * made, and then undo it!
-	 */
-	tv->tv_nsec -= 1000 * do_gettimeoffset();
-	while (tv->tv_nsec < 0) {
-		tv->tv_nsec += NSEC_PER_SEC;
-		tv->tv_sec--;
-	}
-
-	wall_to_monotonic.tv_sec += xtime.tv_sec - tv->tv_sec;
-	wall_to_monotonic.tv_nsec += xtime.tv_nsec - tv->tv_nsec;
-
-	if (wall_to_monotonic.tv_nsec > NSEC_PER_SEC) {
-		wall_to_monotonic.tv_nsec -= NSEC_PER_SEC;
-		wall_to_monotonic.tv_sec++;
-	}
-	if (wall_to_monotonic.tv_nsec < 0) {
-		wall_to_monotonic.tv_nsec += NSEC_PER_SEC;
-		wall_to_monotonic.tv_sec--;
-	}
-
-	xtime.tv_sec = tv->tv_sec;
-	xtime.tv_nsec = tv->tv_nsec;
-	ntp_clear();
-	return 0;
-}
 
 #if 0
 static void watchdog_reset() {
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 198fb4e79ba..e856456ec02 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -1,6 +1,6 @@
 /* Performance event support for sparc64.
  *
- * Copyright (C) 2009 David S. Miller <davem@davemloft.net>
+ * Copyright (C) 2009, 2010 David S. Miller <davem@davemloft.net>
  *
  * This code is based almost entirely upon the x86 perf event
  * code, which is:
@@ -18,11 +18,15 @@
 #include <linux/kdebug.h>
 #include <linux/mutex.h>
 
+#include <asm/stacktrace.h>
 #include <asm/cpudata.h>
+#include <asm/uaccess.h>
 #include <asm/atomic.h>
 #include <asm/nmi.h>
 #include <asm/pcr.h>
 
+#include "kstack.h"
+
 /* Sparc64 chips have two performance counters, 32-bits each, with
  * overflow interrupts generated on transition from 0xffffffff to 0.
  * The counters are accessed in one go using a 64-bit register.
@@ -51,16 +55,49 @@
 
 #define PIC_UPPER_INDEX			0
 #define PIC_LOWER_INDEX			1
+#define PIC_NO_INDEX			-1
 
 struct cpu_hw_events {
-	struct perf_event	*events[MAX_HWEVENTS];
-	unsigned long		used_mask[BITS_TO_LONGS(MAX_HWEVENTS)];
-	unsigned long		active_mask[BITS_TO_LONGS(MAX_HWEVENTS)];
+	/* Number of events currently scheduled onto this cpu.
+	 * This tells how many entries in the arrays below
+	 * are valid.
+	 */
+	int			n_events;
+
+	/* Number of new events added since the last hw_perf_disable().
+	 * This works because the perf event layer always adds new
+	 * events inside of a perf_{disable,enable}() sequence.
+	 */
+	int			n_added;
+
+	/* Array of events current scheduled on this cpu.  */
+	struct perf_event	*event[MAX_HWEVENTS];
+
+	/* Array of encoded longs, specifying the %pcr register
+	 * encoding and the mask of PIC counters this even can
+	 * be scheduled on.  See perf_event_encode() et al.
+	 */
+	unsigned long		events[MAX_HWEVENTS];
+
+	/* The current counter index assigned to an event.  When the
+	 * event hasn't been programmed into the cpu yet, this will
+	 * hold PIC_NO_INDEX.  The event->hw.idx value tells us where
+	 * we ought to schedule the event.
+	 */
+	int			current_idx[MAX_HWEVENTS];
+
+	/* Software copy of %pcr register on this cpu.  */
 	u64			pcr;
+
+	/* Enabled/disable state.  */
 	int			enabled;
 };
 DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, };
 
+/* An event map describes the characteristics of a performance
+ * counter event.  In particular it gives the encoding as well as
+ * a mask telling which counters the event can be measured on.
+ */
 struct perf_event_map {
 	u16	encoding;
 	u8	pic_mask;
@@ -69,15 +106,20 @@ struct perf_event_map {
 #define PIC_LOWER	0x02
 };
 
+/* Encode a perf_event_map entry into a long.  */
 static unsigned long perf_event_encode(const struct perf_event_map *pmap)
 {
 	return ((unsigned long) pmap->encoding << 16) | pmap->pic_mask;
 }
 
-static void perf_event_decode(unsigned long val, u16 *enc, u8 *msk)
+static u8 perf_event_get_msk(unsigned long val)
 {
-	*msk = val & 0xff;
-	*enc = val >> 16;
+	return val & 0xff;
+}
+
+static u64 perf_event_get_enc(unsigned long val)
+{
+	return val >> 16;
 }
 
 #define C(x) PERF_COUNT_HW_CACHE_##x
@@ -491,53 +533,6 @@ static inline void sparc_pmu_disable_event(struct cpu_hw_events *cpuc, struct hw
 	pcr_ops->write(cpuc->pcr);
 }
 
-void hw_perf_enable(void)
-{
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-	u64 val;
-	int i;
-
-	if (cpuc->enabled)
-		return;
-
-	cpuc->enabled = 1;
-	barrier();
-
-	val = cpuc->pcr;
-
-	for (i = 0; i < MAX_HWEVENTS; i++) {
-		struct perf_event *cp = cpuc->events[i];
-		struct hw_perf_event *hwc;
-
-		if (!cp)
-			continue;
-		hwc = &cp->hw;
-		val |= hwc->config_base;
-	}
-
-	cpuc->pcr = val;
-
-	pcr_ops->write(cpuc->pcr);
-}
-
-void hw_perf_disable(void)
-{
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-	u64 val;
-
-	if (!cpuc->enabled)
-		return;
-
-	cpuc->enabled = 0;
-
-	val = cpuc->pcr;
-	val &= ~(PCR_UTRACE | PCR_STRACE |
-		 sparc_pmu->hv_bit | sparc_pmu->irq_bit);
-	cpuc->pcr = val;
-
-	pcr_ops->write(cpuc->pcr);
-}
-
 static u32 read_pmc(int idx)
 {
 	u64 val;
@@ -566,6 +561,30 @@ static void write_pmc(int idx, u64 val)
 	write_pic(pic);
 }
 
+static u64 sparc_perf_event_update(struct perf_event *event,
+				   struct hw_perf_event *hwc, int idx)
+{
+	int shift = 64 - 32;
+	u64 prev_raw_count, new_raw_count;
+	s64 delta;
+
+again:
+	prev_raw_count = atomic64_read(&hwc->prev_count);
+	new_raw_count = read_pmc(idx);
+
+	if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
+			     new_raw_count) != prev_raw_count)
+		goto again;
+
+	delta = (new_raw_count << shift) - (prev_raw_count << shift);
+	delta >>= shift;
+
+	atomic64_add(delta, &event->count);
+	atomic64_sub(delta, &hwc->period_left);
+
+	return new_raw_count;
+}
+
 static int sparc_perf_event_set_period(struct perf_event *event,
 				       struct hw_perf_event *hwc, int idx)
 {
@@ -598,81 +617,166 @@ static int sparc_perf_event_set_period(struct perf_event *event,
 	return ret;
 }
 
-static int sparc_pmu_enable(struct perf_event *event)
+/* If performance event entries have been added, move existing
+ * events around (if necessary) and then assign new entries to
+ * counters.
+ */
+static u64 maybe_change_configuration(struct cpu_hw_events *cpuc, u64 pcr)
 {
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-	struct hw_perf_event *hwc = &event->hw;
-	int idx = hwc->idx;
+	int i;
 
-	if (test_and_set_bit(idx, cpuc->used_mask))
-		return -EAGAIN;
+	if (!cpuc->n_added)
+		goto out;
 
-	sparc_pmu_disable_event(cpuc, hwc, idx);
+	/* Read in the counters which are moving.  */
+	for (i = 0; i < cpuc->n_events; i++) {
+		struct perf_event *cp = cpuc->event[i];
 
-	cpuc->events[idx] = event;
-	set_bit(idx, cpuc->active_mask);
+		if (cpuc->current_idx[i] != PIC_NO_INDEX &&
+		    cpuc->current_idx[i] != cp->hw.idx) {
+			sparc_perf_event_update(cp, &cp->hw,
+						cpuc->current_idx[i]);
+			cpuc->current_idx[i] = PIC_NO_INDEX;
+		}
+	}
 
-	sparc_perf_event_set_period(event, hwc, idx);
-	sparc_pmu_enable_event(cpuc, hwc, idx);
-	perf_event_update_userpage(event);
-	return 0;
+	/* Assign to counters all unassigned events.  */
+	for (i = 0; i < cpuc->n_events; i++) {
+		struct perf_event *cp = cpuc->event[i];
+		struct hw_perf_event *hwc = &cp->hw;
+		int idx = hwc->idx;
+		u64 enc;
+
+		if (cpuc->current_idx[i] != PIC_NO_INDEX)
+			continue;
+
+		sparc_perf_event_set_period(cp, hwc, idx);
+		cpuc->current_idx[i] = idx;
+
+		enc = perf_event_get_enc(cpuc->events[i]);
+		pcr |= event_encoding(enc, idx);
+	}
+out:
+	return pcr;
 }
 
-static u64 sparc_perf_event_update(struct perf_event *event,
-				   struct hw_perf_event *hwc, int idx)
+void hw_perf_enable(void)
 {
-	int shift = 64 - 32;
-	u64 prev_raw_count, new_raw_count;
-	s64 delta;
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	u64 pcr;
 
-again:
-	prev_raw_count = atomic64_read(&hwc->prev_count);
-	new_raw_count = read_pmc(idx);
+	if (cpuc->enabled)
+		return;
 
-	if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
-			     new_raw_count) != prev_raw_count)
-		goto again;
+	cpuc->enabled = 1;
+	barrier();
 
-	delta = (new_raw_count << shift) - (prev_raw_count << shift);
-	delta >>= shift;
+	pcr = cpuc->pcr;
+	if (!cpuc->n_events) {
+		pcr = 0;
+	} else {
+		pcr = maybe_change_configuration(cpuc, pcr);
 
-	atomic64_add(delta, &event->count);
-	atomic64_sub(delta, &hwc->period_left);
+		/* We require that all of the events have the same
+		 * configuration, so just fetch the settings from the
+		 * first entry.
+		 */
+		cpuc->pcr = pcr | cpuc->event[0]->hw.config_base;
+	}
 
-	return new_raw_count;
+	pcr_ops->write(cpuc->pcr);
+}
+
+void hw_perf_disable(void)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	u64 val;
+
+	if (!cpuc->enabled)
+		return;
+
+	cpuc->enabled = 0;
+	cpuc->n_added = 0;
+
+	val = cpuc->pcr;
+	val &= ~(PCR_UTRACE | PCR_STRACE |
+		 sparc_pmu->hv_bit | sparc_pmu->irq_bit);
+	cpuc->pcr = val;
+
+	pcr_ops->write(cpuc->pcr);
 }
 
 static void sparc_pmu_disable(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
-	int idx = hwc->idx;
+	unsigned long flags;
+	int i;
 
-	clear_bit(idx, cpuc->active_mask);
-	sparc_pmu_disable_event(cpuc, hwc, idx);
+	local_irq_save(flags);
+	perf_disable();
+
+	for (i = 0; i < cpuc->n_events; i++) {
+		if (event == cpuc->event[i]) {
+			int idx = cpuc->current_idx[i];
+
+			/* Shift remaining entries down into
+			 * the existing slot.
+			 */
+			while (++i < cpuc->n_events) {
+				cpuc->event[i - 1] = cpuc->event[i];
+				cpuc->events[i - 1] = cpuc->events[i];
+				cpuc->current_idx[i - 1] =
+					cpuc->current_idx[i];
+			}
+
+			/* Absorb the final count and turn off the
+			 * event.
+			 */
+			sparc_pmu_disable_event(cpuc, hwc, idx);
+			barrier();
+			sparc_perf_event_update(event, hwc, idx);
 
-	barrier();
+			perf_event_update_userpage(event);
 
-	sparc_perf_event_update(event, hwc, idx);
-	cpuc->events[idx] = NULL;
-	clear_bit(idx, cpuc->used_mask);
+			cpuc->n_events--;
+			break;
+		}
+	}
 
-	perf_event_update_userpage(event);
+	perf_enable();
+	local_irq_restore(flags);
+}
+
+static int active_event_index(struct cpu_hw_events *cpuc,
+			      struct perf_event *event)
+{
+	int i;
+
+	for (i = 0; i < cpuc->n_events; i++) {
+		if (cpuc->event[i] == event)
+			break;
+	}
+	BUG_ON(i == cpuc->n_events);
+	return cpuc->current_idx[i];
 }
 
 static void sparc_pmu_read(struct perf_event *event)
 {
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	int idx = active_event_index(cpuc, event);
 	struct hw_perf_event *hwc = &event->hw;
 
-	sparc_perf_event_update(event, hwc, hwc->idx);
+	sparc_perf_event_update(event, hwc, idx);
 }
 
 static void sparc_pmu_unthrottle(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	int idx = active_event_index(cpuc, event);
 	struct hw_perf_event *hwc = &event->hw;
 
-	sparc_pmu_enable_event(cpuc, hwc, hwc->idx);
+	sparc_pmu_enable_event(cpuc, hwc, idx);
 }
 
 static atomic_t active_events = ATOMIC_INIT(0);
@@ -750,43 +854,75 @@ static void hw_perf_event_destroy(struct perf_event *event)
 /* Make sure all events can be scheduled into the hardware at
  * the same time.  This is simplified by the fact that we only
  * need to support 2 simultaneous HW events.
+ *
+ * As a side effect, the evts[]->hw.idx values will be assigned
+ * on success.  These are pending indexes.  When the events are
+ * actually programmed into the chip, these values will propagate
+ * to the per-cpu cpuc->current_idx[] slots, see the code in
+ * maybe_change_configuration() for details.
  */
-static int sparc_check_constraints(unsigned long *events, int n_ev)
+static int sparc_check_constraints(struct perf_event **evts,
+				   unsigned long *events, int n_ev)
 {
-	if (n_ev <= perf_max_events) {
-		u8 msk1, msk2;
-		u16 dummy;
-
-		if (n_ev == 1)
-			return 0;
-		BUG_ON(n_ev != 2);
-		perf_event_decode(events[0], &dummy, &msk1);
-		perf_event_decode(events[1], &dummy, &msk2);
-
-		/* If both events can go on any counter, OK.  */
-		if (msk1 == (PIC_UPPER | PIC_LOWER) &&
-		    msk2 == (PIC_UPPER | PIC_LOWER))
-			return 0;
-
-		/* If one event is limited to a specific counter,
-		 * and the other can go on both, OK.
-		 */
-		if ((msk1 == PIC_UPPER || msk1 == PIC_LOWER) &&
-		    msk2 == (PIC_UPPER | PIC_LOWER))
-			return 0;
-		if ((msk2 == PIC_UPPER || msk2 == PIC_LOWER) &&
-		    msk1 == (PIC_UPPER | PIC_LOWER))
-			return 0;
-
-		/* If the events are fixed to different counters, OK.  */
-		if ((msk1 == PIC_UPPER && msk2 == PIC_LOWER) ||
-		    (msk1 == PIC_LOWER && msk2 == PIC_UPPER))
-			return 0;
-
-		/* Otherwise, there is a conflict.  */
+	u8 msk0 = 0, msk1 = 0;
+	int idx0 = 0;
+
+	/* This case is possible when we are invoked from
+	 * hw_perf_group_sched_in().
+	 */
+	if (!n_ev)
+		return 0;
+
+	if (n_ev > perf_max_events)
+		return -1;
+
+	msk0 = perf_event_get_msk(events[0]);
+	if (n_ev == 1) {
+		if (msk0 & PIC_LOWER)
+			idx0 = 1;
+		goto success;
 	}
+	BUG_ON(n_ev != 2);
+	msk1 = perf_event_get_msk(events[1]);
+
+	/* If both events can go on any counter, OK.  */
+	if (msk0 == (PIC_UPPER | PIC_LOWER) &&
+	    msk1 == (PIC_UPPER | PIC_LOWER))
+		goto success;
 
+	/* If one event is limited to a specific counter,
+	 * and the other can go on both, OK.
+	 */
+	if ((msk0 == PIC_UPPER || msk0 == PIC_LOWER) &&
+	    msk1 == (PIC_UPPER | PIC_LOWER)) {
+		if (msk0 & PIC_LOWER)
+			idx0 = 1;
+		goto success;
+	}
+
+	if ((msk1 == PIC_UPPER || msk1 == PIC_LOWER) &&
+	    msk0 == (PIC_UPPER | PIC_LOWER)) {
+		if (msk1 & PIC_UPPER)
+			idx0 = 1;
+		goto success;
+	}
+
+	/* If the events are fixed to different counters, OK.  */
+	if ((msk0 == PIC_UPPER && msk1 == PIC_LOWER) ||
+	    (msk0 == PIC_LOWER && msk1 == PIC_UPPER)) {
+		if (msk0 & PIC_LOWER)
+			idx0 = 1;
+		goto success;
+	}
+
+	/* Otherwise, there is a conflict.  */
 	return -1;
+
+success:
+	evts[0]->hw.idx = idx0;
+	if (n_ev == 2)
+		evts[1]->hw.idx = idx0 ^ 1;
+	return 0;
 }
 
 static int check_excludes(struct perf_event **evts, int n_prev, int n_new)
@@ -818,7 +954,8 @@ static int check_excludes(struct perf_event **evts, int n_prev, int n_new)
 }
 
 static int collect_events(struct perf_event *group, int max_count,
-			  struct perf_event *evts[], unsigned long *events)
+			  struct perf_event *evts[], unsigned long *events,
+			  int *current_idx)
 {
 	struct perf_event *event;
 	int n = 0;
@@ -827,7 +964,8 @@ static int collect_events(struct perf_event *group, int max_count,
 		if (n >= max_count)
 			return -1;
 		evts[n] = group;
-		events[n++] = group->hw.event_base;
+		events[n] = group->hw.event_base;
+		current_idx[n++] = PIC_NO_INDEX;
 	}
 	list_for_each_entry(event, &group->sibling_list, group_entry) {
 		if (!is_software_event(event) &&
@@ -835,20 +973,100 @@ static int collect_events(struct perf_event *group, int max_count,
 			if (n >= max_count)
 				return -1;
 			evts[n] = event;
-			events[n++] = event->hw.event_base;
+			events[n] = event->hw.event_base;
+			current_idx[n++] = PIC_NO_INDEX;
 		}
 	}
 	return n;
 }
 
+static void event_sched_in(struct perf_event *event, int cpu)
+{
+	event->state = PERF_EVENT_STATE_ACTIVE;
+	event->oncpu = cpu;
+	event->tstamp_running += event->ctx->time - event->tstamp_stopped;
+	if (is_software_event(event))
+		event->pmu->enable(event);
+}
+
+int hw_perf_group_sched_in(struct perf_event *group_leader,
+			   struct perf_cpu_context *cpuctx,
+			   struct perf_event_context *ctx, int cpu)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct perf_event *sub;
+	int n0, n;
+
+	if (!sparc_pmu)
+		return 0;
+
+	n0 = cpuc->n_events;
+	n = collect_events(group_leader, perf_max_events - n0,
+			   &cpuc->event[n0], &cpuc->events[n0],
+			   &cpuc->current_idx[n0]);
+	if (n < 0)
+		return -EAGAIN;
+	if (check_excludes(cpuc->event, n0, n))
+		return -EINVAL;
+	if (sparc_check_constraints(cpuc->event, cpuc->events, n + n0))
+		return -EAGAIN;
+	cpuc->n_events = n0 + n;
+	cpuc->n_added += n;
+
+	cpuctx->active_oncpu += n;
+	n = 1;
+	event_sched_in(group_leader, cpu);
+	list_for_each_entry(sub, &group_leader->sibling_list, group_entry) {
+		if (sub->state != PERF_EVENT_STATE_OFF) {
+			event_sched_in(sub, cpu);
+			n++;
+		}
+	}
+	ctx->nr_active += n;
+
+	return 1;
+}
+
+static int sparc_pmu_enable(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	int n0, ret = -EAGAIN;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	perf_disable();
+
+	n0 = cpuc->n_events;
+	if (n0 >= perf_max_events)
+		goto out;
+
+	cpuc->event[n0] = event;
+	cpuc->events[n0] = event->hw.event_base;
+	cpuc->current_idx[n0] = PIC_NO_INDEX;
+
+	if (check_excludes(cpuc->event, n0, 1))
+		goto out;
+	if (sparc_check_constraints(cpuc->event, cpuc->events, n0 + 1))
+		goto out;
+
+	cpuc->n_events++;
+	cpuc->n_added++;
+
+	ret = 0;
+out:
+	perf_enable();
+	local_irq_restore(flags);
+	return ret;
+}
+
 static int __hw_perf_event_init(struct perf_event *event)
 {
 	struct perf_event_attr *attr = &event->attr;
 	struct perf_event *evts[MAX_HWEVENTS];
 	struct hw_perf_event *hwc = &event->hw;
 	unsigned long events[MAX_HWEVENTS];
+	int current_idx_dmy[MAX_HWEVENTS];
 	const struct perf_event_map *pmap;
-	u64 enc;
 	int n;
 
 	if (atomic_read(&nmi_active) < 0)
@@ -865,10 +1083,7 @@ static int __hw_perf_event_init(struct perf_event *event)
 	} else
 		return -EOPNOTSUPP;
 
-	/* We save the enable bits in the config_base.  So to
-	 * turn off sampling just write 'config', and to enable
-	 * things write 'config | config_base'.
-	 */
+	/* We save the enable bits in the config_base.  */
 	hwc->config_base = sparc_pmu->irq_bit;
 	if (!attr->exclude_user)
 		hwc->config_base |= PCR_UTRACE;
@@ -879,13 +1094,11 @@ static int __hw_perf_event_init(struct perf_event *event)
 
 	hwc->event_base = perf_event_encode(pmap);
 
-	enc = pmap->encoding;
-
 	n = 0;
 	if (event->group_leader != event) {
 		n = collect_events(event->group_leader,
 				   perf_max_events - 1,
-				   evts, events);
+				   evts, events, current_idx_dmy);
 		if (n < 0)
 			return -EINVAL;
 	}
@@ -895,9 +1108,11 @@ static int __hw_perf_event_init(struct perf_event *event)
 	if (check_excludes(evts, n, 1))
 		return -EINVAL;
 
-	if (sparc_check_constraints(events, n + 1))
+	if (sparc_check_constraints(evts, events, n + 1))
 		return -EINVAL;
 
+	hwc->idx = PIC_NO_INDEX;
+
 	/* Try to do all error checking before this point, as unwinding
 	 * state after grabbing the PMC is difficult.
 	 */
@@ -910,15 +1125,6 @@ static int __hw_perf_event_init(struct perf_event *event)
 		atomic64_set(&hwc->period_left, hwc->sample_period);
 	}
 
-	if (pmap->pic_mask & PIC_UPPER) {
-		hwc->idx = PIC_UPPER_INDEX;
-		enc <<= sparc_pmu->upper_shift;
-	} else {
-		hwc->idx = PIC_LOWER_INDEX;
-		enc <<= sparc_pmu->lower_shift;
-	}
-
-	hwc->config |= enc;
 	return 0;
 }
 
@@ -968,7 +1174,7 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
 	struct perf_sample_data data;
 	struct cpu_hw_events *cpuc;
 	struct pt_regs *regs;
-	int idx;
+	int i;
 
 	if (!atomic_read(&active_events))
 		return NOTIFY_DONE;
@@ -997,13 +1203,12 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
 	if (sparc_pmu->irq_bit)
 		pcr_ops->write(cpuc->pcr);
 
-	for (idx = 0; idx < MAX_HWEVENTS; idx++) {
-		struct perf_event *event = cpuc->events[idx];
+	for (i = 0; i < cpuc->n_events; i++) {
+		struct perf_event *event = cpuc->event[i];
+		int idx = cpuc->current_idx[i];
 		struct hw_perf_event *hwc;
 		u64 val;
 
-		if (!test_bit(idx, cpuc->active_mask))
-			continue;
 		hwc = &event->hw;
 		val = sparc_perf_event_update(event, hwc, idx);
 		if (val & (1ULL << 31))
@@ -1055,10 +1260,122 @@ void __init init_hw_perf_events(void)
 
 	pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type);
 
-	/* All sparc64 PMUs currently have 2 events.  But this simple
-	 * driver only supports one active event at a time.
-	 */
-	perf_max_events = 1;
+	/* All sparc64 PMUs currently have 2 events.  */
+	perf_max_events = 2;
 
 	register_die_notifier(&perf_event_nmi_notifier);
 }
+
+static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip)
+{
+	if (entry->nr < PERF_MAX_STACK_DEPTH)
+		entry->ip[entry->nr++] = ip;
+}
+
+static void perf_callchain_kernel(struct pt_regs *regs,
+				  struct perf_callchain_entry *entry)
+{
+	unsigned long ksp, fp;
+
+	callchain_store(entry, PERF_CONTEXT_KERNEL);
+	callchain_store(entry, regs->tpc);
+
+	ksp = regs->u_regs[UREG_I6];
+	fp = ksp + STACK_BIAS;
+	do {
+		struct sparc_stackf *sf;
+		struct pt_regs *regs;
+		unsigned long pc;
+
+		if (!kstack_valid(current_thread_info(), fp))
+			break;
+
+		sf = (struct sparc_stackf *) fp;
+		regs = (struct pt_regs *) (sf + 1);
+
+		if (kstack_is_trap_frame(current_thread_info(), regs)) {
+			if (user_mode(regs))
+				break;
+			pc = regs->tpc;
+			fp = regs->u_regs[UREG_I6] + STACK_BIAS;
+		} else {
+			pc = sf->callers_pc;
+			fp = (unsigned long)sf->fp + STACK_BIAS;
+		}
+		callchain_store(entry, pc);
+	} while (entry->nr < PERF_MAX_STACK_DEPTH);
+}
+
+static void perf_callchain_user_64(struct pt_regs *regs,
+				   struct perf_callchain_entry *entry)
+{
+	unsigned long ufp;
+
+	callchain_store(entry, PERF_CONTEXT_USER);
+	callchain_store(entry, regs->tpc);
+
+	ufp = regs->u_regs[UREG_I6] + STACK_BIAS;
+	do {
+		struct sparc_stackf *usf, sf;
+		unsigned long pc;
+
+		usf = (struct sparc_stackf *) ufp;
+		if (__copy_from_user_inatomic(&sf, usf, sizeof(sf)))
+			break;
+
+		pc = sf.callers_pc;
+		ufp = (unsigned long)sf.fp + STACK_BIAS;
+		callchain_store(entry, pc);
+	} while (entry->nr < PERF_MAX_STACK_DEPTH);
+}
+
+static void perf_callchain_user_32(struct pt_regs *regs,
+				   struct perf_callchain_entry *entry)
+{
+	unsigned long ufp;
+
+	callchain_store(entry, PERF_CONTEXT_USER);
+	callchain_store(entry, regs->tpc);
+
+	ufp = regs->u_regs[UREG_I6];
+	do {
+		struct sparc_stackf32 *usf, sf;
+		unsigned long pc;
+
+		usf = (struct sparc_stackf32 *) ufp;
+		if (__copy_from_user_inatomic(&sf, usf, sizeof(sf)))
+			break;
+
+		pc = sf.callers_pc;
+		ufp = (unsigned long)sf.fp;
+		callchain_store(entry, pc);
+	} while (entry->nr < PERF_MAX_STACK_DEPTH);
+}
+
+/* Like powerpc we can't get PMU interrupts within the PMU handler,
+ * so no need for seperate NMI and IRQ chains as on x86.
+ */
+static DEFINE_PER_CPU(struct perf_callchain_entry, callchain);
+
+struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
+{
+	struct perf_callchain_entry *entry = &__get_cpu_var(callchain);
+
+	entry->nr = 0;
+	if (!user_mode(regs)) {
+		stack_trace_flush();
+		perf_callchain_kernel(regs, entry);
+		if (current->mm)
+			regs = task_pt_regs(current);
+		else
+			regs = NULL;
+	}
+	if (regs) {
+		flushw_user();
+		if (test_thread_flag(TIF_32BIT))
+			perf_callchain_user_32(regs, entry);
+		else
+			perf_callchain_user_64(regs, entry);
+	}
+	return entry;
+}
diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
index cfa0e19abe3..d77f5431694 100644
--- a/arch/sparc/kernel/sys_sparc_64.c
+++ b/arch/sparc/kernel/sys_sparc_64.c
@@ -365,6 +365,7 @@ EXPORT_SYMBOL(get_fb_unmapped_area);
 void arch_pick_mmap_layout(struct mm_struct *mm)
 {
 	unsigned long random_factor = 0UL;
+	unsigned long gap;
 
 	if (current->flags & PF_RANDOMIZE) {
 		random_factor = get_random_int();
@@ -379,9 +380,10 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 	 * Fall back to the standard layout if the personality
 	 * bit is set, or if the expected stack growth is unlimited:
 	 */
+	gap = rlimit(RLIMIT_STACK);
 	if (!test_thread_flag(TIF_32BIT) ||
 	    (current->personality & ADDR_COMPAT_LAYOUT) ||
-	    current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY ||
+	    gap == RLIM_INFINITY ||
 	    sysctl_legacy_va_layout) {
 		mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
 		mm->get_unmapped_area = arch_get_unmapped_area;
@@ -389,9 +391,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 	} else {
 		/* We know it's 32-bit */
 		unsigned long task_size = STACK_TOP32;
-		unsigned long gap;
 
-		gap = current->signal->rlim[RLIMIT_STACK].rlim_cur;
 		if (gap < 128 * 1024 * 1024)
 			gap = 128 * 1024 * 1024;
 		if (gap > (task_size / 6 * 5))
diff --git a/arch/sparc/kernel/time_32.c b/arch/sparc/kernel/time_32.c
index 5b2f595fe65..0d4c09b15ef 100644
--- a/arch/sparc/kernel/time_32.c
+++ b/arch/sparc/kernel/time_32.c
@@ -35,6 +35,7 @@
 #include <linux/platform_device.h>
 
 #include <asm/oplib.h>
+#include <asm/timex.h>
 #include <asm/timer.h>
 #include <asm/system.h>
 #include <asm/irq.h>
@@ -51,7 +52,6 @@ DEFINE_SPINLOCK(rtc_lock);
 EXPORT_SYMBOL(rtc_lock);
 
 static int set_rtc_mmss(unsigned long);
-static int sbus_do_settimeofday(struct timespec *tv);
 
 unsigned long profile_pc(struct pt_regs *regs)
 {
@@ -76,6 +76,8 @@ EXPORT_SYMBOL(profile_pc);
 
 __volatile__ unsigned int *master_l10_counter;
 
+u32 (*do_arch_gettimeoffset)(void);
+
 /*
  * timer_interrupt() needs to keep up the real-time clock,
  * as well as call the "do_timer()" routine every clocktick
@@ -196,35 +198,14 @@ static int __init clock_init(void)
 {
 	return of_register_driver(&clock_driver, &of_platform_bus_type);
 }
-
 /* Must be after subsys_initcall() so that busses are probed.  Must
  * be before device_initcall() because things like the RTC driver
  * need to see the clock registers.
  */
 fs_initcall(clock_init);
 
-static void __init sbus_time_init(void)
-{
-
-	BTFIXUPSET_CALL(bus_do_settimeofday, sbus_do_settimeofday, BTFIXUPCALL_NORM);
-	btfixup();
-
-	sparc_init_timers(timer_interrupt);
-}
-
-void __init time_init(void)
-{
-#ifdef CONFIG_PCI
-	extern void pci_time_init(void);
-	if (pcic_present()) {
-		pci_time_init();
-		return;
-	}
-#endif
-	sbus_time_init();
-}
 
-static inline unsigned long do_gettimeoffset(void)
+u32 sbus_do_gettimeoffset(void)
 {
 	unsigned long val = *master_l10_counter;
 	unsigned long usec = (val >> 10) & 0x1fffff;
@@ -233,86 +214,39 @@ static inline unsigned long do_gettimeoffset(void)
 	if (val & 0x80000000)
 		usec += 1000000 / HZ;
 
-	return usec;
+	return usec * 1000;
 }
 
-/* Ok, my cute asm atomicity trick doesn't work anymore.
- * There are just too many variables that need to be protected
- * now (both members of xtime, et al.)
- */
-void do_gettimeofday(struct timeval *tv)
-{
-	unsigned long flags;
-	unsigned long seq;
-	unsigned long usec, sec;
-	unsigned long max_ntp_tick = tick_usec - tickadj;
-
-	do {
-		seq = read_seqbegin_irqsave(&xtime_lock, flags);
-		usec = do_gettimeoffset();
-
-		/*
-		 * If time_adjust is negative then NTP is slowing the clock
-		 * so make sure not to go into next possible interval.
-		 * Better to lose some accuracy than have time go backwards..
-		 */
-		if (unlikely(time_adjust < 0))
-			usec = min(usec, max_ntp_tick);
-
-		sec = xtime.tv_sec;
-		usec += (xtime.tv_nsec / 1000);
-	} while (read_seqretry_irqrestore(&xtime_lock, seq, flags));
-
-	while (usec >= 1000000) {
-		usec -= 1000000;
-		sec++;
-	}
 
-	tv->tv_sec = sec;
-	tv->tv_usec = usec;
-}
-
-EXPORT_SYMBOL(do_gettimeofday);
-
-int do_settimeofday(struct timespec *tv)
+u32 arch_gettimeoffset(void)
 {
-	int ret;
-
-	write_seqlock_irq(&xtime_lock);
-	ret = bus_do_settimeofday(tv);
-	write_sequnlock_irq(&xtime_lock);
-	clock_was_set();
-	return ret;
+	if (unlikely(!do_arch_gettimeoffset))
+		return 0;
+	return do_arch_gettimeoffset();
 }
 
-EXPORT_SYMBOL(do_settimeofday);
-
-static int sbus_do_settimeofday(struct timespec *tv)
+static void __init sbus_time_init(void)
 {
-	time_t wtm_sec, sec = tv->tv_sec;
-	long wtm_nsec, nsec = tv->tv_nsec;
+	do_arch_gettimeoffset = sbus_do_gettimeoffset;
 
-	if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
-		return -EINVAL;
-
-	/*
-	 * This is revolting. We need to set "xtime" correctly. However, the
-	 * value in this location is the value at the most recent update of
-	 * wall time.  Discover what correction gettimeofday() would have
-	 * made, and then undo it!
-	 */
-	nsec -= 1000 * do_gettimeoffset();
-
-	wtm_sec  = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
-	wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
+	btfixup();
 
-	set_normalized_timespec(&xtime, sec, nsec);
-	set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
+	sparc_init_timers(timer_interrupt);
+}
 
-	ntp_clear();
-	return 0;
+void __init time_init(void)
+{
+#ifdef CONFIG_PCI
+	extern void pci_time_init(void);
+	if (pcic_present()) {
+		pci_time_init();
+		return;
+	}
+#endif
+	sbus_time_init();
 }
 
+
 static int set_rtc_mmss(unsigned long secs)
 {
 	struct rtc_device *rtc = rtc_class_open("rtc0");