summaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/alpha/include/asm/unistd.h6
-rw-r--r--arch/alpha/kernel/systbls.S12
-rw-r--r--arch/alpha/kernel/time.c3
-rw-r--r--arch/arm/mach-omap2/board-3430sdp.c19
-rw-r--r--arch/arm/mach-omap2/board-rx51.c18
-rw-r--r--arch/arm/mach-omap2/cpuidle34xx.c436
-rw-r--r--arch/arm/mach-omap2/pm.h17
-rw-r--r--arch/arm/mach-omap2/pm34xx.c14
-rw-r--r--arch/mips/ar7/gpio.c4
-rw-r--r--arch/mips/include/asm/dma-mapping.h2
-rw-r--r--arch/mips/kernel/traps.c6
-rw-r--r--arch/mips/rb532/gpio.c2
-rw-r--r--arch/powerpc/platforms/83xx/suspend.c7
-rw-r--r--arch/powerpc/sysdev/fsl_msi.c7
-rw-r--r--arch/sparc/kernel/apc.c2
-rw-r--r--arch/sparc/kernel/pci_sabre.c5
-rw-r--r--arch/sparc/kernel/pci_schizo.c8
-rw-r--r--arch/sparc/kernel/pmc.c2
-rw-r--r--arch/sparc/kernel/smp_32.c10
-rw-r--r--arch/sparc/kernel/time_32.c2
-rw-r--r--arch/sparc/lib/checksum_32.S12
-rw-r--r--arch/um/os-Linux/util.c23
-rw-r--r--arch/x86/include/asm/apicdef.h1
-rw-r--r--arch/x86/include/asm/pgtable_types.h1
-rw-r--r--arch/x86/include/asm/uv/uv_bau.h17
-rw-r--r--arch/x86/include/asm/uv/uv_hub.h2
-rw-r--r--arch/x86/include/asm/uv/uv_mmrs.h16
-rw-r--r--arch/x86/include/asm/x86_init.h12
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c48
-rw-r--r--arch/x86/kernel/cpu/amd.c4
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c1
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c12
-rw-r--r--arch/x86/kernel/kprobes.c5
-rw-r--r--arch/x86/kernel/x86_init.c4
-rw-r--r--arch/x86/mm/init.c24
-rw-r--r--arch/x86/platform/uv/tlb_uv.c92
-rw-r--r--arch/x86/xen/mmu.c138
37 files changed, 451 insertions, 543 deletions
diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h
index 058937bf5a7..b1834166922 100644
--- a/arch/alpha/include/asm/unistd.h
+++ b/arch/alpha/include/asm/unistd.h
@@ -452,10 +452,14 @@
#define __NR_fanotify_init 494
#define __NR_fanotify_mark 495
#define __NR_prlimit64 496
+#define __NR_name_to_handle_at 497
+#define __NR_open_by_handle_at 498
+#define __NR_clock_adjtime 499
+#define __NR_syncfs 500
#ifdef __KERNEL__
-#define NR_SYSCALLS 497
+#define NR_SYSCALLS 501
#define __ARCH_WANT_IPC_PARSE_VERSION
#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S
index a6a1de9db16..15f999d41c7 100644
--- a/arch/alpha/kernel/systbls.S
+++ b/arch/alpha/kernel/systbls.S
@@ -498,23 +498,27 @@ sys_call_table:
.quad sys_ni_syscall /* sys_timerfd */
.quad sys_eventfd
.quad sys_recvmmsg
- .quad sys_fallocate /* 480 */
+ .quad sys_fallocate /* 480 */
.quad sys_timerfd_create
.quad sys_timerfd_settime
.quad sys_timerfd_gettime
.quad sys_signalfd4
- .quad sys_eventfd2 /* 485 */
+ .quad sys_eventfd2 /* 485 */
.quad sys_epoll_create1
.quad sys_dup3
.quad sys_pipe2
.quad sys_inotify_init1
- .quad sys_preadv /* 490 */
+ .quad sys_preadv /* 490 */
.quad sys_pwritev
.quad sys_rt_tgsigqueueinfo
.quad sys_perf_event_open
.quad sys_fanotify_init
- .quad sys_fanotify_mark /* 495 */
+ .quad sys_fanotify_mark /* 495 */
.quad sys_prlimit64
+ .quad sys_name_to_handle_at
+ .quad sys_open_by_handle_at
+ .quad sys_clock_adjtime
+ .quad sys_syncfs /* 500 */
.size sys_call_table, . - sys_call_table
.type sys_call_table, @object
diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c
index 918e8e0b72f..818e74ed45d 100644
--- a/arch/alpha/kernel/time.c
+++ b/arch/alpha/kernel/time.c
@@ -375,8 +375,7 @@ static struct clocksource clocksource_rpcc = {
static inline void register_rpcc_clocksource(long cycle_freq)
{
- clocksource_calc_mult_shift(&clocksource_rpcc, cycle_freq, 4);
- clocksource_register(&clocksource_rpcc);
+ clocksource_register_hz(&clocksource_rpcc, cycle_freq);
}
#else /* !CONFIG_SMP */
static inline void register_rpcc_clocksource(long cycle_freq)
diff --git a/arch/arm/mach-omap2/board-3430sdp.c b/arch/arm/mach-omap2/board-3430sdp.c
index 99218a5299c..52dbdf3ab66 100644
--- a/arch/arm/mach-omap2/board-3430sdp.c
+++ b/arch/arm/mach-omap2/board-3430sdp.c
@@ -59,24 +59,6 @@
#define TWL4030_MSECURE_GPIO 22
-/* FIXME: These values need to be updated based on more profiling on 3430sdp*/
-static struct cpuidle_params omap3_cpuidle_params_table[] = {
- /* C1 */
- {1, 2, 2, 5},
- /* C2 */
- {1, 10, 10, 30},
- /* C3 */
- {1, 50, 50, 300},
- /* C4 */
- {1, 1500, 1800, 4000},
- /* C5 */
- {1, 2500, 7500, 12000},
- /* C6 */
- {1, 3000, 8500, 15000},
- /* C7 */
- {1, 10000, 30000, 300000},
-};
-
static uint32_t board_keymap[] = {
KEY(0, 0, KEY_LEFT),
KEY(0, 1, KEY_RIGHT),
@@ -800,7 +782,6 @@ static void __init omap_3430sdp_init(void)
omap3_mux_init(board_mux, OMAP_PACKAGE_CBB);
omap_board_config = sdp3430_config;
omap_board_config_size = ARRAY_SIZE(sdp3430_config);
- omap3_pm_init_cpuidle(omap3_cpuidle_params_table);
omap3430_i2c_init();
omap_display_init(&sdp3430_dss_data);
if (omap_rev() > OMAP3430_REV_ES1_0)
diff --git a/arch/arm/mach-omap2/board-rx51.c b/arch/arm/mach-omap2/board-rx51.c
index f8ba20a14e6..fec4cac8fa0 100644
--- a/arch/arm/mach-omap2/board-rx51.c
+++ b/arch/arm/mach-omap2/board-rx51.c
@@ -58,21 +58,25 @@ static struct platform_device leds_gpio = {
},
};
+/*
+ * cpuidle C-states definition override from the default values.
+ * The 'exit_latency' field is the sum of sleep and wake-up latencies.
+ */
static struct cpuidle_params rx51_cpuidle_params[] = {
/* C1 */
- {1, 110, 162, 5},
+ {110 + 162, 5 , 1},
/* C2 */
- {1, 106, 180, 309},
+ {106 + 180, 309, 1},
/* C3 */
- {0, 107, 410, 46057},
+ {107 + 410, 46057, 0},
/* C4 */
- {0, 121, 3374, 46057},
+ {121 + 3374, 46057, 0},
/* C5 */
- {1, 855, 1146, 46057},
+ {855 + 1146, 46057, 1},
/* C6 */
- {0, 7580, 4134, 484329},
+ {7580 + 4134, 484329, 0},
/* C7 */
- {1, 7505, 15274, 484329},
+ {7505 + 15274, 484329, 1},
};
static struct omap_lcd_config rx51_lcd_config = {
diff --git a/arch/arm/mach-omap2/cpuidle34xx.c b/arch/arm/mach-omap2/cpuidle34xx.c
index 1c240eff391..4bf6e6e8b10 100644
--- a/arch/arm/mach-omap2/cpuidle34xx.c
+++ b/arch/arm/mach-omap2/cpuidle34xx.c
@@ -36,36 +36,6 @@
#ifdef CONFIG_CPU_IDLE
-#define OMAP3_MAX_STATES 7
-#define OMAP3_STATE_C1 0 /* C1 - MPU WFI + Core active */
-#define OMAP3_STATE_C2 1 /* C2 - MPU WFI + Core inactive */
-#define OMAP3_STATE_C3 2 /* C3 - MPU CSWR + Core inactive */
-#define OMAP3_STATE_C4 3 /* C4 - MPU OFF + Core iactive */
-#define OMAP3_STATE_C5 4 /* C5 - MPU RET + Core RET */
-#define OMAP3_STATE_C6 5 /* C6 - MPU OFF + Core RET */
-#define OMAP3_STATE_C7 6 /* C7 - MPU OFF + Core OFF */
-
-#define OMAP3_STATE_MAX OMAP3_STATE_C7
-
-#define CPUIDLE_FLAG_CHECK_BM 0x10000 /* use omap3_enter_idle_bm() */
-
-struct omap3_processor_cx {
- u8 valid;
- u8 type;
- u32 sleep_latency;
- u32 wakeup_latency;
- u32 mpu_state;
- u32 core_state;
- u32 threshold;
- u32 flags;
- const char *desc;
-};
-
-struct omap3_processor_cx omap3_power_states[OMAP3_MAX_STATES];
-struct omap3_processor_cx current_cx_state;
-struct powerdomain *mpu_pd, *core_pd, *per_pd;
-struct powerdomain *cam_pd;
-
/*
* The latencies/thresholds for various C states have
* to be configured from the respective board files.
@@ -75,27 +45,31 @@ struct powerdomain *cam_pd;
*/
static struct cpuidle_params cpuidle_params_table[] = {
/* C1 */
- {1, 2, 2, 5},
+ {2 + 2, 5, 1},
/* C2 */
- {1, 10, 10, 30},
+ {10 + 10, 30, 1},
/* C3 */
- {1, 50, 50, 300},
+ {50 + 50, 300, 1},
/* C4 */
- {1, 1500, 1800, 4000},
+ {1500 + 1800, 4000, 1},
/* C5 */
- {1, 2500, 7500, 12000},
+ {2500 + 7500, 12000, 1},
/* C6 */
- {1, 3000, 8500, 15000},
+ {3000 + 8500, 15000, 1},
/* C7 */
- {1, 10000, 30000, 300000},
+ {10000 + 30000, 300000, 1},
};
+#define OMAP3_NUM_STATES ARRAY_SIZE(cpuidle_params_table)
-static int omap3_idle_bm_check(void)
-{
- if (!omap3_can_sleep())
- return 1;
- return 0;
-}
+/* Mach specific information to be recorded in the C-state driver_data */
+struct omap3_idle_statedata {
+ u32 mpu_state;
+ u32 core_state;
+ u8 valid;
+};
+struct omap3_idle_statedata omap3_idle_data[OMAP3_NUM_STATES];
+
+struct powerdomain *mpu_pd, *core_pd, *per_pd, *cam_pd;
static int _cpuidle_allow_idle(struct powerdomain *pwrdm,
struct clockdomain *clkdm)
@@ -122,12 +96,10 @@ static int _cpuidle_deny_idle(struct powerdomain *pwrdm,
static int omap3_enter_idle(struct cpuidle_device *dev,
struct cpuidle_state *state)
{
- struct omap3_processor_cx *cx = cpuidle_get_statedata(state);
+ struct omap3_idle_statedata *cx = cpuidle_get_statedata(state);
struct timespec ts_preidle, ts_postidle, ts_idle;
u32 mpu_state = cx->mpu_state, core_state = cx->core_state;
- current_cx_state = *cx;
-
/* Used to keep track of the total time in idle */
getnstimeofday(&ts_preidle);
@@ -140,7 +112,8 @@ static int omap3_enter_idle(struct cpuidle_device *dev,
if (omap_irq_pending() || need_resched())
goto return_sleep_time;
- if (cx->type == OMAP3_STATE_C1) {
+ /* Deny idle for C1 */
+ if (state == &dev->states[0]) {
pwrdm_for_each_clkdm(mpu_pd, _cpuidle_deny_idle);
pwrdm_for_each_clkdm(core_pd, _cpuidle_deny_idle);
}
@@ -148,7 +121,8 @@ static int omap3_enter_idle(struct cpuidle_device *dev,
/* Execute ARM wfi */
omap_sram_idle();
- if (cx->type == OMAP3_STATE_C1) {
+ /* Re-allow idle for C1 */
+ if (state == &dev->states[0]) {
pwrdm_for_each_clkdm(mpu_pd, _cpuidle_allow_idle);
pwrdm_for_each_clkdm(core_pd, _cpuidle_allow_idle);
}
@@ -164,41 +138,53 @@ return_sleep_time:
}
/**
- * next_valid_state - Find next valid c-state
+ * next_valid_state - Find next valid C-state
* @dev: cpuidle device
- * @state: Currently selected c-state
+ * @state: Currently selected C-state
*
* If the current state is valid, it is returned back to the caller.
* Else, this function searches for a lower c-state which is still
- * valid (as defined in omap3_power_states[]).
+ * valid.
+ *
+ * A state is valid if the 'valid' field is enabled and
+ * if it satisfies the enable_off_mode condition.
*/
static struct cpuidle_state *next_valid_state(struct cpuidle_device *dev,
- struct cpuidle_state *curr)
+ struct cpuidle_state *curr)
{
struct cpuidle_state *next = NULL;
- struct omap3_processor_cx *cx;
+ struct omap3_idle_statedata *cx = cpuidle_get_statedata(curr);
+ u32 mpu_deepest_state = PWRDM_POWER_RET;
+ u32 core_deepest_state = PWRDM_POWER_RET;
- cx = (struct omap3_processor_cx *)cpuidle_get_statedata(curr);
+ if (enable_off_mode) {
+ mpu_deepest_state = PWRDM_POWER_OFF;
+ /*
+ * Erratum i583: valable for ES rev < Es1.2 on 3630.
+ * CORE OFF mode is not supported in a stable form, restrict
+ * instead the CORE state to RET.
+ */
+ if (!IS_PM34XX_ERRATUM(PM_SDRC_WAKEUP_ERRATUM_i583))
+ core_deepest_state = PWRDM_POWER_OFF;
+ }
/* Check if current state is valid */
- if (cx->valid) {
+ if ((cx->valid) &&
+ (cx->mpu_state >= mpu_deepest_state) &&
+ (cx->core_state >= core_deepest_state)) {
return curr;
} else {
- u8 idx = OMAP3_STATE_MAX;
+ int idx = OMAP3_NUM_STATES - 1;
- /*
- * Reach the current state starting at highest C-state
- */
- for (; idx >= OMAP3_STATE_C1; idx--) {
+ /* Reach the current state starting at highest C-state */
+ for (; idx >= 0; idx--) {
if (&dev->states[idx] == curr) {
next = &dev->states[idx];
break;
}
}
- /*
- * Should never hit this condition.
- */
+ /* Should never hit this condition */
WARN_ON(next == NULL);
/*
@@ -206,17 +192,17 @@ static struct cpuidle_state *next_valid_state(struct cpuidle_device *dev,
* Start search from the next (lower) state.
*/
idx--;
- for (; idx >= OMAP3_STATE_C1; idx--) {
- struct omap3_processor_cx *cx;
-
+ for (; idx >= 0; idx--) {
cx = cpuidle_get_statedata(&dev->states[idx]);
- if (cx->valid) {
+ if ((cx->valid) &&
+ (cx->mpu_state >= mpu_deepest_state) &&
+ (cx->core_state >= core_deepest_state)) {
next = &dev->states[idx];
break;
}
}
/*
- * C1 and C2 are always valid.
+ * C1 is always valid.
* So, no need to check for 'next==NULL' outside this loop.
*/
}
@@ -229,36 +215,22 @@ static struct cpuidle_state *next_valid_state(struct cpuidle_device *dev,
* @dev: cpuidle device
* @state: The target state to be programmed
*
- * Used for C states with CPUIDLE_FLAG_CHECK_BM flag set. This
- * function checks for any pending activity and then programs the
- * device to the specified or a safer state.
+ * This function checks for any pending activity and then programs
+ * the device to the specified or a safer state.
*/
static int omap3_enter_idle_bm(struct cpuidle_device *dev,
struct cpuidle_state *state)
{
- struct cpuidle_state *new_state = next_valid_state(dev, state);
- u32 core_next_state, per_next_state = 0, per_saved_state = 0;
- u32 cam_state;
- struct omap3_processor_cx *cx;
+ struct cpuidle_state *new_state;
+ u32 core_next_state, per_next_state = 0, per_saved_state = 0, cam_state;
+ struct omap3_idle_statedata *cx;
int ret;
- if ((state->flags & CPUIDLE_FLAG_CHECK_BM) && omap3_idle_bm_check()) {
- BUG_ON(!dev->safe_state);
+ if (!omap3_can_sleep()) {
new_state = dev->safe_state;
goto select_state;
}
- cx = cpuidle_get_statedata(state);
- core_next_state = cx->core_state;
-
- /*
- * FIXME: we currently manage device-specific idle states
- * for PER and CORE in combination with CPU-specific
- * idle states. This is wrong, and device-specific
- * idle management needs to be separated out into
- * its own code.
- */
-
/*
* Prevent idle completely if CAM is active.
* CAM does not have wakeup capability in OMAP3.
@@ -270,9 +242,19 @@ static int omap3_enter_idle_bm(struct cpuidle_device *dev,
}
/*
+ * FIXME: we currently manage device-specific idle states
+ * for PER and CORE in combination with CPU-specific
+ * idle states. This is wrong, and device-specific
+ * idle management needs to be separated out into
+ * its own code.
+ */
+
+ /*
* Prevent PER off if CORE is not in retention or off as this
* would disable PER wakeups completely.
*/
+ cx = cpuidle_get_statedata(state);
+ core_next_state = cx->core_state;
per_next_state = per_saved_state = pwrdm_read_next_pwrst(per_pd);
if ((per_next_state == PWRDM_POWER_OFF) &&
(core_next_state > PWRDM_POWER_RET))
@@ -282,6 +264,8 @@ static int omap3_enter_idle_bm(struct cpuidle_device *dev,
if (per_next_state != per_saved_state)
pwrdm_set_next_pwrst(per_pd, per_next_state);
+ new_state = next_valid_state(dev, state);
+
select_state:
dev->last_state = new_state;
ret = omap3_enter_idle(dev, new_state);
@@ -295,31 +279,6 @@ select_state:
DEFINE_PER_CPU(struct cpuidle_device, omap3_idle_dev);
-/**
- * omap3_cpuidle_update_states() - Update the cpuidle states
- * @mpu_deepest_state: Enable states up to and including this for mpu domain
- * @core_deepest_state: Enable states up to and including this for core domain
- *
- * This goes through the list of states available and enables and disables the
- * validity of C states based on deepest state that can be achieved for the
- * variable domain
- */
-void omap3_cpuidle_update_states(u32 mpu_deepest_state, u32 core_deepest_state)
-{
- int i;
-
- for (i = OMAP3_STATE_C1; i < OMAP3_MAX_STATES; i++) {
- struct omap3_processor_cx *cx = &omap3_power_states[i];
-
- if ((cx->mpu_state >= mpu_deepest_state) &&
- (cx->core_state >= core_deepest_state)) {
- cx->valid = 1;
- } else {
- cx->valid = 0;
- }
- }
-}
-
void omap3_pm_init_cpuidle(struct cpuidle_params *cpuidle_board_params)
{
int i;
@@ -327,212 +286,109 @@ void omap3_pm_init_cpuidle(struct cpuidle_params *cpuidle_board_params)
if (!cpuidle_board_params)
return;
- for (i = OMAP3_STATE_C1; i < OMAP3_MAX_STATES; i++) {
- cpuidle_params_table[i].valid =
- cpuidle_board_params[i].valid;
- cpuidle_params_table[i].sleep_latency =
- cpuidle_board_params[i].sleep_latency;
- cpuidle_params_table[i].wake_latency =
- cpuidle_board_params[i].wake_latency;
- cpuidle_params_table[i].threshold =
- cpuidle_board_params[i].threshold;
+ for (i = 0; i < OMAP3_NUM_STATES; i++) {
+ cpuidle_params_table[i].valid = cpuidle_board_params[i].valid;
+ cpuidle_params_table[i].exit_latency =
+ cpuidle_board_params[i].exit_latency;
+ cpuidle_params_table[i].target_residency =
+ cpuidle_board_params[i].target_residency;
}
return;
}
-/* omap3_init_power_states - Initialises the OMAP3 specific C states.
- *
- * Below is the desciption of each C state.
- * C1 . MPU WFI + Core active
- * C2 . MPU WFI + Core inactive
- * C3 . MPU CSWR + Core inactive
- * C4 . MPU OFF + Core inactive
- * C5 . MPU CSWR + Core CSWR
- * C6 . MPU OFF + Core CSWR
- * C7 . MPU OFF + Core OFF
- */
-void omap_init_power_states(void)
-{
- /* C1 . MPU WFI + Core active */
- omap3_power_states[OMAP3_STATE_C1].valid =
- cpuidle_params_table[OMAP3_STATE_C1].valid;
- omap3_power_states[OMAP3_STATE_C1].type = OMAP3_STATE_C1;
- omap3_power_states[OMAP3_STATE_C1].sleep_latency =
- cpuidle_params_table[OMAP3_STATE_C1].sleep_latency;
- omap3_power_states[OMAP3_STATE_C1].wakeup_latency =
- cpuidle_params_table[OMAP3_STATE_C1].wake_latency;
- omap3_power_states[OMAP3_STATE_C1].threshold =
- cpuidle_params_table[OMAP3_STATE_C1].threshold;
- omap3_power_states[OMAP3_STATE_C1].mpu_state = PWRDM_POWER_ON;
- omap3_power_states[OMAP3_STATE_C1].core_state = PWRDM_POWER_ON;
- omap3_power_states[OMAP3_STATE_C1].flags = CPUIDLE_FLAG_TIME_VALID;
- omap3_power_states[OMAP3_STATE_C1].desc = "MPU ON + CORE ON";
-
- /* C2 . MPU WFI + Core inactive */
- omap3_power_states[OMAP3_STATE_C2].valid =
- cpuidle_params_table[OMAP3_STATE_C2].valid;
- omap3_power_states[OMAP3_STATE_C2].type = OMAP3_STATE_C2;
- omap3_power_states[OMAP3_STATE_C2].sleep_latency =
- cpuidle_params_table[OMAP3_STATE_C2].sleep_latency;
- omap3_power_states[OMAP3_STATE_C2].wakeup_latency =
- cpuidle_params_table[OMAP3_STATE_C2].wake_latency;
- omap3_power_states[OMAP3_STATE_C2].threshold =
- cpuidle_params_table[OMAP3_STATE_C2].threshold;
- omap3_power_states[OMAP3_STATE_C2].mpu_state = PWRDM_POWER_ON;
- omap3_power_states[OMAP3_STATE_C2].core_state = PWRDM_POWER_ON;
- omap3_power_states[OMAP3_STATE_C2].flags = CPUIDLE_FLAG_TIME_VALID |
- CPUIDLE_FLAG_CHECK_BM;
- omap3_power_states[OMAP3_STATE_C2].desc = "MPU ON + CORE ON";
-
- /* C3 . MPU CSWR + Core inactive */
- omap3_power_states[OMAP3_STATE_C3].valid =
- cpuidle_params_table[OMAP3_STATE_C3].valid;
- omap3_power_states[OMAP3_STATE_C3].type = OMAP3_STATE_C3;
- omap3_power_states[OMAP3_STATE_C3].sleep_latency =
- cpuidle_params_table[OMAP3_STATE_C3].sleep_latency;
- omap3_power_states[OMAP3_STATE_C3].wakeup_latency =
- cpuidle_params_table[OMAP3_STATE_C3].wake_latency;
- omap3_power_states[OMAP3_STATE_C3].threshold =
- cpuidle_params_table[OMAP3_STATE_C3].threshold;
- omap3_power_states[OMAP3_STATE_C3].mpu_state = PWRDM_POWER_RET;
- omap3_power_states[OMAP3_STATE_C3].core_state = PWRDM_POWER_ON;
- omap3_power_states[OMAP3_STATE_C3].flags = CPUIDLE_FLAG_TIME_VALID |
- CPUIDLE_FLAG_CHECK_BM;
- omap3_power_states[OMAP3_STATE_C3].desc = "MPU RET + CORE ON";
-
- /* C4 . MPU OFF + Core inactive */
- omap3_power_states[OMAP3_STATE_C4].valid =
- cpuidle_params_table[OMAP3_STATE_C4].valid;
- omap3_power_states[OMAP3_STATE_C4].type = OMAP3_STATE_C4;
- omap3_power_states[OMAP3_STATE_C4].sleep_latency =
- cpuidle_params_table[OMAP3_STATE_C4].sleep_latency;
- omap3_power_states[OMAP3_STATE_C4].wakeup_latency =
- cpuidle_params_table[OMAP3_STATE_C4].wake_latency;
- omap3_power_states[OMAP3_STATE_C4].threshold =
- cpuidle_params_table[OMAP3_STATE_C4].threshold;
- omap3_power_states[OMAP3_STATE_C4].mpu_state = PWRDM_POWER_OFF;
- omap3_power_states[OMAP3_STATE_C4].core_state = PWRDM_POWER_ON;
- omap3_power_states[OMAP3_STATE_C4].flags = CPUIDLE_FLAG_TIME_VALID |
- CPUIDLE_FLAG_CHECK_BM;
- omap3_power_states[OMAP3_STATE_C4].desc = "MPU OFF + CORE ON";
-
- /* C5 . MPU CSWR + Core CSWR*/
- omap3_power_states[OMAP3_STATE_C5].valid =
- cpuidle_params_table[OMAP3_STATE_C5].valid;
- omap3_power_states[OMAP3_STATE_C5].type = OMAP3_STATE_C5;
- omap3_power_states[OMAP3_STATE_C5].sleep_latency =
- cpuidle_params_table[OMAP3_STATE_C5].sleep_latency;
- omap3_power_states[OMAP3_STATE_C5].wakeup_latency =
- cpuidle_params_table[OMAP3_STATE_C5].wake_latency;
- omap3_power_states[OMAP3_STATE_C5].threshold =
- cpuidle_params_table[OMAP3_STATE_C5].threshold;
- omap3_power_states[OMAP3_STATE_C5].mpu_state = PWRDM_POWER_RET;
- omap3_power_states[OMAP3_STATE_C5].core_state = PWRDM_POWER_RET;
- omap3_power_states[OMAP3_STATE_C5].flags = CPUIDLE_FLAG_TIME_VALID |
- CPUIDLE_FLAG_CHECK_BM;
- omap3_power_states[OMAP3_STATE_C5].desc = "MPU RET + CORE RET";
-
- /* C6 . MPU OFF + Core CSWR */
- omap3_power_states[OMAP3_STATE_C6].valid =
- cpuidle_params_table[OMAP3_STATE_C6].valid;
- omap3_power_states[OMAP3_STATE_C6].type = OMAP3_STATE_C6;
- omap3_power_states[OMAP3_STATE_C6].sleep_latency =
- cpuidle_params_table[OMAP3_STATE_C6].sleep_latency;
- omap3_power_states[OMAP3_STATE_C6].wakeup_latency =
- cpuidle_params_table[OMAP3_STATE_C6].wake_latency;
- omap3_power_states[OMAP3_STATE_C6].threshold =
- cpuidle_params_table[OMAP3_STATE_C6].threshold;
- omap3_power_states[OMAP3_STATE_C6].mpu_state = PWRDM_POWER_OFF;
- omap3_power_states[OMAP3_STATE_C6].core_state = PWRDM_POWER_RET;
- omap3_power_states[OMAP3_STATE_C6].flags = CPUIDLE_FLAG_TIME_VALID |
- CPUIDLE_FLAG_CHECK_BM;
- omap3_power_states[OMAP3_STATE_C6].desc = "MPU OFF + CORE RET";
-
- /* C7 . MPU OFF + Core OFF */
- omap3_power_states[OMAP3_STATE_C7].valid =
- cpuidle_params_table[OMAP3_STATE_C7].valid;
- omap3_power_states[OMAP3_STATE_C7].type = OMAP3_STATE_C7;
- omap3_power_states[OMAP3_STATE_C7].sleep_latency =
- cpuidle_params_table[OMAP3_STATE_C7].sleep_latency;
- omap3_power_states[OMAP3_STATE_C7].wakeup_latency =
- cpuidle_params_table[OMAP3_STATE_C7].wake_latency;
- omap3_power_states[OMAP3_STATE_C7].threshold =
- cpuidle_params_table[OMAP3_STATE_C7].threshold;
- omap3_power_states[OMAP3_STATE_C7].mpu_state = PWRDM_POWER_OFF;
- omap3_power_states[OMAP3_STATE_C7].core_state = PWRDM_POWER_OFF;
- omap3_power_states[OMAP3_STATE_C7].flags = CPUIDLE_FLAG_TIME_VALID |
- CPUIDLE_FLAG_CHECK_BM;
- omap3_power_states[OMAP3_STATE_C7].desc = "MPU OFF + CORE OFF";
-
- /*
- * Erratum i583: implementation for ES rev < Es1.2 on 3630. We cannot
- * enable OFF mode in a stable form for previous revisions.
- * we disable C7 state as a result.
- */
- if (IS_PM34XX_ERRATUM(PM_SDRC_WAKEUP_ERRATUM_i583)) {
- omap3_power_states[OMAP3_STATE_C7].valid = 0;
- cpuidle_params_table[OMAP3_STATE_C7].valid = 0;
- pr_warn("%s: core off state C7 disabled due to i583\n",
- __func__);
- }
-}
-
struct cpuidle_driver omap3_idle_driver = {
.name = "omap3_idle",
.owner = THIS_MODULE,
};
+/* Helper to fill the C-state common data and register the driver_data */
+static inline struct omap3_idle_statedata *_fill_cstate(
+ struct cpuidle_device *dev,
+ int idx, const char *descr)
+{
+ struct omap3_idle_statedata *cx = &omap3_idle_data[idx];
+ struct cpuidle_state *state = &dev->states[idx];
+
+ state->exit_latency = cpuidle_params_table[idx].exit_latency;
+ state->target_residency = cpuidle_params_table[idx].target_residency;
+ state->flags = CPUIDLE_FLAG_TIME_VALID;
+ state->enter = omap3_enter_idle_bm;
+ cx->valid = cpuidle_params_table[idx].valid;
+ sprintf(state->name, "C%d", idx + 1);
+ strncpy(state->desc, descr, CPUIDLE_DESC_LEN);
+ cpuidle_set_statedata(state, cx);
+
+ return cx;
+}
+
/**
* omap3_idle_init - Init routine for OMAP3 idle
*
- * Registers the OMAP3 specific cpuidle driver with the cpuidle
+ * Registers the OMAP3 specific cpuidle driver to the cpuidle
* framework with the valid set of states.
*/
int __init omap3_idle_init(void)
{
- int i, count = 0;
- struct omap3_processor_cx *cx;
- struct cpuidle_state *state;
struct cpuidle_device *dev;
+ struct omap3_idle_statedata *cx;
mpu_pd = pwrdm_lookup("mpu_pwrdm");
core_pd = pwrdm_lookup("core_pwrdm");
per_pd = pwrdm_lookup("per_pwrdm");
cam_pd = pwrdm_lookup("cam_pwrdm");
- omap_init_power_states();
cpuidle_register_driver(&omap3_idle_driver);
-
dev = &per_cpu(omap3_idle_dev, smp_processor_id());
- for (i = OMAP3_STATE_C1; i < OMAP3_MAX_STATES; i++) {
- cx = &omap3_power_states[i];
- state = &dev->states[count];
-
- if (!cx->valid)
- continue;
- cpuidle_set_statedata(state, cx);
- state->exit_latency = cx->sleep_latency + cx->wakeup_latency;
- state->target_residency = cx->threshold;
- state->flags = cx->flags;
- state->enter = (state->flags & CPUIDLE_FLAG_CHECK_BM) ?
- omap3_enter_idle_bm : omap3_enter_idle;
- if (cx->type == OMAP3_STATE_C1)
- dev->safe_state = state;
- sprintf(state->name, "C%d", count+1);
- strncpy(state->desc, cx->desc, CPUIDLE_DESC_LEN);
- count++;
- }
+ /* C1 . MPU WFI + Core active */
+ cx = _fill_cstate(dev, 0, "MPU ON + CORE ON");
+ (&dev->states[0])->enter = omap3_enter_idle;
+ dev->safe_state = &dev->states[0];
+ cx->valid = 1; /* C1 is always valid */
+ cx->mpu_state = PWRDM_POWER_ON;
+ cx->core_state = PWRDM_POWER_ON;
- if (!count)
- return -EINVAL;
- dev->state_count = count;
+ /* C2 . MPU WFI + Core inactive */
+ cx = _fill_cstate(dev, 1, "MPU ON + CORE ON");
+ cx->mpu_state = PWRDM_POWER_ON;
+ cx->core_state = PWRDM_POWER_ON;
+
+ /* C3 . MPU CSWR + Core inactive */
+ cx = _fill_cstate(dev, 2, "MPU RET + CORE ON");
+ cx->mpu_state = PWRDM_POWER_RET;
+ cx->core_state = PWRDM_POWER_ON;
- if (enable_off_mode)
- omap3_cpuidle_update_states(PWRDM_POWER_OFF, PWRDM_POWER_OFF);
- else
- omap3_cpuidle_update_states(PWRDM_POWER_RET, PWRDM_POWER_RET);
+ /* C4 . MPU OFF + Core inactive */
+ cx = _fill_cstate(dev, 3, "MPU OFF + CORE ON");
+ cx->mpu_state = PWRDM_POWER_OFF;
+ cx->core_state = PWRDM_POWER_ON;
+
+ /* C5 . MPU RET + Core RET */
+ cx = _fill_cstate(dev, 4, "MPU RET + CORE RET");
+ cx->mpu_state = PWRDM_POWER_RET;
+ cx->core_state = PWRDM_POWER_RET;
+
+ /* C6 . MPU OFF + Core RET */
+ cx = _fill_cstate(dev, 5, "MPU OFF + CORE RET");
+ cx->mpu_state = PWRDM_POWER_OFF;
+ cx->core_state = PWRDM_POWER_RET;
+
+ /* C7 . MPU OFF + Core OFF */
+ cx = _fill_cstate(dev, 6, "MPU OFF + CORE OFF");
+ /*
+ * Erratum i583: implementation for ES rev < Es1.2 on 3630. We cannot
+ * enable OFF mode in a stable form for previous revisions.
+ * We disable C7 state as a result.
+ */
+ if (IS_PM34XX_ERRATUM(PM_SDRC_WAKEUP_ERRATUM_i583)) {
+ cx->valid = 0;
+ pr_warn("%s: core off state C7 disabled due to i583\n",
+ __func__);
+ }
+ cx->mpu_state = PWRDM_POWER_OFF;
+ cx->core_state = PWRDM_POWER_OFF;
+ dev->state_count = OMAP3_NUM_STATES;
if (cpuidle_register_device(dev)) {
printk(KERN_ERR "%s: CPUidle register device failed\n",
__func__);
diff --git a/arch/arm/mach-omap2/pm.h b/arch/arm/mach-omap2/pm.h
index 797bfd12b64..45bcfce7735 100644
--- a/arch/arm/mach-omap2/pm.h
+++ b/arch/arm/mach-omap2/pm.h
@@ -36,11 +36,16 @@ static inline int omap4_opp_init(void)
}
#endif
+/*
+ * cpuidle mach specific parameters
+ *
+ * The board code can override the default C-states definition using
+ * omap3_pm_init_cpuidle
+ */
struct cpuidle_params {
- u8 valid;
- u32 sleep_latency;
- u32 wake_latency;
- u32 threshold;
+ u32 exit_latency; /* exit_latency = sleep + wake-up latencies */
+ u32 target_residency;
+ u8 valid; /* validates the C-state */
};
#if defined(CONFIG_PM) && defined(CONFIG_CPU_IDLE)
@@ -73,10 +78,6 @@ extern u32 sleep_while_idle;
#define sleep_while_idle 0
#endif
-#if defined(CONFIG_CPU_IDLE)
-extern void omap3_cpuidle_update_states(u32, u32);
-#endif
-
#if defined(CONFIG_PM_DEBUG) && defined(CONFIG_DEBUG_FS)
extern void pm_dbg_update_time(struct powerdomain *pwrdm, int prev);
extern int pm_dbg_regset_save(int reg_set);
diff --git a/arch/arm/mach-omap2/pm34xx.c b/arch/arm/mach-omap2/pm34xx.c
index 0c5e3a46a3a..c155c9d1c82 100644
--- a/arch/arm/mach-omap2/pm34xx.c
+++ b/arch/arm/mach-omap2/pm34xx.c
@@ -779,18 +779,6 @@ void omap3_pm_off_mode_enable(int enable)
else
state = PWRDM_POWER_RET;
-#ifdef CONFIG_CPU_IDLE
- /*
- * Erratum i583: implementation for ES rev < Es1.2 on 3630. We cannot
- * enable OFF mode in a stable form for previous revisions, restrict
- * instead to RET
- */
- if (IS_PM34XX_ERRATUM(PM_SDRC_WAKEUP_ERRATUM_i583))
- omap3_cpuidle_update_states(state, PWRDM_POWER_RET);
- else
- omap3_cpuidle_update_states(state, state);
-#endif
-
list_for_each_entry(pwrst, &pwrst_list, node) {
if (IS_PM34XX_ERRATUM(PM_SDRC_WAKEUP_ERRATUM_i583) &&
pwrst->pwrdm == core_pwrdm &&
@@ -895,8 +883,6 @@ static int __init omap3_pm_init(void)
pm_errata_configure();
- printk(KERN_ERR "Power Management for TI OMAP3.\n");
-
/* XXX prcm_setup_regs needs to be before enabling hw
* supervised mode for powerdomains */
prcm_setup_regs();
diff --git a/arch/mips/ar7/gpio.c b/arch/mips/ar7/gpio.c
index 425dfa5d6e1..bb571bcdb8f 100644
--- a/arch/mips/ar7/gpio.c
+++ b/arch/mips/ar7/gpio.c
@@ -325,9 +325,7 @@ int __init ar7_gpio_init(void)
size = 0x1f;
}
- gpch->regs = ioremap_nocache(AR7_REGS_GPIO,
- AR7_REGS_GPIO + 0x10);
-
+ gpch->regs = ioremap_nocache(AR7_REGS_GPIO, size);
if (!gpch->regs) {
printk(KERN_ERR "%s: failed to ioremap regs\n",
gpch->chip.label);
diff --git a/arch/mips/include/asm/dma-mapping.h b/arch/mips/include/asm/dma-mapping.h
index 655f849bd08..7aa37ddfca4 100644
--- a/arch/mips/include/asm/dma-mapping.h
+++ b/arch/mips/include/asm/dma-mapping.h
@@ -5,7 +5,9 @@
#include <asm/cache.h>
#include <asm-generic/dma-coherent.h>
+#ifndef CONFIG_SGI_IP27 /* Kludge to fix 2.6.39 build for IP27 */
#include <dma-coherence.h>
+#endif
extern struct dma_map_ops *mips_dma_map_ops;
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 71350f7f2d8..e9b3af27d84 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -374,7 +374,8 @@ void __noreturn die(const char *str, struct pt_regs *regs)
unsigned long dvpret = dvpe();
#endif /* CONFIG_MIPS_MT_SMTC */
- notify_die(DIE_OOPS, str, regs, 0, regs_to_trapnr(regs), SIGSEGV);
+ if (notify_die(DIE_OOPS, str, regs, 0, regs_to_trapnr(regs), SIGSEGV) == NOTIFY_STOP)
+ sig = 0;
console_verbose();
spin_lock_irq(&die_lock);
@@ -383,9 +384,6 @@ void __noreturn die(const char *str, struct pt_regs *regs)
mips_mt_regdump(dvpret);
#endif /* CONFIG_MIPS_MT_SMTC */
- if (notify_die(DIE_OOPS, str, regs, 0, regs_to_trapnr(regs), SIGSEGV) == NOTIFY_STOP)
- sig = 0;
-
printk("%s[#%d]:\n", str, ++die_counter);
show_registers(regs);
add_taint(TAINT_DIE);
diff --git a/arch/mips/rb532/gpio.c b/arch/mips/rb532/gpio.c
index 37de05d595e..6c47dfeb7be 100644
--- a/arch/mips/rb532/gpio.c
+++ b/arch/mips/rb532/gpio.c
@@ -185,7 +185,7 @@ int __init rb532_gpio_init(void)
struct resource *r;
r = rb532_gpio_reg0_res;
- rb532_gpio_chip->regbase = ioremap_nocache(r->start, r->end - r->start);
+ rb532_gpio_chip->regbase = ioremap_nocache(r->start, resource_size(r));
if (!rb532_gpio_chip->regbase) {
printk(KERN_ERR "rb532: cannot remap GPIO register 0\n");
diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c
index 188272934cf..104faa8aa23 100644
--- a/arch/powerpc/platforms/83xx/suspend.c
+++ b/arch/powerpc/platforms/83xx/suspend.c
@@ -318,17 +318,20 @@ static const struct platform_suspend_ops mpc83xx_suspend_ops = {
.end = mpc83xx_suspend_end,
};
+static struct of_device_id pmc_match[];
static int pmc_probe(struct platform_device *ofdev)
{
+ const struct of_device_id *match;
struct device_node *np = ofdev->dev.of_node;
struct resource res;
struct pmc_type *type;
int ret = 0;
- if (!ofdev->dev.of_match)
+ match = of_match_device(pmc_match, &ofdev->dev);
+ if (!match)
return -EINVAL;
- type = ofdev->dev.of_match->data;
+ type = match->data;
if (!of_device_is_available(np))
return -ENODEV;
diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c
index d5679dc1e20..01cd2f08951 100644
--- a/arch/powerpc/sysdev/fsl_msi.c
+++ b/arch/powerpc/sysdev/fsl_msi.c
@@ -304,8 +304,10 @@ static int __devinit fsl_msi_setup_hwirq(struct fsl_msi *msi,
return 0;
}
+static const struct of_device_id fsl_of_msi_ids[];
static int __devinit fsl_of_msi_probe(struct platform_device *dev)
{
+ const struct of_device_id *match;
struct fsl_msi *msi;
struct resource res;
int err, i, j, irq_index, count;
@@ -316,9 +318,10 @@ static int __devinit fsl_of_msi_probe(struct platform_device *dev)
u32 offset;
static const u32 all_avail[] = { 0, NR_MSI_IRQS };
- if (!dev->dev.of_match)
+ match = of_match_device(fsl_of_msi_ids, &dev->dev);
+ if (!match)
return -EINVAL;
- features = dev->dev.of_match->data;
+ features = match->data;
printk(KERN_DEBUG "Setting up Freescale MSI support\n");
diff --git a/arch/sparc/kernel/apc.c b/arch/sparc/kernel/apc.c
index f679c57644d..1e34f29e58b 100644
--- a/arch/sparc/kernel/apc.c
+++ b/arch/sparc/kernel/apc.c
@@ -165,7 +165,7 @@ static int __devinit apc_probe(struct platform_device *op)
return 0;
}
-static struct of_device_id __initdata apc_match[] = {
+static struct of_device_id apc_match[] = {
{
.name = APC_OBPNAME,
},
diff --git a/arch/sparc/kernel/pci_sabre.c b/arch/sparc/kernel/pci_sabre.c
index 948068a083f..d1840dbdaa2 100644
--- a/arch/sparc/kernel/pci_sabre.c
+++ b/arch/sparc/kernel/pci_sabre.c
@@ -452,8 +452,10 @@ static void __devinit sabre_pbm_init(struct pci_pbm_info *pbm,
sabre_scan_bus(pbm, &op->dev);
}
+static const struct of_device_id sabre_match[];
static int __devinit sabre_probe(struct platform_device *op)
{
+ const struct of_device_id *match;
const struct linux_prom64_registers *pr_regs;
struct device_node *dp = op->dev.of_node;
struct pci_pbm_info *pbm;
@@ -463,7 +465,8 @@ static int __devinit sabre_probe(struct platform_device *op)
const u32 *vdma;
u64 clear_irq;
- hummingbird_p = op->dev.of_match && (op->dev.of_match->data != NULL);
+ match = of_match_device(sabre_match, &op->dev);
+ hummingbird_p = match && (match->data != NULL);
if (!hummingbird_p) {
struct device_node *cpu_dp;
diff --git a/arch/sparc/kernel/pci_schizo.c b/arch/sparc/kernel/pci_schizo.c
index fecfcb2063c..283fbc329a4 100644
--- a/arch/sparc/kernel/pci_schizo.c
+++ b/arch/sparc/kernel/pci_schizo.c
@@ -1458,11 +1458,15 @@ out_err:
return err;
}
+static const struct of_device_id schizo_match[];
static int __devinit schizo_probe(struct platform_device *op)
{
- if (!op->dev.of_match)
+ const struct of_device_id *match;
+
+ match = of_match_device(schizo_match, &op->dev);
+ if (!match)
return -EINVAL;
- return __schizo_init(op, (unsigned long) op->dev.of_match->data);
+ return __schizo_init(op, (unsigned long)match->data);
}
/* The ordering of this table is very important. Some Tomatillo
diff --git a/arch/sparc/kernel/pmc.c b/arch/sparc/kernel/pmc.c
index 93d7b4465f8..6a585d39358 100644
--- a/arch/sparc/kernel/pmc.c
+++ b/arch/sparc/kernel/pmc.c
@@ -69,7 +69,7 @@ static int __devinit pmc_probe(struct platform_device *op)
return 0;
}
-static struct of_device_id __initdata pmc_match[] = {
+static struct of_device_id pmc_match[] = {
{
.name = PMC_OBPNAME,
},
diff --git a/arch/sparc/kernel/smp_32.c b/arch/sparc/kernel/smp_32.c
index 91c10fb7085..850a1360c0d 100644
--- a/arch/sparc/kernel/smp_32.c
+++ b/arch/sparc/kernel/smp_32.c
@@ -53,6 +53,7 @@ cpumask_t smp_commenced_mask = CPU_MASK_NONE;
void __cpuinit smp_store_cpu_info(int id)
{
int cpu_node;
+ int mid;
cpu_data(id).udelay_val = loops_per_jiffy;
@@ -60,10 +61,13 @@ void __cpuinit smp_store_cpu_info(int id)
cpu_data(id).clock_tick = prom_getintdefault(cpu_node,
"clock-frequency", 0);
cpu_data(id).prom_node = cpu_node;
- cpu_data(id).mid = cpu_get_hwmid(cpu_node);
+ mid = cpu_get_hwmid(cpu_node);
- if (cpu_data(id).mid < 0)
- panic("No MID found for CPU%d at node 0x%08d", id, cpu_node);
+ if (mid < 0) {
+ printk(KERN_NOTICE "No MID found for CPU%d at node 0x%08d", id, cpu_node);
+ mid = 0;
+ }
+ cpu_data(id).mid = mid;
}
void __init smp_cpus_done(unsigned int max_cpus)
diff --git a/arch/sparc/kernel/time_32.c b/arch/sparc/kernel/time_32.c
index 4e236391b63..96046a4024c 100644
--- a/arch/sparc/kernel/time_32.c
+++ b/arch/sparc/kernel/time_32.c
@@ -168,7 +168,7 @@ static int __devinit clock_probe(struct platform_device *op)
return 0;
}
-static struct of_device_id __initdata clock_match[] = {
+static struct of_device_id clock_match[] = {
{
.name = "eeprom",
},
diff --git a/arch/sparc/lib/checksum_32.S b/arch/sparc/lib/checksum_32.S
index 3632cb34e91..0084c3361e1 100644
--- a/arch/sparc/lib/checksum_32.S
+++ b/arch/sparc/lib/checksum_32.S
@@ -289,10 +289,16 @@ cc_end_cruft:
/* Also, handle the alignment code out of band. */
cc_dword_align:
- cmp %g1, 6
- bl,a ccte
+ cmp %g1, 16
+ bge 1f
+ srl %g1, 1, %o3
+2: cmp %o3, 0
+ be,a ccte
andcc %g1, 0xf, %o3
- andcc %o0, 0x1, %g0
+ andcc %o3, %o0, %g0 ! Check %o0 only (%o1 has the same last 2 bits)
+ be,a 2b
+ srl %o3, 1, %o3
+1: andcc %o0, 0x1, %g0
bne ccslow
andcc %o0, 0x2, %g0
be 1f
diff --git a/arch/um/os-Linux/util.c b/arch/um/os-Linux/util.c
index 6ea77979531..42827cafa6a 100644
--- a/arch/um/os-Linux/util.c
+++ b/arch/um/os-Linux/util.c
@@ -5,6 +5,7 @@
#include <stdio.h>
#include <stdlib.h>
+#include <unistd.h>
#include <errno.h>
#include <signal.h>
#include <string.h>
@@ -75,6 +76,26 @@ void setup_hostinfo(char *buf, int len)
host.release, host.version, host.machine);
}
+/*
+ * We cannot use glibc's abort(). It makes use of tgkill() which
+ * has no effect within UML's kernel threads.
+ * After that glibc would execute an invalid instruction to kill
+ * the calling process and UML crashes with SIGSEGV.
+ */
+static inline void __attribute__ ((noreturn)) uml_abort(void)
+{
+ sigset_t sig;
+
+ fflush(NULL);
+
+ if (!sigemptyset(&sig) && !sigaddset(&sig, SIGABRT))
+ sigprocmask(SIG_UNBLOCK, &sig, 0);
+
+ for (;;)
+ if (kill(getpid(), SIGABRT) < 0)
+ exit(127);
+}
+
void os_dump_core(void)
{
int pid;
@@ -116,5 +137,5 @@ void os_dump_core(void)
while ((pid = waitpid(-1, NULL, WNOHANG | __WALL)) > 0)
os_kill_ptraced_process(pid, 0);
- abort();
+ uml_abort();
}
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index d87988bacf3..34595d5e103 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -78,6 +78,7 @@
#define APIC_DEST_LOGICAL 0x00800
#define APIC_DEST_PHYSICAL 0x00000
#define APIC_DM_FIXED 0x00000
+#define APIC_DM_FIXED_MASK 0x00700
#define APIC_DM_LOWEST 0x00100
#define APIC_DM_SMI 0x00200
#define APIC_DM_REMRD 0x00300
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 7db7723d1f3..d56187c6b83 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -299,6 +299,7 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
/* Install a pte for a particular vaddr in kernel space. */
void set_pte_vaddr(unsigned long vaddr, pte_t pte);
+extern void native_pagetable_reserve(u64 start, u64 end);
#ifdef CONFIG_X86_32
extern void native_pagetable_setup_start(pgd_t *base);
extern void native_pagetable_setup_done(pgd_t *base);
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index 3e094af443c..130f1eeee5f 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -94,6 +94,8 @@
/* after this # consecutive successes, bump up the throttle if it was lowered */
#define COMPLETE_THRESHOLD 5
+#define UV_LB_SUBNODEID 0x10
+
/*
* number of entries in the destination side payload queue
*/
@@ -124,7 +126,7 @@
* The distribution specification (32 bytes) is interpreted as a 256-bit
* distribution vector. Adjacent bits correspond to consecutive even numbered
* nodeIDs. The result of adding the index of a given bit to the 15-bit
- * 'base_dest_nodeid' field of the header corresponds to the
+ * 'base_dest_nasid' field of the header corresponds to the
* destination nodeID associated with that specified bit.
*/
struct bau_target_uvhubmask {
@@ -176,7 +178,7 @@ struct bau_msg_payload {
struct bau_msg_header {
unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */
/* bits 5:0 */
- unsigned int base_dest_nodeid:15; /* nasid of the */
+ unsigned int base_dest_nasid:15; /* nasid of the */
/* bits 20:6 */ /* first bit in uvhub map */
unsigned int command:8; /* message type */
/* bits 28:21 */
@@ -378,6 +380,10 @@ struct ptc_stats {
unsigned long d_rcanceled; /* number of messages canceled by resets */
};
+struct hub_and_pnode {
+ short uvhub;
+ short pnode;
+};
/*
* one per-cpu; to locate the software tables
*/
@@ -399,10 +405,12 @@ struct bau_control {
int baudisabled;
int set_bau_off;
short cpu;
+ short osnode;
short uvhub_cpu;
short uvhub;
short cpus_in_socket;
short cpus_in_uvhub;
+ short partition_base_pnode;
unsigned short message_number;
unsigned short uvhub_quiesce;
short socket_acknowledge_count[DEST_Q_SIZE];
@@ -422,15 +430,16 @@ struct bau_control {
int congested_period;
cycles_t period_time;
long period_requests;
+ struct hub_and_pnode *target_hub_and_pnode;
};
static inline int bau_uvhub_isset(int uvhub, struct bau_target_uvhubmask *dstp)
{
return constant_test_bit(uvhub, &dstp->bits[0]);
}
-static inline void bau_uvhub_set(int uvhub, struct bau_target_uvhubmask *dstp)
+static inline void bau_uvhub_set(int pnode, struct bau_target_uvhubmask *dstp)
{
- __set_bit(uvhub, &dstp->bits[0]);
+ __set_bit(pnode, &dstp->bits[0]);
}
static inline void bau_uvhubs_clear(struct bau_target_uvhubmask *dstp,
int nbits)
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index a501741c233..4298002d0c8 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -398,6 +398,8 @@ struct uv_blade_info {
unsigned short nr_online_cpus;
unsigned short pnode;
short memory_nid;
+ spinlock_t nmi_lock;
+ unsigned long nmi_count;
};
extern struct uv_blade_info *uv_blade_info;
extern short *uv_node_to_blade;
diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h
index 20cafeac745..f5bb64a823d 100644
--- a/arch/x86/include/asm/uv/uv_mmrs.h
+++ b/arch/x86/include/asm/uv/uv_mmrs.h
@@ -5,7 +5,7 @@
*
* SGI UV MMR definitions
*
- * Copyright (C) 2007-2010 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2007-2011 Silicon Graphics, Inc. All rights reserved.
*/
#ifndef _ASM_X86_UV_UV_MMRS_H
@@ -1099,5 +1099,19 @@ union uvh_rtc1_int_config_u {
} s;
};
+/* ========================================================================= */
+/* UVH_SCRATCH5 */
+/* ========================================================================= */
+#define UVH_SCRATCH5 0x2d0200UL
+#define UVH_SCRATCH5_32 0x00778
+
+#define UVH_SCRATCH5_SCRATCH5_SHFT 0
+#define UVH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL
+union uvh_scratch5_u {
+ unsigned long v;
+ struct uvh_scratch5_s {
+ unsigned long scratch5 : 64; /* RW, W1CS */
+ } s;
+};
#endif /* __ASM_UV_MMRS_X86_H__ */
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 643ebf2e2ad..d3d859035af 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -68,6 +68,17 @@ struct x86_init_oem {
};
/**
+ * struct x86_init_mapping - platform specific initial kernel pagetable setup
+ * @pagetable_reserve: reserve a range of addresses for kernel pagetable usage
+ *
+ * For more details on the purpose of this hook, look in
+ * init_memory_mapping and the commit that added it.
+ */
+struct x86_init_mapping {
+ void (*pagetable_reserve)(u64 start, u64 end);
+};
+
+/**
* struct x86_init_paging - platform specific paging functions
* @pagetable_setup_start: platform specific pre paging_init() call
* @pagetable_setup_done: platform specific post paging_init() call
@@ -123,6 +134,7 @@ struct x86_init_ops {
struct x86_init_mpparse mpparse;
struct x86_init_irqs irqs;
struct x86_init_oem oem;
+ struct x86_init_mapping mapping;
struct x86_init_paging paging;
struct x86_init_timers timers;
struct x86_init_iommu iommu;
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 33b10a0fc09..7acd2d2ac96 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -37,6 +37,13 @@
#include <asm/smp.h>
#include <asm/x86_init.h>
#include <asm/emergency-restart.h>
+#include <asm/nmi.h>
+
+/* BMC sets a bit this MMR non-zero before sending an NMI */
+#define UVH_NMI_MMR UVH_SCRATCH5
+#define UVH_NMI_MMR_CLEAR (UVH_NMI_MMR + 8)
+#define UV_NMI_PENDING_MASK (1UL << 63)
+DEFINE_PER_CPU(unsigned long, cpu_last_nmi_count);
DEFINE_PER_CPU(int, x2apic_extra_bits);
@@ -642,18 +649,46 @@ void __cpuinit uv_cpu_init(void)
*/
int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
{
+ unsigned long real_uv_nmi;
+ int bid;
+
if (reason != DIE_NMIUNKNOWN)
return NOTIFY_OK;
if (in_crash_kexec)
/* do nothing if entering the crash kernel */
return NOTIFY_OK;
+
/*
- * Use a lock so only one cpu prints at a time
- * to prevent intermixed output.
+ * Each blade has an MMR that indicates when an NMI has been sent
+ * to cpus on the blade. If an NMI is detected, atomically
+ * clear the MMR and update a per-blade NMI count used to
+ * cause each cpu on the blade to notice a new NMI.
+ */
+ bid = uv_numa_blade_id();
+ real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK);
+
+ if (unlikely(real_uv_nmi)) {
+ spin_lock(&uv_blade_info[bid].nmi_lock);
+ real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK);
+ if (real_uv_nmi) {
+ uv_blade_info[bid].nmi_count++;
+ uv_write_local_mmr(UVH_NMI_MMR_CLEAR, UV_NMI_PENDING_MASK);
+ }
+ spin_unlock(&uv_blade_info[bid].nmi_lock);
+ }
+
+ if (likely(__get_cpu_var(cpu_last_nmi_count) == uv_blade_info[bid].nmi_count))
+ return NOTIFY_DONE;
+
+ __get_cpu_var(cpu_last_nmi_count) = uv_blade_info[bid].nmi_count;
+
+ /*
+ * Use a lock so only one cpu prints at a time.
+ * This prevents intermixed output.
*/
spin_lock(&uv_nmi_lock);
- pr_info("NMI stack dump cpu %u:\n", smp_processor_id());
+ pr_info("UV NMI stack dump cpu %u:\n", smp_processor_id());
dump_stack();
spin_unlock(&uv_nmi_lock);
@@ -661,7 +696,8 @@ int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
}
static struct notifier_block uv_dump_stack_nmi_nb = {
- .notifier_call = uv_handle_nmi
+ .notifier_call = uv_handle_nmi,
+ .priority = NMI_LOCAL_LOW_PRIOR - 1,
};
void uv_register_nmi_notifier(void)
@@ -720,8 +756,9 @@ void __init uv_system_init(void)
printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades());
bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
- uv_blade_info = kmalloc(bytes, GFP_KERNEL);
+ uv_blade_info = kzalloc(bytes, GFP_KERNEL);
BUG_ON(!uv_blade_info);
+
for (blade = 0; blade < uv_num_possible_blades(); blade++)
uv_blade_info[blade].memory_nid = -1;
@@ -747,6 +784,7 @@ void __init uv_system_init(void)
uv_blade_info[blade].pnode = pnode;
uv_blade_info[blade].nr_possible_cpus = 0;
uv_blade_info[blade].nr_online_cpus = 0;
+ spin_lock_init(&uv_blade_info[blade].nmi_lock);
max_pnode = max(pnode, max_pnode);
blade++;
}
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index bb9eb29a52d..6f9d1f6063e 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -613,7 +613,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
#endif
/* As a rule processors have APIC timer running in deep C states */
- if (c->x86 >= 0xf && !cpu_has_amd_erratum(amd_erratum_400))
+ if (c->x86 > 0xf && !cpu_has_amd_erratum(amd_erratum_400))
set_cpu_cap(c, X86_FEATURE_ARAT);
/*
@@ -698,7 +698,7 @@ cpu_dev_register(amd_cpu_dev);
*/
const int amd_erratum_400[] =
- AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0x0f, 0x4, 0x2, 0xff, 0xf),
+ AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf),
AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf));
EXPORT_SYMBOL_GPL(amd_erratum_400);
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 167f97b5596..bb0adad3514 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -509,6 +509,7 @@ recurse:
out_free:
if (b) {
kobject_put(&b->kobj);
+ list_del(&b->miscj);
kfree(b);
}
return err;
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 6f8c5e9da97..0f034460260 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -446,18 +446,20 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
*/
rdmsr(MSR_IA32_MISC_ENABLE, l, h);
+ h = lvtthmr_init;
/*
* The initial value of thermal LVT entries on all APs always reads
* 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI
* sequence to them and LVT registers are reset to 0s except for
* the mask bits which are set to 1s when APs receive INIT IPI.
- * Always restore the value that BIOS has programmed on AP based on
- * BSP's info we saved since BIOS is always setting the same value
- * for all threads/cores
+ * If BIOS takes over the thermal interrupt and sets its interrupt
+ * delivery mode to SMI (not fixed), it restores the value that the
+ * BIOS has programmed on AP based on BSP's info we saved since BIOS
+ * is always setting the same value for all threads/cores.
*/
- apic_write(APIC_LVTTHMR, lvtthmr_init);
+ if ((h & APIC_DM_FIXED_MASK) != APIC_DM_FIXED)
+ apic_write(APIC_LVTTHMR, lvtthmr_init);
- h = lvtthmr_init;
if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
printk(KERN_DEBUG
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index c969fd9d156..f1a6244d7d9 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -1183,12 +1183,13 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op,
struct pt_regs *regs)
{
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+ unsigned long flags;
/* This is possible if op is under delayed unoptimizing */
if (kprobe_disabled(&op->kp))
return;
- preempt_disable();
+ local_irq_save(flags);
if (kprobe_running()) {
kprobes_inc_nmissed_count(&op->kp);
} else {
@@ -1207,7 +1208,7 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op,
opt_pre_handler(&op->kp, regs);
__this_cpu_write(current_kprobe, NULL);
}
- preempt_enable_no_resched();
+ local_irq_restore(flags);
}
static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src)
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index c11514e9128..75ef4b18e9b 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -61,6 +61,10 @@ struct x86_init_ops x86_init __initdata = {
.banner = default_banner,
},
+ .mapping = {
+ .pagetable_reserve = native_pagetable_reserve,
+ },
+
.paging = {
.pagetable_setup_start = native_pagetable_setup_start,
.pagetable_setup_done = native_pagetable_setup_done,
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 286d289b039..37b8b0fe832 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -81,6 +81,11 @@ static void __init find_early_table_space(unsigned long end, int use_pse,
end, pgt_buf_start << PAGE_SHIFT, pgt_buf_top << PAGE_SHIFT);
}
+void __init native_pagetable_reserve(u64 start, u64 end)
+{
+ memblock_x86_reserve_range(start, end, "PGTABLE");
+}
+
struct map_range {
unsigned long start;
unsigned long end;
@@ -272,9 +277,24 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
__flush_tlb_all();
+ /*
+ * Reserve the kernel pagetable pages we used (pgt_buf_start -
+ * pgt_buf_end) and free the other ones (pgt_buf_end - pgt_buf_top)
+ * so that they can be reused for other purposes.
+ *
+ * On native it just means calling memblock_x86_reserve_range, on Xen it
+ * also means marking RW the pagetable pages that we allocated before
+ * but that haven't been used.
+ *
+ * In fact on xen we mark RO the whole range pgt_buf_start -
+ * pgt_buf_top, because we have to make sure that when
+ * init_memory_mapping reaches the pagetable pages area, it maps
+ * RO all the pagetable pages, including the ones that are beyond
+ * pgt_buf_end at that time.
+ */
if (!after_bootmem && pgt_buf_end > pgt_buf_start)
- memblock_x86_reserve_range(pgt_buf_start << PAGE_SHIFT,
- pgt_buf_end << PAGE_SHIFT, "PGTABLE");
+ x86_init.mapping.pagetable_reserve(PFN_PHYS(pgt_buf_start),
+ PFN_PHYS(pgt_buf_end));
if (!after_bootmem)
early_memtest(start, end);
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index 7cb6424317f..c58e0ea39ef 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -699,16 +699,17 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
struct mm_struct *mm,
unsigned long va, unsigned int cpu)
{
- int tcpu;
- int uvhub;
int locals = 0;
int remotes = 0;
int hubs = 0;
+ int tcpu;
+ int tpnode;
struct bau_desc *bau_desc;
struct cpumask *flush_mask;
struct ptc_stats *stat;
struct bau_control *bcp;
struct bau_control *tbcp;
+ struct hub_and_pnode *hpp;
/* kernel was booted 'nobau' */
if (nobau)
@@ -750,11 +751,18 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
bau_desc += UV_ITEMS_PER_DESCRIPTOR * bcp->uvhub_cpu;
bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
- /* cpu statistics */
for_each_cpu(tcpu, flush_mask) {
- uvhub = uv_cpu_to_blade_id(tcpu);
- bau_uvhub_set(uvhub, &bau_desc->distribution);
- if (uvhub == bcp->uvhub)
+ /*
+ * The distribution vector is a bit map of pnodes, relative
+ * to the partition base pnode (and the partition base nasid
+ * in the header).
+ * Translate cpu to pnode and hub using an array stored
+ * in local memory.
+ */
+ hpp = &bcp->socket_master->target_hub_and_pnode[tcpu];
+ tpnode = hpp->pnode - bcp->partition_base_pnode;
+ bau_uvhub_set(tpnode, &bau_desc->distribution);
+ if (hpp->uvhub == bcp->uvhub)
locals++;
else
remotes++;
@@ -855,7 +863,7 @@ void uv_bau_message_interrupt(struct pt_regs *regs)
* an interrupt, but causes an error message to be returned to
* the sender.
*/
-static void uv_enable_timeouts(void)
+static void __init uv_enable_timeouts(void)
{
int uvhub;
int nuvhubs;
@@ -1326,10 +1334,10 @@ static int __init uv_ptc_init(void)
}
/*
- * initialize the sending side's sending buffers
+ * Initialize the sending side's sending buffers.
*/
static void
-uv_activation_descriptor_init(int node, int pnode)
+uv_activation_descriptor_init(int node, int pnode, int base_pnode)
{
int i;
int cpu;
@@ -1352,11 +1360,11 @@ uv_activation_descriptor_init(int node, int pnode)
n = pa >> uv_nshift;
m = pa & uv_mmask;
+ /* the 14-bit pnode */
uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE,
(n << UV_DESC_BASE_PNODE_SHIFT | m));
-
/*
- * initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each
+ * Initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each
* cpu even though we only use the first one; one descriptor can
* describe a broadcast to 256 uv hubs.
*/
@@ -1365,12 +1373,13 @@ uv_activation_descriptor_init(int node, int pnode)
memset(bd2, 0, sizeof(struct bau_desc));
bd2->header.sw_ack_flag = 1;
/*
- * base_dest_nodeid is the nasid of the first uvhub
- * in the partition. The bit map will indicate uvhub numbers,
- * which are 0-N in a partition. Pnodes are unique system-wide.
+ * The base_dest_nasid set in the message header is the nasid
+ * of the first uvhub in the partition. The bit map will
+ * indicate destination pnode numbers relative to that base.
+ * They may not be consecutive if nasid striding is being used.
*/
- bd2->header.base_dest_nodeid = UV_PNODE_TO_NASID(uv_partition_base_pnode);
- bd2->header.dest_subnodeid = 0x10; /* the LB */
+ bd2->header.base_dest_nasid = UV_PNODE_TO_NASID(base_pnode);
+ bd2->header.dest_subnodeid = UV_LB_SUBNODEID;
bd2->header.command = UV_NET_ENDPOINT_INTD;
bd2->header.int_both = 1;
/*
@@ -1442,7 +1451,7 @@ uv_payload_queue_init(int node, int pnode)
/*
* Initialization of each UV hub's structures
*/
-static void __init uv_init_uvhub(int uvhub, int vector)
+static void __init uv_init_uvhub(int uvhub, int vector, int base_pnode)
{
int node;
int pnode;
@@ -1450,11 +1459,11 @@ static void __init uv_init_uvhub(int uvhub, int vector)
node = uvhub_to_first_node(uvhub);
pnode = uv_blade_to_pnode(uvhub);
- uv_activation_descriptor_init(node, pnode);
+ uv_activation_descriptor_init(node, pnode, base_pnode);
uv_payload_queue_init(node, pnode);
/*
- * the below initialization can't be in firmware because the
- * messaging IRQ will be determined by the OS
+ * The below initialization can't be in firmware because the
+ * messaging IRQ will be determined by the OS.
*/
apicid = uvhub_to_first_apicid(uvhub) | uv_apicid_hibits;
uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG,
@@ -1491,10 +1500,11 @@ calculate_destination_timeout(void)
/*
* initialize the bau_control structure for each cpu
*/
-static int __init uv_init_per_cpu(int nuvhubs)
+static int __init uv_init_per_cpu(int nuvhubs, int base_part_pnode)
{
int i;
int cpu;
+ int tcpu;
int pnode;
int uvhub;
int have_hmaster;
@@ -1528,6 +1538,15 @@ static int __init uv_init_per_cpu(int nuvhubs)
bcp = &per_cpu(bau_control, cpu);
memset(bcp, 0, sizeof(struct bau_control));
pnode = uv_cpu_hub_info(cpu)->pnode;
+ if ((pnode - base_part_pnode) >= UV_DISTRIBUTION_SIZE) {
+ printk(KERN_EMERG
+ "cpu %d pnode %d-%d beyond %d; BAU disabled\n",
+ cpu, pnode, base_part_pnode,
+ UV_DISTRIBUTION_SIZE);
+ return 1;
+ }
+ bcp->osnode = cpu_to_node(cpu);
+ bcp->partition_base_pnode = uv_partition_base_pnode;
uvhub = uv_cpu_hub_info(cpu)->numa_blade_id;
*(uvhub_mask + (uvhub/8)) |= (1 << (uvhub%8));
bdp = &uvhub_descs[uvhub];
@@ -1536,7 +1555,7 @@ static int __init uv_init_per_cpu(int nuvhubs)
bdp->pnode = pnode;
/* kludge: 'assuming' one node per socket, and assuming that
disabling a socket just leaves a gap in node numbers */
- socket = (cpu_to_node(cpu) & 1);
+ socket = bcp->osnode & 1;
bdp->socket_mask |= (1 << socket);
sdp = &bdp->socket[socket];
sdp->cpu_number[sdp->num_cpus] = cpu;
@@ -1585,6 +1604,20 @@ static int __init uv_init_per_cpu(int nuvhubs)
nextsocket:
socket++;
socket_mask = (socket_mask >> 1);
+ /* each socket gets a local array of pnodes/hubs */
+ bcp = smaster;
+ bcp->target_hub_and_pnode = kmalloc_node(
+ sizeof(struct hub_and_pnode) *
+ num_possible_cpus(), GFP_KERNEL, bcp->osnode);
+ memset(bcp->target_hub_and_pnode, 0,
+ sizeof(struct hub_and_pnode) *
+ num_possible_cpus());
+ for_each_present_cpu(tcpu) {
+ bcp->target_hub_and_pnode[tcpu].pnode =
+ uv_cpu_hub_info(tcpu)->pnode;
+ bcp->target_hub_and_pnode[tcpu].uvhub =
+ uv_cpu_hub_info(tcpu)->numa_blade_id;
+ }
}
}
kfree(uvhub_descs);
@@ -1637,21 +1670,22 @@ static int __init uv_bau_init(void)
spin_lock_init(&disable_lock);
congested_cycles = microsec_2_cycles(congested_response_us);
- if (uv_init_per_cpu(nuvhubs)) {
- nobau = 1;
- return 0;
- }
-
uv_partition_base_pnode = 0x7fffffff;
- for (uvhub = 0; uvhub < nuvhubs; uvhub++)
+ for (uvhub = 0; uvhub < nuvhubs; uvhub++) {
if (uv_blade_nr_possible_cpus(uvhub) &&
(uv_blade_to_pnode(uvhub) < uv_partition_base_pnode))
uv_partition_base_pnode = uv_blade_to_pnode(uvhub);
+ }
+
+ if (uv_init_per_cpu(nuvhubs, uv_partition_base_pnode)) {
+ nobau = 1;
+ return 0;
+ }
vector = UV_BAU_MESSAGE;
for_each_possible_blade(uvhub)
if (uv_blade_nr_possible_cpus(uvhub))
- uv_init_uvhub(uvhub, vector);
+ uv_init_uvhub(uvhub, vector, uv_partition_base_pnode);
uv_enable_timeouts();
alloc_intr_gate(vector, uv_bau_message_intr1);
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 55c965b38c2..0684f3c74d5 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1275,6 +1275,20 @@ static __init void xen_pagetable_setup_start(pgd_t *base)
{
}
+static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
+{
+ /* reserve the range used */
+ native_pagetable_reserve(start, end);
+
+ /* set as RW the rest */
+ printk(KERN_DEBUG "xen: setting RW the range %llx - %llx\n", end,
+ PFN_PHYS(pgt_buf_top));
+ while (end < PFN_PHYS(pgt_buf_top)) {
+ make_lowmem_page_readwrite(__va(end));
+ end += PAGE_SIZE;
+ }
+}
+
static void xen_post_allocator_init(void);
static __init void xen_pagetable_setup_done(pgd_t *base)
@@ -1463,119 +1477,6 @@ static int xen_pgd_alloc(struct mm_struct *mm)
return ret;
}
-#ifdef CONFIG_X86_64
-static __initdata u64 __last_pgt_set_rw = 0;
-static __initdata u64 __pgt_buf_start = 0;
-static __initdata u64 __pgt_buf_end = 0;
-static __initdata u64 __pgt_buf_top = 0;
-/*
- * As a consequence of the commit:
- *
- * commit 4b239f458c229de044d6905c2b0f9fe16ed9e01e
- * Author: Yinghai Lu <yinghai@kernel.org>
- * Date: Fri Dec 17 16:58:28 2010 -0800
- *
- * x86-64, mm: Put early page table high
- *
- * at some point init_memory_mapping is going to reach the pagetable pages
- * area and map those pages too (mapping them as normal memory that falls
- * in the range of addresses passed to init_memory_mapping as argument).
- * Some of those pages are already pagetable pages (they are in the range
- * pgt_buf_start-pgt_buf_end) therefore they are going to be mapped RO and
- * everything is fine.
- * Some of these pages are not pagetable pages yet (they fall in the range
- * pgt_buf_end-pgt_buf_top; for example the page at pgt_buf_end) so they
- * are going to be mapped RW. When these pages become pagetable pages and
- * are hooked into the pagetable, xen will find that the guest has already
- * a RW mapping of them somewhere and fail the operation.
- * The reason Xen requires pagetables to be RO is that the hypervisor needs
- * to verify that the pagetables are valid before using them. The validation
- * operations are called "pinning".
- *
- * In order to fix the issue we mark all the pages in the entire range
- * pgt_buf_start-pgt_buf_top as RO, however when the pagetable allocation
- * is completed only the range pgt_buf_start-pgt_buf_end is reserved by
- * init_memory_mapping. Hence the kernel is going to crash as soon as one
- * of the pages in the range pgt_buf_end-pgt_buf_top is reused (b/c those
- * ranges are RO).
- *
- * For this reason, 'mark_rw_past_pgt' is introduced which is called _after_
- * the init_memory_mapping has completed (in a perfect world we would
- * call this function from init_memory_mapping, but lets ignore that).
- *
- * Because we are called _after_ init_memory_mapping the pgt_buf_[start,
- * end,top] have all changed to new values (b/c init_memory_mapping
- * is called and setting up another new page-table). Hence, the first time
- * we enter this function, we save away the pgt_buf_start value and update
- * the pgt_buf_[end,top].
- *
- * When we detect that the "old" pgt_buf_start through pgt_buf_end
- * PFNs have been reserved (so memblock_x86_reserve_range has been called),
- * we immediately set out to RW the "old" pgt_buf_end through pgt_buf_top.
- *
- * And then we update those "old" pgt_buf_[end|top] with the new ones
- * so that we can redo this on the next pagetable.
- */
-static __init void mark_rw_past_pgt(void) {
-
- if (pgt_buf_end > pgt_buf_start) {
- u64 addr, size;
-
- /* Save it away. */
- if (!__pgt_buf_start) {
- __pgt_buf_start = pgt_buf_start;
- __pgt_buf_end = pgt_buf_end;
- __pgt_buf_top = pgt_buf_top;
- return;
- }
- /* If we get the range that starts at __pgt_buf_end that means
- * the range is reserved, and that in 'init_memory_mapping'
- * the 'memblock_x86_reserve_range' has been called with the
- * outdated __pgt_buf_start, __pgt_buf_end (the "new"
- * pgt_buf_[start|end|top] refer now to a new pagetable.
- * Note: we are called _after_ the pgt_buf_[..] have been
- * updated.*/
-
- addr = memblock_x86_find_in_range_size(PFN_PHYS(__pgt_buf_start),
- &size, PAGE_SIZE);
-
- /* Still not reserved, meaning 'memblock_x86_reserve_range'
- * hasn't been called yet. Update the _end and _top.*/
- if (addr == PFN_PHYS(__pgt_buf_start)) {
- __pgt_buf_end = pgt_buf_end;
- __pgt_buf_top = pgt_buf_top;
- return;
- }
-
- /* OK, the area is reserved, meaning it is time for us to
- * set RW for the old end->top PFNs. */
-
- /* ..unless we had already done this. */
- if (__pgt_buf_end == __last_pgt_set_rw)
- return;
-
- addr = PFN_PHYS(__pgt_buf_end);
-
- /* set as RW the rest */
- printk(KERN_DEBUG "xen: setting RW the range %llx - %llx\n",
- PFN_PHYS(__pgt_buf_end), PFN_PHYS(__pgt_buf_top));
-
- while (addr < PFN_PHYS(__pgt_buf_top)) {
- make_lowmem_page_readwrite(__va(addr));
- addr += PAGE_SIZE;
- }
- /* And update everything so that we are ready for the next
- * pagetable (the one created for regions past 4GB) */
- __last_pgt_set_rw = __pgt_buf_end;
- __pgt_buf_start = pgt_buf_start;
- __pgt_buf_end = pgt_buf_end;
- __pgt_buf_top = pgt_buf_top;
- }
- return;
-}
-#else
-static __init void mark_rw_past_pgt(void) { }
-#endif
static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
#ifdef CONFIG_X86_64
@@ -1602,14 +1503,6 @@ static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
unsigned long pfn = pte_pfn(pte);
/*
- * A bit of optimization. We do not need to call the workaround
- * when xen_set_pte_init is called with a PTE with 0 as PFN.
- * That is b/c the pagetable at that point are just being populated
- * with empty values and we can save some cycles by not calling
- * the 'memblock' code.*/
- if (pfn)
- mark_rw_past_pgt();
- /*
* If the new pfn is within the range of the newly allocated
* kernel pagetable, and it isn't being mapped into an
* early_ioremap fixmap slot as a freshly allocated page, make sure
@@ -2118,8 +2011,6 @@ __init void xen_ident_map_ISA(void)
static __init void xen_post_allocator_init(void)
{
- mark_rw_past_pgt();
-
#ifdef CONFIG_XEN_DEBUG
pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug);
#endif
@@ -2228,6 +2119,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
void __init xen_init_mmu_ops(void)
{
+ x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve;
x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start;
x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
pv_mmu_ops = xen_mmu_ops;