From 8d165db10772f238103c3e8f955c54145e5c07f3 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Sun, 10 May 2009 13:37:36 +0000 Subject: powerpc: Improve decrementer accuracy I have been looking at sources of OS jitter and notice that after a long NO_HZ idle period we wakeup too early: relative time (us) event timer irq exit 999946.405 timer irq entry 4.835 timer irq exit 21.685 timer irq entry 3.540 timer (tick_sched_timer) entry Here we slept for just under a second then took a timer interrupt that did nothing. 21.685 us later we wake up again and do the work. We set a rather low shift value of 16 for the decrementer clockevent, which I think is causing this issue. On this box we have a 207MHz decrementer and see: clockevent: decrementer mult[3501] shift[16] cpu[0] For calculations of large intervals this mult/shift combination could be off by a significant amount. I notice the sparc code has a loop that iterates to find a mult/shift combination that maximises the shift value while keeping mult under 32bit. With the patch below we get: clockevent: decrementer mult[35015c20] shift[32] cpu[15] And we no longer see the spurious wakeups. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/time.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel/time.c') diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 48571ac56fb..bee1443da76 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -109,7 +109,7 @@ static void decrementer_set_mode(enum clock_event_mode mode, static struct clock_event_device decrementer_clockevent = { .name = "decrementer", .rating = 200, - .shift = 16, + .shift = 0, /* To be filled in */ .mult = 0, /* To be filled in */ .irq = 0, .set_next_event = decrementer_set_next_event, @@ -843,6 +843,22 @@ static void decrementer_set_mode(enum clock_event_mode mode, decrementer_set_next_event(DECREMENTER_MAX, dev); } +static void __init setup_clockevent_multiplier(unsigned long hz) +{ + u64 mult, shift = 32; + + while (1) { + mult = div_sc(hz, NSEC_PER_SEC, shift); + if (mult && (mult >> 32UL) == 0UL) + break; + + shift--; + } + + decrementer_clockevent.shift = shift; + decrementer_clockevent.mult = mult; +} + static void register_decrementer_clockevent(int cpu) { struct clock_event_device *dec = &per_cpu(decrementers, cpu).event; @@ -860,8 +876,7 @@ static void __init init_decrementer_clockevent(void) { int cpu = smp_processor_id(); - decrementer_clockevent.mult = div_sc(ppc_tb_freq, NSEC_PER_SEC, - decrementer_clockevent.shift); + setup_clockevent_multiplier(ppc_tb_freq); decrementer_clockevent.max_delta_ns = clockevent_delta2ns(DECREMENTER_MAX, &decrementer_clockevent); decrementer_clockevent.min_delta_ns = -- cgit v1.2.3-70-g09d2 From 177996e6e20f15004d6757d9b859f57d181ef443 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 9 Jun 2009 21:12:00 +0000 Subject: powerpc: Don't do generic calibrate_delay() Currently we are wasting time calling the generic calibrate_delay() function. We don't need it since our implementation of __delay() is based on the CPU timebase. So instead, we use our own small implementation that initializes loops_per_jiffy to something sensible to make the few users like spinlock debug be happy Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/Kconfig | 4 ---- arch/powerpc/kernel/time.c | 10 ++++++++++ 2 files changed, 10 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/kernel/time.c') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 93a61898b25..eae0c2bbbf3 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -93,10 +93,6 @@ config GENERIC_HWEIGHT bool default y -config GENERIC_CALIBRATE_DELAY - bool - default y - config GENERIC_FIND_NEXT_BIT bool default y diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index bee1443da76..15391c2ab01 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -52,6 +52,7 @@ #include #include #include +#include #include #include @@ -1143,6 +1144,15 @@ void div128_by_32(u64 dividend_high, u64 dividend_low, } +/* We don't need to calibrate delay, we use the CPU timebase for that */ +void calibrate_delay(void) +{ + /* Some generic code (such as spinlock debug) use loops_per_jiffy + * as the number of __delay(1) in a jiffy, so make it so + */ + loops_per_jiffy = tb_ticks_per_jiffy; +} + static int __init rtc_init(void) { struct platform_device *pdev; -- cgit v1.2.3-70-g09d2