From 62f082830d63cf753ed0dab16f8d3b2d0ffc7f43 Mon Sep 17 00:00:00 2001 From: Tkhai Kirill Date: Wed, 4 Apr 2012 21:49:26 +0200 Subject: sparc32: generic clockevent support The kernel uses l14 timers as clockevents. l10 timer is used as clocksource if platform master_l10_counter isn't constantly zero. The clocksource is continuous, so it's possible to use high resolution timers. l10 timer is also used as clockevent on UP configurations. This implementation is for sun4m, sun4d, sun4c, microsparc-IIep and LEON platforms. The appropriate LEON changes were made by Konrad Eisele. In case of sun4m's oneshot mode, profile irq is zeroed in smp4m_percpu_timer_interrupt(). This may be unnecessary (double, triple etc overflow does nothing). sun4d is able to have oneshot mode too, but I have no way to test it. So the code of its percpu timer handler is kept as close to the current code as possible. The patch is tested on sun4m box in SMP mode by me, and tested by Konrad on leon in up mode (leon smp is broken atm - due to other reasons). Signed-off-by: Tkhai Kirill Tested-by: Konrad Eisele [leon up] [sam: revised patch to provide generic support for leon] Signed-off-by: Sam Ravnborg Signed-off-by: David S. 
Miller --- arch/sparc/kernel/kernel.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/sparc/kernel/kernel.h') diff --git a/arch/sparc/kernel/kernel.h b/arch/sparc/kernel/kernel.h index fd6c36b1df7..8abbad38e34 100644 --- a/arch/sparc/kernel/kernel.h +++ b/arch/sparc/kernel/kernel.h @@ -47,8 +47,6 @@ extern void init_IRQ(void); extern void sun4c_init_IRQ(void); /* sun4m_irq.c */ -extern unsigned int lvl14_resolution; - extern void sun4m_init_IRQ(void); extern void sun4m_unmask_profile_irq(void); extern void sun4m_clear_profile_irq(int cpu); -- cgit v1.2.3-70-g09d2 From 2c1cfb2db61474040a394962872f4cde613f89fb Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Fri, 11 May 2012 11:35:04 +0000 Subject: sparc32: drop sun4c support Machines with sun4c support are very rare these days, and no one is using them for any practical purposes. The sun4c support has been known to be broken for quite some time too. So rather than trying to keep it up-to-date, let's get rid of it. This allows us to do some very welcome cleanup of sparc32 support. Updated the former sun4c-specific NMI (which was also used for sun4m UP) to be a generic UP NMI. Signed-off-by: Sam Ravnborg Signed-off-by: David S. 
Miller --- arch/sparc/include/asm/head_32.h | 10 - arch/sparc/include/asm/oplib_32.h | 8 - arch/sparc/kernel/Makefile | 2 +- arch/sparc/kernel/entry.S | 20 +- arch/sparc/kernel/head_32.S | 7 +- arch/sparc/kernel/irq_32.c | 5 - arch/sparc/kernel/kernel.h | 3 - arch/sparc/kernel/process_32.c | 2 +- arch/sparc/kernel/sun4c_irq.c | 269 ----- arch/sparc/mm/Makefile | 4 - arch/sparc/mm/sun4c.c | 2166 ------------------------------------- arch/sparc/prom/Makefile | 1 - arch/sparc/prom/segment.c | 28 - 13 files changed, 11 insertions(+), 2514 deletions(-) delete mode 100644 arch/sparc/kernel/sun4c_irq.c delete mode 100644 arch/sparc/mm/sun4c.c delete mode 100644 arch/sparc/prom/segment.c (limited to 'arch/sparc/kernel/kernel.h') diff --git a/arch/sparc/include/asm/head_32.h b/arch/sparc/include/asm/head_32.h index 7c35491a8b5..5ceb7faee3f 100644 --- a/arch/sparc/include/asm/head_32.h +++ b/arch/sparc/include/asm/head_32.h @@ -80,16 +80,6 @@ #define TRAP_ENTRY_INTERRUPT(int_level) \ mov int_level, %l7; rd %psr, %l0; b real_irq_entry; rd %wim, %l3; -/* NMI's (Non Maskable Interrupts) are special, you can't keep them - * from coming in, and basically if you get one, the shows over. ;( - * On the sun4c they are usually asynchronous memory errors, on the - * the sun4m they could be either due to mem errors or a software - * initiated interrupt from the prom/kern on an SMP box saying "I - * command you to do CPU tricks, read your mailbox for more info." - */ -#define NMI_TRAP \ - rd %wim, %l3; b linux_trap_nmi_sun4c; mov %psr, %l0; nop; - /* Window overflows/underflows are special and we need to try to be as * efficient as possible here.... 
*/ diff --git a/arch/sparc/include/asm/oplib_32.h b/arch/sparc/include/asm/oplib_32.h index 71e5e9aeb67..27517879a6c 100644 --- a/arch/sparc/include/asm/oplib_32.h +++ b/arch/sparc/include/asm/oplib_32.h @@ -105,14 +105,6 @@ extern void prom_write(const char *buf, unsigned int len); extern int prom_startcpu(int cpunode, struct linux_prom_registers *context_table, int context, char *program_counter); -/* Sun4/sun4c specific memory-management startup hook. */ - -/* Map the passed segment in the given context at the passed - * virtual address. - */ -extern void prom_putsegment(int context, unsigned long virt_addr, - int physical_segment); - /* Initialize the memory lists based upon the prom version. */ void prom_meminit(void); diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile index cb85458f89d..bfb93c37313 100644 --- a/arch/sparc/kernel/Makefile +++ b/arch/sparc/kernel/Makefile @@ -28,7 +28,7 @@ obj-y += traps_$(BITS).o # IRQ obj-y += irq_$(BITS).o -obj-$(CONFIG_SPARC32) += sun4m_irq.o sun4c_irq.o sun4d_irq.o +obj-$(CONFIG_SPARC32) += sun4m_irq.o sun4d_irq.o obj-y += process_$(BITS).o obj-y += signal_$(BITS).o diff --git a/arch/sparc/kernel/entry.S b/arch/sparc/kernel/entry.S index f445e98463e..e0e0b810922 100644 --- a/arch/sparc/kernel/entry.S +++ b/arch/sparc/kernel/entry.S @@ -317,8 +317,8 @@ maybe_smp4m_msg_out: RESTORE_ALL .align 4 - .globl linux_trap_ipi15_sun4m -linux_trap_ipi15_sun4m: + .globl linux_trap_ipi15 +linux_trap_ipi15: SAVE_ALL sethi %hi(0x80000000), %o2 GET_PROCESSOR4M_ID(o0) @@ -760,20 +760,12 @@ setcc_trap_handler: jmp %l2 ! advance over trap instruction rett %l2 + 0x4 ! like this... +#ifndef CONFIG_SMP .align 4 - .globl linux_trap_nmi_sun4c -linux_trap_nmi_sun4c: + .globl linux_trap_ipi15 +linux_trap_ipi15: SAVE_ALL - /* Ugh, we need to clear the IRQ line. This is now - * a very sun4c specific trap handler... 
- */ - sethi %hi(interrupt_enable), %l5 - ld [%l5 + %lo(interrupt_enable)], %l5 - ldub [%l5], %l6 - andn %l6, INTS_ENAB, %l6 - stb %l6, [%l5] - /* Now it is safe to re-enable traps without recursion. */ or %l0, PSR_PIL, %l0 wr %l0, PSR_ET, %psr @@ -797,6 +789,8 @@ linux_trap_nmi_sun4c: RESTORE_ALL +#endif /* CONFIG_SMP */ + .align 4 .globl invalid_segment_patch1_ff .globl invalid_segment_patch2_ff diff --git a/arch/sparc/kernel/head_32.S b/arch/sparc/kernel/head_32.S index 58778575983..054a49f3044 100644 --- a/arch/sparc/kernel/head_32.S +++ b/arch/sparc/kernel/head_32.S @@ -111,11 +111,8 @@ t_irq12:TRAP_ENTRY_INTERRUPT(12) /* IRQ Zilog serial chip */ t_irq13:TRAP_ENTRY_INTERRUPT(13) /* IRQ Audio Intr. */ t_irq14:TRAP_ENTRY_INTERRUPT(14) /* IRQ Timer #2 */ .globl t_nmi -#ifndef CONFIG_SMP -t_nmi: NMI_TRAP /* Level 15 (NMI) */ -#else -t_nmi: TRAP_ENTRY(0x1f, linux_trap_ipi15_sun4m) -#endif +t_nmi: TRAP_ENTRY(0x1f, linux_trap_ipi15) + t_racc: TRAP_ENTRY(0x20, do_reg_access) /* General Register Access Error */ t_iacce:BAD_TRAP(0x21) /* Instr Access Error */ t_bad22:BAD_TRAP(0x22) BAD_TRAP(0x23) diff --git a/arch/sparc/kernel/irq_32.c b/arch/sparc/kernel/irq_32.c index 4a2b8018ac9..998d90cb543 100644 --- a/arch/sparc/kernel/irq_32.c +++ b/arch/sparc/kernel/irq_32.c @@ -346,11 +346,6 @@ void sparc_floppy_irq(int irq, void *dev_id, struct pt_regs *regs) void __init init_IRQ(void) { switch (sparc_cpu_model) { - case sun4c: - case sun4: - sun4c_init_IRQ(); - break; - case sun4m: pcic_probe(); if (pcic_present()) diff --git a/arch/sparc/kernel/kernel.h b/arch/sparc/kernel/kernel.h index 8abbad38e34..8278df5d4ce 100644 --- a/arch/sparc/kernel/kernel.h +++ b/arch/sparc/kernel/kernel.h @@ -43,9 +43,6 @@ extern spinlock_t irq_action_lock; extern void unexpected_irq(int irq, void *dev_id, struct pt_regs * regs); extern void init_IRQ(void); -/* sun4c_irq.c */ -extern void sun4c_init_IRQ(void); - /* sun4m_irq.c */ extern void sun4m_init_IRQ(void); extern void 
sun4m_unmask_profile_irq(void); diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index efa07542e85..de81e21cbf6 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -100,7 +100,7 @@ void cpu_idle(void) printk("kernel faults / second = %ld\n", fps); #endif if (fps >= SUN4C_FAULT_HIGH) { - sun4c_grow_kernel_ring(); + /*sun4c_grow_kernel_ring();*/ } } local_irq_enable(); diff --git a/arch/sparc/kernel/sun4c_irq.c b/arch/sparc/kernel/sun4c_irq.c deleted file mode 100644 index 39c64211b1b..00000000000 --- a/arch/sparc/kernel/sun4c_irq.c +++ /dev/null @@ -1,269 +0,0 @@ -/* - * sun4c irq support - * - * djhr: Hacked out of irq.c into a CPU dependent version. - * - * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu) - * Copyright (C) 1995 Miguel de Icaza (miguel@nuclecu.unam.mx) - * Copyright (C) 1995 Pete A. Zaitcev (zaitcev@yahoo.com) - * Copyright (C) 1996 Dave Redman (djhr@tadpole.co.uk) - */ - -#include - -#include -#include -#include -#include - -#include "irq.h" - -/* Sun4c interrupts are typically laid out as follows: - * - * 1 - Software interrupt, SBUS level 1 - * 2 - SBUS level 2 - * 3 - ESP SCSI, SBUS level 3 - * 4 - Software interrupt - * 5 - Lance ethernet, SBUS level 4 - * 6 - Software interrupt - * 7 - Graphics card, SBUS level 5 - * 8 - SBUS level 6 - * 9 - SBUS level 7 - * 10 - Counter timer - * 11 - Floppy - * 12 - Zilog uart - * 13 - CS4231 audio - * 14 - Profiling timer - * 15 - NMI - * - * The interrupt enable bits in the interrupt mask register are - * really only used to enable/disable the timer interrupts, and - * for signalling software interrupts. There is also a master - * interrupt enable bit in this register. - * - * Interrupts are enabled by setting the SUN4C_INT_* bits, they - * are disabled by clearing those bits. - */ - -/* - * Bit field defines for the interrupt registers on various - * Sparc machines. - */ - -/* The sun4c interrupt register. 
*/ -#define SUN4C_INT_ENABLE 0x01 /* Allow interrupts. */ -#define SUN4C_INT_E14 0x80 /* Enable level 14 IRQ. */ -#define SUN4C_INT_E10 0x20 /* Enable level 10 IRQ. */ -#define SUN4C_INT_E8 0x10 /* Enable level 8 IRQ. */ -#define SUN4C_INT_E6 0x08 /* Enable level 6 IRQ. */ -#define SUN4C_INT_E4 0x04 /* Enable level 4 IRQ. */ -#define SUN4C_INT_E1 0x02 /* Enable level 1 IRQ. */ - -/* - * Pointer to the interrupt enable byte - * Used by entry.S - */ -unsigned char __iomem *interrupt_enable; - -static void sun4c_mask_irq(struct irq_data *data) -{ - unsigned long mask = (unsigned long)data->chip_data; - - if (mask) { - unsigned long flags; - - local_irq_save(flags); - mask = sbus_readb(interrupt_enable) & ~mask; - sbus_writeb(mask, interrupt_enable); - local_irq_restore(flags); - } -} - -static void sun4c_unmask_irq(struct irq_data *data) -{ - unsigned long mask = (unsigned long)data->chip_data; - - if (mask) { - unsigned long flags; - - local_irq_save(flags); - mask = sbus_readb(interrupt_enable) | mask; - sbus_writeb(mask, interrupt_enable); - local_irq_restore(flags); - } -} - -static unsigned int sun4c_startup_irq(struct irq_data *data) -{ - irq_link(data->irq); - sun4c_unmask_irq(data); - - return 0; -} - -static void sun4c_shutdown_irq(struct irq_data *data) -{ - sun4c_mask_irq(data); - irq_unlink(data->irq); -} - -static struct irq_chip sun4c_irq = { - .name = "sun4c", - .irq_startup = sun4c_startup_irq, - .irq_shutdown = sun4c_shutdown_irq, - .irq_mask = sun4c_mask_irq, - .irq_unmask = sun4c_unmask_irq, -}; - -static unsigned int sun4c_build_device_irq(struct platform_device *op, - unsigned int real_irq) -{ - unsigned int irq; - - if (real_irq >= 16) { - prom_printf("Bogus sun4c IRQ %u\n", real_irq); - prom_halt(); - } - - irq = irq_alloc(real_irq, real_irq); - if (irq) { - unsigned long mask = 0UL; - - switch (real_irq) { - case 1: - mask = SUN4C_INT_E1; - break; - case 8: - mask = SUN4C_INT_E8; - break; - case 10: - mask = SUN4C_INT_E10; - break; - case 14: - 
mask = SUN4C_INT_E14; - break; - default: - /* All the rest are either always enabled, - * or are for signalling software interrupts. - */ - break; - } - irq_set_chip_and_handler_name(irq, &sun4c_irq, - handle_level_irq, "level"); - irq_set_chip_data(irq, (void *)mask); - } - return irq; -} - -struct sun4c_timer_info { - u32 l10_count; - u32 l10_limit; - u32 l14_count; - u32 l14_limit; -}; - -static struct sun4c_timer_info __iomem *sun4c_timers; - -static void sun4c_clear_clock_irq(void) -{ - sbus_readl(&sun4c_timers->l10_limit); -} - -static void sun4c_load_profile_irq(int cpu, unsigned int limit) -{ - /* Errm.. not sure how to do this.. */ -} - -static void __init sun4c_init_timers(void) -{ - const struct linux_prom_irqs *prom_irqs; - struct device_node *dp; - unsigned int irq; - const u32 *addr; - int err; - - dp = of_find_node_by_name(NULL, "counter-timer"); - if (!dp) { - prom_printf("sun4c_init_timers: Unable to find counter-timer\n"); - prom_halt(); - } - - addr = of_get_property(dp, "address", NULL); - if (!addr) { - prom_printf("sun4c_init_timers: No address property\n"); - prom_halt(); - } - - sun4c_timers = (void __iomem *) (unsigned long) addr[0]; - - prom_irqs = of_get_property(dp, "intr", NULL); - of_node_put(dp); - if (!prom_irqs) { - prom_printf("sun4c_init_timers: No intr property\n"); - prom_halt(); - } - - /* Have the level 10 timer tick at 100HZ. We don't touch the - * level 14 timer limit since we are letting the prom handle - * them until we have a real console driver so L1-A works. 
- */ - sparc_config.cs_period = SBUS_CLOCK_RATE / HZ; - sparc_config.features |= - FEAT_L10_CLOCKSOURCE | FEAT_L10_CLOCKEVENT; - sbus_writel(timer_value(sparc_config.cs_period), - &sun4c_timers->l10_limit); - - master_l10_counter = &sun4c_timers->l10_count; - - irq = sun4c_build_device_irq(NULL, prom_irqs[0].pri); - err = request_irq(irq, timer_interrupt, IRQF_TIMER, "timer", NULL); - if (err) { - prom_printf("sun4c_init_timers: request_irq() fails with %d\n", err); - prom_halt(); - } - - /* disable timer interrupt */ - sun4c_mask_irq(irq_get_irq_data(irq)); -} - -#ifdef CONFIG_SMP -static void sun4c_nop(void) -{ -} -#endif - -void __init sun4c_init_IRQ(void) -{ - struct device_node *dp; - const u32 *addr; - - dp = of_find_node_by_name(NULL, "interrupt-enable"); - if (!dp) { - prom_printf("sun4c_init_IRQ: Unable to find interrupt-enable\n"); - prom_halt(); - } - - addr = of_get_property(dp, "address", NULL); - of_node_put(dp); - if (!addr) { - prom_printf("sun4c_init_IRQ: No address property\n"); - prom_halt(); - } - - interrupt_enable = (void __iomem *) (unsigned long) addr[0]; - - BTFIXUPSET_CALL(clear_clock_irq, sun4c_clear_clock_irq, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(load_profile_irq, sun4c_load_profile_irq, BTFIXUPCALL_NOP); - - sparc_config.init_timers = sun4c_init_timers; - sparc_config.build_device_irq = sun4c_build_device_irq; - sparc_config.clock_rate = SBUS_CLOCK_RATE; - -#ifdef CONFIG_SMP - BTFIXUPSET_CALL(set_cpu_int, sun4c_nop, BTFIXUPCALL_NOP); - BTFIXUPSET_CALL(clear_cpu_int, sun4c_nop, BTFIXUPCALL_NOP); - BTFIXUPSET_CALL(set_irq_udt, sun4c_nop, BTFIXUPCALL_NOP); -#endif - sbus_writeb(SUN4C_INT_ENABLE, interrupt_enable); - /* Cannot enable interrupts until OBP ticker is disabled. 
*/ -} diff --git a/arch/sparc/mm/Makefile b/arch/sparc/mm/Makefile index 301421c1129..2e68054f97f 100644 --- a/arch/sparc/mm/Makefile +++ b/arch/sparc/mm/Makefile @@ -18,8 +18,4 @@ obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o # Only used by sparc32 obj-$(CONFIG_HIGHMEM) += highmem.o -ifdef CONFIG_SMP obj-$(CONFIG_SPARC32) += nosun4c.o -else -obj-$(CONFIG_SPARC32) += sun4c.o -endif diff --git a/arch/sparc/mm/sun4c.c b/arch/sparc/mm/sun4c.c deleted file mode 100644 index 1cf4f198709..00000000000 --- a/arch/sparc/mm/sun4c.c +++ /dev/null @@ -1,2166 +0,0 @@ -/* sun4c.c: Doing in software what should be done in hardware. - * - * Copyright (C) 1996 David S. Miller (davem@davemloft.net) - * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be) - * Copyright (C) 1996 Andrew Tridgell (Andrew.Tridgell@anu.edu.au) - * Copyright (C) 1997-2000 Anton Blanchard (anton@samba.org) - * Copyright (C) 1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) - */ - -#define NR_TASK_BUCKETS 512 - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* Because of our dynamic kernel TLB miss strategy, and how - * our DVMA mapping allocation works, you _MUST_: - * - * 1) Disable interrupts _and_ not touch any dynamic kernel - * memory while messing with kernel MMU state. By - * dynamic memory I mean any object which is not in - * the kernel image itself or a thread_union (both of - * which are locked into the MMU). - * 2) Disable interrupts while messing with user MMU state. - */ - -extern int num_segmaps, num_contexts; - -extern unsigned long page_kernel; - -/* That's it, we prom_halt() on sun4c if the cache size is something other than 65536. - * So let's save some cycles and just use that everywhere except for that bootup - * sanity check. 
- */ -#define SUN4C_VAC_SIZE 65536 - -#define SUN4C_KERNEL_BUCKETS 32 - -/* Flushing the cache. */ -struct sun4c_vac_props sun4c_vacinfo; -unsigned long sun4c_kernel_faults; - -/* Invalidate every sun4c cache line tag. */ -static void __init sun4c_flush_all(void) -{ - unsigned long begin, end; - - if (sun4c_vacinfo.on) - panic("SUN4C: AIEEE, trying to invalidate vac while it is on."); - - /* Clear 'valid' bit in all cache line tags */ - begin = AC_CACHETAGS; - end = (AC_CACHETAGS + SUN4C_VAC_SIZE); - while (begin < end) { - __asm__ __volatile__("sta %%g0, [%0] %1\n\t" : : - "r" (begin), "i" (ASI_CONTROL)); - begin += sun4c_vacinfo.linesize; - } -} - -static void sun4c_flush_context_hw(void) -{ - unsigned long end = SUN4C_VAC_SIZE; - - __asm__ __volatile__( - "1: addcc %0, -4096, %0\n\t" - " bne 1b\n\t" - " sta %%g0, [%0] %2" - : "=&r" (end) - : "0" (end), "i" (ASI_HWFLUSHCONTEXT) - : "cc"); -} - -/* Must be called minimally with IRQs disabled. */ -static void sun4c_flush_segment_hw(unsigned long addr) -{ - if (sun4c_get_segmap(addr) != invalid_segment) { - unsigned long vac_size = SUN4C_VAC_SIZE; - - __asm__ __volatile__( - "1: addcc %0, -4096, %0\n\t" - " bne 1b\n\t" - " sta %%g0, [%2 + %0] %3" - : "=&r" (vac_size) - : "0" (vac_size), "r" (addr), "i" (ASI_HWFLUSHSEG) - : "cc"); - } -} - -/* File local boot time fixups. */ -BTFIXUPDEF_CALL(void, sun4c_flush_page, unsigned long) -BTFIXUPDEF_CALL(void, sun4c_flush_segment, unsigned long) -BTFIXUPDEF_CALL(void, sun4c_flush_context, void) - -#define sun4c_flush_page(addr) BTFIXUP_CALL(sun4c_flush_page)(addr) -#define sun4c_flush_segment(addr) BTFIXUP_CALL(sun4c_flush_segment)(addr) -#define sun4c_flush_context() BTFIXUP_CALL(sun4c_flush_context)() - -/* Must be called minimally with interrupts disabled. 
*/ -static void sun4c_flush_page_hw(unsigned long addr) -{ - addr &= PAGE_MASK; - if ((int)sun4c_get_pte(addr) < 0) - __asm__ __volatile__("sta %%g0, [%0] %1" - : : "r" (addr), "i" (ASI_HWFLUSHPAGE)); -} - -/* Don't inline the software version as it eats too many cache lines if expanded. */ -static void sun4c_flush_context_sw(void) -{ - unsigned long nbytes = SUN4C_VAC_SIZE; - unsigned long lsize = sun4c_vacinfo.linesize; - - __asm__ __volatile__( - "add %2, %2, %%g1\n\t" - "add %2, %%g1, %%g2\n\t" - "add %2, %%g2, %%g3\n\t" - "add %2, %%g3, %%g4\n\t" - "add %2, %%g4, %%g5\n\t" - "add %2, %%g5, %%o4\n\t" - "add %2, %%o4, %%o5\n" - "1:\n\t" - "subcc %0, %%o5, %0\n\t" - "sta %%g0, [%0] %3\n\t" - "sta %%g0, [%0 + %2] %3\n\t" - "sta %%g0, [%0 + %%g1] %3\n\t" - "sta %%g0, [%0 + %%g2] %3\n\t" - "sta %%g0, [%0 + %%g3] %3\n\t" - "sta %%g0, [%0 + %%g4] %3\n\t" - "sta %%g0, [%0 + %%g5] %3\n\t" - "bg 1b\n\t" - " sta %%g0, [%1 + %%o4] %3\n" - : "=&r" (nbytes) - : "0" (nbytes), "r" (lsize), "i" (ASI_FLUSHCTX) - : "g1", "g2", "g3", "g4", "g5", "o4", "o5", "cc"); -} - -/* Don't inline the software version as it eats too many cache lines if expanded. 
*/ -static void sun4c_flush_segment_sw(unsigned long addr) -{ - if (sun4c_get_segmap(addr) != invalid_segment) { - unsigned long nbytes = SUN4C_VAC_SIZE; - unsigned long lsize = sun4c_vacinfo.linesize; - - __asm__ __volatile__( - "add %2, %2, %%g1\n\t" - "add %2, %%g1, %%g2\n\t" - "add %2, %%g2, %%g3\n\t" - "add %2, %%g3, %%g4\n\t" - "add %2, %%g4, %%g5\n\t" - "add %2, %%g5, %%o4\n\t" - "add %2, %%o4, %%o5\n" - "1:\n\t" - "subcc %1, %%o5, %1\n\t" - "sta %%g0, [%0] %6\n\t" - "sta %%g0, [%0 + %2] %6\n\t" - "sta %%g0, [%0 + %%g1] %6\n\t" - "sta %%g0, [%0 + %%g2] %6\n\t" - "sta %%g0, [%0 + %%g3] %6\n\t" - "sta %%g0, [%0 + %%g4] %6\n\t" - "sta %%g0, [%0 + %%g5] %6\n\t" - "sta %%g0, [%0 + %%o4] %6\n\t" - "bg 1b\n\t" - " add %0, %%o5, %0\n" - : "=&r" (addr), "=&r" (nbytes), "=&r" (lsize) - : "0" (addr), "1" (nbytes), "2" (lsize), - "i" (ASI_FLUSHSEG) - : "g1", "g2", "g3", "g4", "g5", "o4", "o5", "cc"); - } -} - -/* Don't inline the software version as it eats too many cache lines if expanded. */ -static void sun4c_flush_page_sw(unsigned long addr) -{ - addr &= PAGE_MASK; - if ((sun4c_get_pte(addr) & (_SUN4C_PAGE_NOCACHE | _SUN4C_PAGE_VALID)) == - _SUN4C_PAGE_VALID) { - unsigned long left = PAGE_SIZE; - unsigned long lsize = sun4c_vacinfo.linesize; - - __asm__ __volatile__( - "add %2, %2, %%g1\n\t" - "add %2, %%g1, %%g2\n\t" - "add %2, %%g2, %%g3\n\t" - "add %2, %%g3, %%g4\n\t" - "add %2, %%g4, %%g5\n\t" - "add %2, %%g5, %%o4\n\t" - "add %2, %%o4, %%o5\n" - "1:\n\t" - "subcc %1, %%o5, %1\n\t" - "sta %%g0, [%0] %6\n\t" - "sta %%g0, [%0 + %2] %6\n\t" - "sta %%g0, [%0 + %%g1] %6\n\t" - "sta %%g0, [%0 + %%g2] %6\n\t" - "sta %%g0, [%0 + %%g3] %6\n\t" - "sta %%g0, [%0 + %%g4] %6\n\t" - "sta %%g0, [%0 + %%g5] %6\n\t" - "sta %%g0, [%0 + %%o4] %6\n\t" - "bg 1b\n\t" - " add %0, %%o5, %0\n" - : "=&r" (addr), "=&r" (left), "=&r" (lsize) - : "0" (addr), "1" (left), "2" (lsize), - "i" (ASI_FLUSHPG) - : "g1", "g2", "g3", "g4", "g5", "o4", "o5", "cc"); - } -} - -/* The sun4c's do have an 
on chip store buffer. And the way you - * clear them out isn't so obvious. The only way I can think of - * to accomplish this is to read the current context register, - * store the same value there, then read an external hardware - * register. - */ -void sun4c_complete_all_stores(void) -{ - volatile int _unused; - - _unused = sun4c_get_context(); - sun4c_set_context(_unused); - _unused = get_auxio(); -} - -/* Bootup utility functions. */ -static inline void sun4c_init_clean_segmap(unsigned char pseg) -{ - unsigned long vaddr; - - sun4c_put_segmap(0, pseg); - for (vaddr = 0; vaddr < SUN4C_REAL_PGDIR_SIZE; vaddr += PAGE_SIZE) - sun4c_put_pte(vaddr, 0); - sun4c_put_segmap(0, invalid_segment); -} - -static inline void sun4c_init_clean_mmu(unsigned long kernel_end) -{ - unsigned long vaddr; - unsigned char savectx, ctx; - - savectx = sun4c_get_context(); - for (ctx = 0; ctx < num_contexts; ctx++) { - sun4c_set_context(ctx); - for (vaddr = 0; vaddr < 0x20000000; vaddr += SUN4C_REAL_PGDIR_SIZE) - sun4c_put_segmap(vaddr, invalid_segment); - for (vaddr = 0xe0000000; vaddr < KERNBASE; vaddr += SUN4C_REAL_PGDIR_SIZE) - sun4c_put_segmap(vaddr, invalid_segment); - for (vaddr = kernel_end; vaddr < KADB_DEBUGGER_BEGVM; vaddr += SUN4C_REAL_PGDIR_SIZE) - sun4c_put_segmap(vaddr, invalid_segment); - for (vaddr = LINUX_OPPROM_ENDVM; vaddr; vaddr += SUN4C_REAL_PGDIR_SIZE) - sun4c_put_segmap(vaddr, invalid_segment); - } - sun4c_set_context(savectx); -} - -void __init sun4c_probe_vac(void) -{ - sun4c_disable_vac(); - - if ((idprom->id_machtype == (SM_SUN4C | SM_4C_SS1)) || - (idprom->id_machtype == (SM_SUN4C | SM_4C_SS1PLUS))) { - /* PROM on SS1 lacks this info, to be super safe we - * hard code it here since this arch is cast in stone. 
- */ - sun4c_vacinfo.num_bytes = 65536; - sun4c_vacinfo.linesize = 16; - } else { - sun4c_vacinfo.num_bytes = - prom_getintdefault(prom_root_node, "vac-size", 65536); - sun4c_vacinfo.linesize = - prom_getintdefault(prom_root_node, "vac-linesize", 16); - } - sun4c_vacinfo.do_hwflushes = - prom_getintdefault(prom_root_node, "vac-hwflush", 0); - - if (sun4c_vacinfo.do_hwflushes == 0) - sun4c_vacinfo.do_hwflushes = - prom_getintdefault(prom_root_node, "vac_hwflush", 0); - - if (sun4c_vacinfo.num_bytes != 65536) { - prom_printf("WEIRD Sun4C VAC cache size, " - "tell sparclinux@vger.kernel.org"); - prom_halt(); - } - - switch (sun4c_vacinfo.linesize) { - case 16: - sun4c_vacinfo.log2lsize = 4; - break; - case 32: - sun4c_vacinfo.log2lsize = 5; - break; - default: - prom_printf("probe_vac: Didn't expect vac-linesize of %d, halting\n", - sun4c_vacinfo.linesize); - prom_halt(); - } - - sun4c_flush_all(); - sun4c_enable_vac(); -} - -/* Patch instructions for the low level kernel fault handler. */ -extern unsigned long invalid_segment_patch1, invalid_segment_patch1_ff; -extern unsigned long invalid_segment_patch2, invalid_segment_patch2_ff; -extern unsigned long invalid_segment_patch1_1ff, invalid_segment_patch2_1ff; -extern unsigned long num_context_patch1, num_context_patch1_16; -extern unsigned long num_context_patch2_16; -extern unsigned long vac_linesize_patch, vac_linesize_patch_32; -extern unsigned long vac_hwflush_patch1, vac_hwflush_patch1_on; -extern unsigned long vac_hwflush_patch2, vac_hwflush_patch2_on; - -#define PATCH_INSN(src, dst) do { \ - daddr = &(dst); \ - iaddr = &(src); \ - *daddr = *iaddr; \ - } while (0) - -static void __init patch_kernel_fault_handler(void) -{ - unsigned long *iaddr, *daddr; - - switch (num_segmaps) { - case 128: - /* Default, nothing to do. 
*/ - break; - case 256: - PATCH_INSN(invalid_segment_patch1_ff, - invalid_segment_patch1); - PATCH_INSN(invalid_segment_patch2_ff, - invalid_segment_patch2); - break; - case 512: - PATCH_INSN(invalid_segment_patch1_1ff, - invalid_segment_patch1); - PATCH_INSN(invalid_segment_patch2_1ff, - invalid_segment_patch2); - break; - default: - prom_printf("Unhandled number of segmaps: %d\n", - num_segmaps); - prom_halt(); - } - switch (num_contexts) { - case 8: - /* Default, nothing to do. */ - break; - case 16: - PATCH_INSN(num_context_patch1_16, - num_context_patch1); - break; - default: - prom_printf("Unhandled number of contexts: %d\n", - num_contexts); - prom_halt(); - } - - if (sun4c_vacinfo.do_hwflushes != 0) { - PATCH_INSN(vac_hwflush_patch1_on, vac_hwflush_patch1); - PATCH_INSN(vac_hwflush_patch2_on, vac_hwflush_patch2); - } else { - switch (sun4c_vacinfo.linesize) { - case 16: - /* Default, nothing to do. */ - break; - case 32: - PATCH_INSN(vac_linesize_patch_32, vac_linesize_patch); - break; - default: - prom_printf("Impossible VAC linesize %d, halting...\n", - sun4c_vacinfo.linesize); - prom_halt(); - } - } -} - -static void __init sun4c_probe_mmu(void) -{ - if ((idprom->id_machtype == (SM_SUN4C | SM_4C_SS1)) || - (idprom->id_machtype == (SM_SUN4C | SM_4C_SS1PLUS))) { - /* Hardcode these just to be safe, PROM on SS1 does - * not have this info available in the root node. 
- */ - num_segmaps = 128; - num_contexts = 8; - } else { - num_segmaps = - prom_getintdefault(prom_root_node, "mmu-npmg", 128); - num_contexts = - prom_getintdefault(prom_root_node, "mmu-nctx", 0x8); - } - patch_kernel_fault_handler(); -} - -volatile unsigned long __iomem *sun4c_memerr_reg = NULL; - -void __init sun4c_probe_memerr_reg(void) -{ - phandle node; - struct linux_prom_registers regs[1]; - - node = prom_getchild(prom_root_node); - node = prom_searchsiblings(prom_root_node, "memory-error"); - if (!node) - return; - if (prom_getproperty(node, "reg", (char *)regs, sizeof(regs)) <= 0) - return; - /* hmm I think regs[0].which_io is zero here anyways */ - sun4c_memerr_reg = ioremap(regs[0].phys_addr, regs[0].reg_size); -} - -static inline void sun4c_init_ss2_cache_bug(void) -{ - if ((idprom->id_machtype == (SM_SUN4C | SM_4C_SS2)) || - (idprom->id_machtype == (SM_SUN4C | SM_4C_IPX)) || - (idprom->id_machtype == (SM_SUN4C | SM_4C_ELC))) { - /* Whee.. */ - printk("SS2 cache bug detected, uncaching trap table page\n"); - sun4c_flush_page((unsigned int) &_start); - sun4c_put_pte(((unsigned long) &_start), - (sun4c_get_pte((unsigned long) &_start) | _SUN4C_PAGE_NOCACHE)); - } -} - -/* Addr is always aligned on a page boundary for us already. */ -static int sun4c_map_dma_area(struct device *dev, dma_addr_t *pba, unsigned long va, - unsigned long addr, int len) -{ - unsigned long page, end; - - *pba = addr; - - end = PAGE_ALIGN((addr + len)); - while (addr < end) { - page = va; - sun4c_flush_page(page); - page -= PAGE_OFFSET; - page >>= PAGE_SHIFT; - page |= (_SUN4C_PAGE_VALID | _SUN4C_PAGE_DIRTY | - _SUN4C_PAGE_NOCACHE | _SUN4C_PAGE_PRIV); - sun4c_put_pte(addr, page); - addr += PAGE_SIZE; - va += PAGE_SIZE; - } - - return 0; -} - -static void sun4c_unmap_dma_area(struct device *dev, unsigned long busa, int len) -{ - /* Fortunately for us, bus_addr == uncached_virt in sun4c. */ - /* XXX Implement this */ -} - -/* TLB management. 
*/ - -/* Don't change this struct without changing entry.S. This is used - * in the in-window kernel fault handler, and you don't want to mess - * with that. (See sun4c_fault in entry.S). - */ -struct sun4c_mmu_entry { - struct sun4c_mmu_entry *next; - struct sun4c_mmu_entry *prev; - unsigned long vaddr; - unsigned char pseg; - unsigned char locked; - - /* For user mappings only, and completely hidden from kernel - * TLB miss code. - */ - unsigned char ctx; - struct sun4c_mmu_entry *lru_next; - struct sun4c_mmu_entry *lru_prev; -}; - -static struct sun4c_mmu_entry mmu_entry_pool[SUN4C_MAX_SEGMAPS]; - -static void __init sun4c_init_mmu_entry_pool(void) -{ - int i; - - for (i=0; i < SUN4C_MAX_SEGMAPS; i++) { - mmu_entry_pool[i].pseg = i; - mmu_entry_pool[i].next = NULL; - mmu_entry_pool[i].prev = NULL; - mmu_entry_pool[i].vaddr = 0; - mmu_entry_pool[i].locked = 0; - mmu_entry_pool[i].ctx = 0; - mmu_entry_pool[i].lru_next = NULL; - mmu_entry_pool[i].lru_prev = NULL; - } - mmu_entry_pool[invalid_segment].locked = 1; -} - -static inline void fix_permissions(unsigned long vaddr, unsigned long bits_on, - unsigned long bits_off) -{ - unsigned long start, end; - - end = vaddr + SUN4C_REAL_PGDIR_SIZE; - for (start = vaddr; start < end; start += PAGE_SIZE) - if (sun4c_get_pte(start) & _SUN4C_PAGE_VALID) - sun4c_put_pte(start, (sun4c_get_pte(start) | bits_on) & - ~bits_off); -} - -static inline void sun4c_init_map_kernelprom(unsigned long kernel_end) -{ - unsigned long vaddr; - unsigned char pseg, ctx; - - for (vaddr = KADB_DEBUGGER_BEGVM; - vaddr < LINUX_OPPROM_ENDVM; - vaddr += SUN4C_REAL_PGDIR_SIZE) { - pseg = sun4c_get_segmap(vaddr); - if (pseg != invalid_segment) { - mmu_entry_pool[pseg].locked = 1; - for (ctx = 0; ctx < num_contexts; ctx++) - prom_putsegment(ctx, vaddr, pseg); - fix_permissions(vaddr, _SUN4C_PAGE_PRIV, 0); - } - } - - for (vaddr = KERNBASE; vaddr < kernel_end; vaddr += SUN4C_REAL_PGDIR_SIZE) { - pseg = sun4c_get_segmap(vaddr); - 
mmu_entry_pool[pseg].locked = 1; - for (ctx = 0; ctx < num_contexts; ctx++) - prom_putsegment(ctx, vaddr, pseg); - fix_permissions(vaddr, _SUN4C_PAGE_PRIV, _SUN4C_PAGE_NOCACHE); - } -} - -static void __init sun4c_init_lock_area(unsigned long start, unsigned long end) -{ - int i, ctx; - - while (start < end) { - for (i = 0; i < invalid_segment; i++) - if (!mmu_entry_pool[i].locked) - break; - mmu_entry_pool[i].locked = 1; - sun4c_init_clean_segmap(i); - for (ctx = 0; ctx < num_contexts; ctx++) - prom_putsegment(ctx, start, mmu_entry_pool[i].pseg); - start += SUN4C_REAL_PGDIR_SIZE; - } -} - -/* Don't change this struct without changing entry.S. This is used - * in the in-window kernel fault handler, and you don't want to mess - * with that. (See sun4c_fault in entry.S). - */ -struct sun4c_mmu_ring { - struct sun4c_mmu_entry ringhd; - int num_entries; -}; - -static struct sun4c_mmu_ring sun4c_context_ring[SUN4C_MAX_CONTEXTS]; /* used user entries */ -static struct sun4c_mmu_ring sun4c_ufree_ring; /* free user entries */ -static struct sun4c_mmu_ring sun4c_ulru_ring; /* LRU user entries */ -struct sun4c_mmu_ring sun4c_kernel_ring; /* used kernel entries */ -struct sun4c_mmu_ring sun4c_kfree_ring; /* free kernel entries */ - -static inline void sun4c_init_rings(void) -{ - int i; - - for (i = 0; i < SUN4C_MAX_CONTEXTS; i++) { - sun4c_context_ring[i].ringhd.next = - sun4c_context_ring[i].ringhd.prev = - &sun4c_context_ring[i].ringhd; - sun4c_context_ring[i].num_entries = 0; - } - sun4c_ufree_ring.ringhd.next = sun4c_ufree_ring.ringhd.prev = - &sun4c_ufree_ring.ringhd; - sun4c_ufree_ring.num_entries = 0; - sun4c_ulru_ring.ringhd.lru_next = sun4c_ulru_ring.ringhd.lru_prev = - &sun4c_ulru_ring.ringhd; - sun4c_ulru_ring.num_entries = 0; - sun4c_kernel_ring.ringhd.next = sun4c_kernel_ring.ringhd.prev = - &sun4c_kernel_ring.ringhd; - sun4c_kernel_ring.num_entries = 0; - sun4c_kfree_ring.ringhd.next = sun4c_kfree_ring.ringhd.prev = - &sun4c_kfree_ring.ringhd; - 
sun4c_kfree_ring.num_entries = 0; -} - -static void add_ring(struct sun4c_mmu_ring *ring, - struct sun4c_mmu_entry *entry) -{ - struct sun4c_mmu_entry *head = &ring->ringhd; - - entry->prev = head; - (entry->next = head->next)->prev = entry; - head->next = entry; - ring->num_entries++; -} - -static inline void add_lru(struct sun4c_mmu_entry *entry) -{ - struct sun4c_mmu_ring *ring = &sun4c_ulru_ring; - struct sun4c_mmu_entry *head = &ring->ringhd; - - entry->lru_next = head; - (entry->lru_prev = head->lru_prev)->lru_next = entry; - head->lru_prev = entry; -} - -static void add_ring_ordered(struct sun4c_mmu_ring *ring, - struct sun4c_mmu_entry *entry) -{ - struct sun4c_mmu_entry *head = &ring->ringhd; - unsigned long addr = entry->vaddr; - - while ((head->next != &ring->ringhd) && (head->next->vaddr < addr)) - head = head->next; - - entry->prev = head; - (entry->next = head->next)->prev = entry; - head->next = entry; - ring->num_entries++; - - add_lru(entry); -} - -static inline void remove_ring(struct sun4c_mmu_ring *ring, - struct sun4c_mmu_entry *entry) -{ - struct sun4c_mmu_entry *next = entry->next; - - (next->prev = entry->prev)->next = next; - ring->num_entries--; -} - -static void remove_lru(struct sun4c_mmu_entry *entry) -{ - struct sun4c_mmu_entry *next = entry->lru_next; - - (next->lru_prev = entry->lru_prev)->lru_next = next; -} - -static void free_user_entry(int ctx, struct sun4c_mmu_entry *entry) -{ - remove_ring(sun4c_context_ring+ctx, entry); - remove_lru(entry); - add_ring(&sun4c_ufree_ring, entry); -} - -static void free_kernel_entry(struct sun4c_mmu_entry *entry, - struct sun4c_mmu_ring *ring) -{ - remove_ring(ring, entry); - add_ring(&sun4c_kfree_ring, entry); -} - -static void __init sun4c_init_fill_kernel_ring(int howmany) -{ - int i; - - while (howmany) { - for (i = 0; i < invalid_segment; i++) - if (!mmu_entry_pool[i].locked) - break; - mmu_entry_pool[i].locked = 1; - sun4c_init_clean_segmap(i); - add_ring(&sun4c_kfree_ring, 
&mmu_entry_pool[i]); - howmany--; - } -} - -static void __init sun4c_init_fill_user_ring(void) -{ - int i; - - for (i = 0; i < invalid_segment; i++) { - if (mmu_entry_pool[i].locked) - continue; - sun4c_init_clean_segmap(i); - add_ring(&sun4c_ufree_ring, &mmu_entry_pool[i]); - } -} - -static void sun4c_kernel_unmap(struct sun4c_mmu_entry *kentry) -{ - int savectx, ctx; - - savectx = sun4c_get_context(); - for (ctx = 0; ctx < num_contexts; ctx++) { - sun4c_set_context(ctx); - sun4c_put_segmap(kentry->vaddr, invalid_segment); - } - sun4c_set_context(savectx); -} - -static void sun4c_kernel_map(struct sun4c_mmu_entry *kentry) -{ - int savectx, ctx; - - savectx = sun4c_get_context(); - for (ctx = 0; ctx < num_contexts; ctx++) { - sun4c_set_context(ctx); - sun4c_put_segmap(kentry->vaddr, kentry->pseg); - } - sun4c_set_context(savectx); -} - -#define sun4c_user_unmap(__entry) \ - sun4c_put_segmap((__entry)->vaddr, invalid_segment) - -static void sun4c_demap_context(struct sun4c_mmu_ring *crp, unsigned char ctx) -{ - struct sun4c_mmu_entry *head = &crp->ringhd; - unsigned long flags; - - local_irq_save(flags); - if (head->next != head) { - struct sun4c_mmu_entry *entry = head->next; - int savectx = sun4c_get_context(); - - flush_user_windows(); - sun4c_set_context(ctx); - sun4c_flush_context(); - do { - struct sun4c_mmu_entry *next = entry->next; - - sun4c_user_unmap(entry); - free_user_entry(ctx, entry); - - entry = next; - } while (entry != head); - sun4c_set_context(savectx); - } - local_irq_restore(flags); -} - -static int sun4c_user_taken_entries; /* This is how much we have. */ -static int max_user_taken_entries; /* This limits us and prevents deadlock. */ - -static struct sun4c_mmu_entry *sun4c_kernel_strategy(void) -{ - struct sun4c_mmu_entry *this_entry; - - /* If some are free, return first one. */ - if (sun4c_kfree_ring.num_entries) { - this_entry = sun4c_kfree_ring.ringhd.next; - return this_entry; - } - - /* Else free one up. 
*/ - this_entry = sun4c_kernel_ring.ringhd.prev; - sun4c_flush_segment(this_entry->vaddr); - sun4c_kernel_unmap(this_entry); - free_kernel_entry(this_entry, &sun4c_kernel_ring); - this_entry = sun4c_kfree_ring.ringhd.next; - - return this_entry; -} - -/* Using this method to free up mmu entries eliminates a lot of - * potential races since we have a kernel that incurs tlb - * replacement faults. There may be performance penalties. - * - * NOTE: Must be called with interrupts disabled. - */ -static struct sun4c_mmu_entry *sun4c_user_strategy(void) -{ - struct sun4c_mmu_entry *entry; - unsigned char ctx; - int savectx; - - /* If some are free, return first one. */ - if (sun4c_ufree_ring.num_entries) { - entry = sun4c_ufree_ring.ringhd.next; - goto unlink_out; - } - - if (sun4c_user_taken_entries) { - entry = sun4c_kernel_strategy(); - sun4c_user_taken_entries--; - goto kunlink_out; - } - - /* Grab from the beginning of the LRU list. */ - entry = sun4c_ulru_ring.ringhd.lru_next; - ctx = entry->ctx; - - savectx = sun4c_get_context(); - flush_user_windows(); - sun4c_set_context(ctx); - sun4c_flush_segment(entry->vaddr); - sun4c_user_unmap(entry); - remove_ring(sun4c_context_ring + ctx, entry); - remove_lru(entry); - sun4c_set_context(savectx); - - return entry; - -unlink_out: - remove_ring(&sun4c_ufree_ring, entry); - return entry; -kunlink_out: - remove_ring(&sun4c_kfree_ring, entry); - return entry; -} - -/* NOTE: Must be called with interrupts disabled. */ -void sun4c_grow_kernel_ring(void) -{ - struct sun4c_mmu_entry *entry; - - /* Prevent deadlock condition. */ - if (sun4c_user_taken_entries >= max_user_taken_entries) - return; - - if (sun4c_ufree_ring.num_entries) { - entry = sun4c_ufree_ring.ringhd.next; - remove_ring(&sun4c_ufree_ring, entry); - add_ring(&sun4c_kfree_ring, entry); - sun4c_user_taken_entries++; - } -} - -/* 2 page buckets for task struct and kernel stack allocation. - * - * TASK_STACK_BEGIN - * bucket[0] - * bucket[1] - * [ ... 
] - * bucket[NR_TASK_BUCKETS-1] - * TASK_STACK_BEGIN + (sizeof(struct task_bucket) * NR_TASK_BUCKETS) - * - * Each slot looks like: - * - * page 1 -- task struct + beginning of kernel stack - * page 2 -- rest of kernel stack - */ - -union task_union *sun4c_bucket[NR_TASK_BUCKETS]; - -static int sun4c_lowbucket_avail; - -#define BUCKET_EMPTY ((union task_union *) 0) -#define BUCKET_SHIFT (PAGE_SHIFT + 1) /* log2(sizeof(struct task_bucket)) */ -#define BUCKET_SIZE (1 << BUCKET_SHIFT) -#define BUCKET_NUM(addr) ((((addr) - SUN4C_LOCK_VADDR) >> BUCKET_SHIFT)) -#define BUCKET_ADDR(num) (((num) << BUCKET_SHIFT) + SUN4C_LOCK_VADDR) -#define BUCKET_PTE(page) \ - ((((page) - PAGE_OFFSET) >> PAGE_SHIFT) | pgprot_val(SUN4C_PAGE_KERNEL)) -#define BUCKET_PTE_PAGE(pte) \ - (PAGE_OFFSET + (((pte) & SUN4C_PFN_MASK) << PAGE_SHIFT)) - -static void get_locked_segment(unsigned long addr) -{ - struct sun4c_mmu_entry *stolen; - unsigned long flags; - - local_irq_save(flags); - addr &= SUN4C_REAL_PGDIR_MASK; - stolen = sun4c_user_strategy(); - max_user_taken_entries--; - stolen->vaddr = addr; - flush_user_windows(); - sun4c_kernel_map(stolen); - local_irq_restore(flags); -} - -static void free_locked_segment(unsigned long addr) -{ - struct sun4c_mmu_entry *entry; - unsigned long flags; - unsigned char pseg; - - local_irq_save(flags); - addr &= SUN4C_REAL_PGDIR_MASK; - pseg = sun4c_get_segmap(addr); - entry = &mmu_entry_pool[pseg]; - - flush_user_windows(); - sun4c_flush_segment(addr); - sun4c_kernel_unmap(entry); - add_ring(&sun4c_ufree_ring, entry); - max_user_taken_entries++; - local_irq_restore(flags); -} - -static inline void garbage_collect(int entry) -{ - int start, end; - - /* 32 buckets per segment... */ - entry &= ~31; - start = entry; - for (end = (start + 32); start < end; start++) - if (sun4c_bucket[start] != BUCKET_EMPTY) - return; - - /* Entire segment empty, release it. 
*/ - free_locked_segment(BUCKET_ADDR(entry)); -} - -static struct thread_info *sun4c_alloc_thread_info_node(int node) -{ - unsigned long addr, pages; - int entry; - - pages = __get_free_pages(GFP_KERNEL, THREAD_INFO_ORDER); - if (!pages) - return NULL; - - for (entry = sun4c_lowbucket_avail; entry < NR_TASK_BUCKETS; entry++) - if (sun4c_bucket[entry] == BUCKET_EMPTY) - break; - if (entry == NR_TASK_BUCKETS) { - free_pages(pages, THREAD_INFO_ORDER); - return NULL; - } - if (entry >= sun4c_lowbucket_avail) - sun4c_lowbucket_avail = entry + 1; - - addr = BUCKET_ADDR(entry); - sun4c_bucket[entry] = (union task_union *) addr; - if(sun4c_get_segmap(addr) == invalid_segment) - get_locked_segment(addr); - - /* We are changing the virtual color of the page(s) - * so we must flush the cache to guarantee consistency. - */ - sun4c_flush_page(pages); - sun4c_flush_page(pages + PAGE_SIZE); - - sun4c_put_pte(addr, BUCKET_PTE(pages)); - sun4c_put_pte(addr + PAGE_SIZE, BUCKET_PTE(pages + PAGE_SIZE)); - -#ifdef CONFIG_DEBUG_STACK_USAGE - memset((void *)addr, 0, PAGE_SIZE << THREAD_INFO_ORDER); -#endif /* DEBUG_STACK_USAGE */ - - return (struct thread_info *) addr; -} - -static void sun4c_free_thread_info(struct thread_info *ti) -{ - unsigned long tiaddr = (unsigned long) ti; - unsigned long pages = BUCKET_PTE_PAGE(sun4c_get_pte(tiaddr)); - int entry = BUCKET_NUM(tiaddr); - - /* We are deleting a mapping, so the flush here is mandatory. 
*/ - sun4c_flush_page(tiaddr); - sun4c_flush_page(tiaddr + PAGE_SIZE); - - sun4c_put_pte(tiaddr, 0); - sun4c_put_pte(tiaddr + PAGE_SIZE, 0); - - sun4c_bucket[entry] = BUCKET_EMPTY; - if (entry < sun4c_lowbucket_avail) - sun4c_lowbucket_avail = entry; - - free_pages(pages, THREAD_INFO_ORDER); - garbage_collect(entry); -} - -static void __init sun4c_init_buckets(void) -{ - int entry; - - if (sizeof(union thread_union) != (PAGE_SIZE << THREAD_INFO_ORDER)) { - extern void thread_info_size_is_bolixed_pete(void); - thread_info_size_is_bolixed_pete(); - } - - for (entry = 0; entry < NR_TASK_BUCKETS; entry++) - sun4c_bucket[entry] = BUCKET_EMPTY; - sun4c_lowbucket_avail = 0; -} - -static unsigned long sun4c_iobuffer_start; -static unsigned long sun4c_iobuffer_end; -static unsigned long sun4c_iobuffer_high; -static unsigned long *sun4c_iobuffer_map; -static int iobuffer_map_size; - -/* - * Alias our pages so they do not cause a trap. - * Also one page may be aliased into several I/O areas and we may - * finish these I/O separately. 
- */ -static char *sun4c_lockarea(char *vaddr, unsigned long size) -{ - unsigned long base, scan; - unsigned long npages; - unsigned long vpage; - unsigned long pte; - unsigned long apage; - unsigned long high; - unsigned long flags; - - npages = (((unsigned long)vaddr & ~PAGE_MASK) + - size + (PAGE_SIZE-1)) >> PAGE_SHIFT; - - local_irq_save(flags); - base = bitmap_find_next_zero_area(sun4c_iobuffer_map, iobuffer_map_size, - 0, npages, 0); - if (base >= iobuffer_map_size) - goto abend; - - high = ((base + npages) << PAGE_SHIFT) + sun4c_iobuffer_start; - high = SUN4C_REAL_PGDIR_ALIGN(high); - while (high > sun4c_iobuffer_high) { - get_locked_segment(sun4c_iobuffer_high); - sun4c_iobuffer_high += SUN4C_REAL_PGDIR_SIZE; - } - - vpage = ((unsigned long) vaddr) & PAGE_MASK; - for (scan = base; scan < base+npages; scan++) { - pte = ((vpage-PAGE_OFFSET) >> PAGE_SHIFT); - pte |= pgprot_val(SUN4C_PAGE_KERNEL); - pte |= _SUN4C_PAGE_NOCACHE; - set_bit(scan, sun4c_iobuffer_map); - apage = (scan << PAGE_SHIFT) + sun4c_iobuffer_start; - - /* Flush original mapping so we see the right things later. */ - sun4c_flush_page(vpage); - - sun4c_put_pte(apage, pte); - vpage += PAGE_SIZE; - } - local_irq_restore(flags); - return (char *) ((base << PAGE_SHIFT) + sun4c_iobuffer_start + - (((unsigned long) vaddr) & ~PAGE_MASK)); - -abend: - local_irq_restore(flags); - printk("DMA vaddr=0x%p size=%08lx\n", vaddr, size); - panic("Out of iobuffer table"); - return NULL; -} - -static void sun4c_unlockarea(char *vaddr, unsigned long size) -{ - unsigned long vpage, npages; - unsigned long flags; - int scan, high; - - vpage = (unsigned long)vaddr & PAGE_MASK; - npages = (((unsigned long)vaddr & ~PAGE_MASK) + - size + (PAGE_SIZE-1)) >> PAGE_SHIFT; - - local_irq_save(flags); - while (npages != 0) { - --npages; - - /* This mapping is marked non-cachable, no flush necessary. 
*/ - sun4c_put_pte(vpage, 0); - clear_bit((vpage - sun4c_iobuffer_start) >> PAGE_SHIFT, - sun4c_iobuffer_map); - vpage += PAGE_SIZE; - } - - /* garbage collect */ - scan = (sun4c_iobuffer_high - sun4c_iobuffer_start) >> PAGE_SHIFT; - while (scan >= 0 && !sun4c_iobuffer_map[scan >> 5]) - scan -= 32; - scan += 32; - high = sun4c_iobuffer_start + (scan << PAGE_SHIFT); - high = SUN4C_REAL_PGDIR_ALIGN(high) + SUN4C_REAL_PGDIR_SIZE; - while (high < sun4c_iobuffer_high) { - sun4c_iobuffer_high -= SUN4C_REAL_PGDIR_SIZE; - free_locked_segment(sun4c_iobuffer_high); - } - local_irq_restore(flags); -} - -/* Note the scsi code at init time passes to here buffers - * which sit on the kernel stack, those are already locked - * by implication and fool the page locking code above - * if passed to by mistake. - */ -static __u32 sun4c_get_scsi_one(struct device *dev, char *bufptr, unsigned long len) -{ - unsigned long page; - - page = ((unsigned long)bufptr) & PAGE_MASK; - if (!virt_addr_valid(page)) { - sun4c_flush_page(page); - return (__u32)bufptr; /* already locked */ - } - return (__u32)sun4c_lockarea(bufptr, len); -} - -static void sun4c_get_scsi_sgl(struct device *dev, struct scatterlist *sg, int sz) -{ - while (sz != 0) { - --sz; - sg->dma_address = (__u32)sun4c_lockarea(sg_virt(sg), sg->length); - sg->dma_length = sg->length; - sg = sg_next(sg); - } -} - -static void sun4c_release_scsi_one(struct device *dev, __u32 bufptr, unsigned long len) -{ - if (bufptr < sun4c_iobuffer_start) - return; /* On kernel stack or similar, see above */ - sun4c_unlockarea((char *)bufptr, len); -} - -static void sun4c_release_scsi_sgl(struct device *dev, struct scatterlist *sg, int sz) -{ - while (sz != 0) { - --sz; - sun4c_unlockarea((char *)sg->dma_address, sg->length); - sg = sg_next(sg); - } -} - -#define TASK_ENTRY_SIZE BUCKET_SIZE /* see above */ -#define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1)) - -struct vm_area_struct sun4c_kstack_vma; - -static void __init 
sun4c_init_lock_areas(void) -{ - unsigned long sun4c_taskstack_start; - unsigned long sun4c_taskstack_end; - int bitmap_size; - - sun4c_init_buckets(); - sun4c_taskstack_start = SUN4C_LOCK_VADDR; - sun4c_taskstack_end = (sun4c_taskstack_start + - (TASK_ENTRY_SIZE * NR_TASK_BUCKETS)); - if (sun4c_taskstack_end >= SUN4C_LOCK_END) { - prom_printf("Too many tasks, decrease NR_TASK_BUCKETS please.\n"); - prom_halt(); - } - - sun4c_iobuffer_start = sun4c_iobuffer_high = - SUN4C_REAL_PGDIR_ALIGN(sun4c_taskstack_end); - sun4c_iobuffer_end = SUN4C_LOCK_END; - bitmap_size = (sun4c_iobuffer_end - sun4c_iobuffer_start) >> PAGE_SHIFT; - bitmap_size = (bitmap_size + 7) >> 3; - bitmap_size = LONG_ALIGN(bitmap_size); - iobuffer_map_size = bitmap_size << 3; - sun4c_iobuffer_map = __alloc_bootmem(bitmap_size, SMP_CACHE_BYTES, 0UL); - memset((void *) sun4c_iobuffer_map, 0, bitmap_size); - - sun4c_kstack_vma.vm_mm = &init_mm; - sun4c_kstack_vma.vm_start = sun4c_taskstack_start; - sun4c_kstack_vma.vm_end = sun4c_taskstack_end; - sun4c_kstack_vma.vm_page_prot = PAGE_SHARED; - sun4c_kstack_vma.vm_flags = VM_READ | VM_WRITE | VM_EXEC; - insert_vm_struct(&init_mm, &sun4c_kstack_vma); -} - -/* Cache flushing on the sun4c. 
*/ -static void sun4c_flush_cache_all(void) -{ - unsigned long begin, end; - - flush_user_windows(); - begin = (KERNBASE + SUN4C_REAL_PGDIR_SIZE); - end = (begin + SUN4C_VAC_SIZE); - - if (sun4c_vacinfo.linesize == 32) { - while (begin < end) { - __asm__ __volatile__( - "ld [%0 + 0x00], %%g0\n\t" - "ld [%0 + 0x20], %%g0\n\t" - "ld [%0 + 0x40], %%g0\n\t" - "ld [%0 + 0x60], %%g0\n\t" - "ld [%0 + 0x80], %%g0\n\t" - "ld [%0 + 0xa0], %%g0\n\t" - "ld [%0 + 0xc0], %%g0\n\t" - "ld [%0 + 0xe0], %%g0\n\t" - "ld [%0 + 0x100], %%g0\n\t" - "ld [%0 + 0x120], %%g0\n\t" - "ld [%0 + 0x140], %%g0\n\t" - "ld [%0 + 0x160], %%g0\n\t" - "ld [%0 + 0x180], %%g0\n\t" - "ld [%0 + 0x1a0], %%g0\n\t" - "ld [%0 + 0x1c0], %%g0\n\t" - "ld [%0 + 0x1e0], %%g0\n" - : : "r" (begin)); - begin += 512; - } - } else { - while (begin < end) { - __asm__ __volatile__( - "ld [%0 + 0x00], %%g0\n\t" - "ld [%0 + 0x10], %%g0\n\t" - "ld [%0 + 0x20], %%g0\n\t" - "ld [%0 + 0x30], %%g0\n\t" - "ld [%0 + 0x40], %%g0\n\t" - "ld [%0 + 0x50], %%g0\n\t" - "ld [%0 + 0x60], %%g0\n\t" - "ld [%0 + 0x70], %%g0\n\t" - "ld [%0 + 0x80], %%g0\n\t" - "ld [%0 + 0x90], %%g0\n\t" - "ld [%0 + 0xa0], %%g0\n\t" - "ld [%0 + 0xb0], %%g0\n\t" - "ld [%0 + 0xc0], %%g0\n\t" - "ld [%0 + 0xd0], %%g0\n\t" - "ld [%0 + 0xe0], %%g0\n\t" - "ld [%0 + 0xf0], %%g0\n" - : : "r" (begin)); - begin += 256; - } - } -} - -static void sun4c_flush_cache_mm(struct mm_struct *mm) -{ - int new_ctx = mm->context; - - if (new_ctx != NO_CONTEXT) { - flush_user_windows(); - - if (sun4c_context_ring[new_ctx].num_entries) { - struct sun4c_mmu_entry *head = &sun4c_context_ring[new_ctx].ringhd; - unsigned long flags; - - local_irq_save(flags); - if (head->next != head) { - struct sun4c_mmu_entry *entry = head->next; - int savectx = sun4c_get_context(); - - sun4c_set_context(new_ctx); - sun4c_flush_context(); - do { - struct sun4c_mmu_entry *next = entry->next; - - sun4c_user_unmap(entry); - free_user_entry(new_ctx, entry); - - entry = next; - } while (entry != head); - 
sun4c_set_context(savectx); - } - local_irq_restore(flags); - } - } -} - -static void sun4c_flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) -{ - struct mm_struct *mm = vma->vm_mm; - int new_ctx = mm->context; - - if (new_ctx != NO_CONTEXT) { - struct sun4c_mmu_entry *head = &sun4c_context_ring[new_ctx].ringhd; - struct sun4c_mmu_entry *entry; - unsigned long flags; - - flush_user_windows(); - - local_irq_save(flags); - /* All user segmap chains are ordered on entry->vaddr. */ - for (entry = head->next; - (entry != head) && ((entry->vaddr+SUN4C_REAL_PGDIR_SIZE) < start); - entry = entry->next) - ; - - /* Tracing various job mixtures showed that this conditional - * only passes ~35% of the time for most worse case situations, - * therefore we avoid all of this gross overhead ~65% of the time. - */ - if ((entry != head) && (entry->vaddr < end)) { - int octx = sun4c_get_context(); - sun4c_set_context(new_ctx); - - /* At this point, always, (start >= entry->vaddr) and - * (entry->vaddr < end), once the latter condition - * ceases to hold, or we hit the end of the list, we - * exit the loop. The ordering of all user allocated - * segmaps makes this all work out so beautifully. 
- */ - do { - struct sun4c_mmu_entry *next = entry->next; - unsigned long realend; - - /* "realstart" is always >= entry->vaddr */ - realend = entry->vaddr + SUN4C_REAL_PGDIR_SIZE; - if (end < realend) - realend = end; - if ((realend - entry->vaddr) <= (PAGE_SIZE << 3)) { - unsigned long page = entry->vaddr; - while (page < realend) { - sun4c_flush_page(page); - page += PAGE_SIZE; - } - } else { - sun4c_flush_segment(entry->vaddr); - sun4c_user_unmap(entry); - free_user_entry(new_ctx, entry); - } - entry = next; - } while ((entry != head) && (entry->vaddr < end)); - sun4c_set_context(octx); - } - local_irq_restore(flags); - } -} - -static void sun4c_flush_cache_page(struct vm_area_struct *vma, unsigned long page) -{ - struct mm_struct *mm = vma->vm_mm; - int new_ctx = mm->context; - - /* Sun4c has no separate I/D caches so cannot optimize for non - * text page flushes. - */ - if (new_ctx != NO_CONTEXT) { - int octx = sun4c_get_context(); - unsigned long flags; - - flush_user_windows(); - local_irq_save(flags); - sun4c_set_context(new_ctx); - sun4c_flush_page(page); - sun4c_set_context(octx); - local_irq_restore(flags); - } -} - -static void sun4c_flush_page_to_ram(unsigned long page) -{ - unsigned long flags; - - local_irq_save(flags); - sun4c_flush_page(page); - local_irq_restore(flags); -} - -/* Sun4c cache is unified, both instructions and data live there, so - * no need to flush the on-stack instructions for new signal handlers. - */ -static void sun4c_flush_sig_insns(struct mm_struct *mm, unsigned long insn_addr) -{ -} - -/* TLB flushing on the sun4c. These routines count on the cache - * flushing code to flush the user register windows so that we need - * not do so when we get here. 
- */ - -static void sun4c_flush_tlb_all(void) -{ - struct sun4c_mmu_entry *this_entry, *next_entry; - unsigned long flags; - int savectx, ctx; - - local_irq_save(flags); - this_entry = sun4c_kernel_ring.ringhd.next; - savectx = sun4c_get_context(); - flush_user_windows(); - while (sun4c_kernel_ring.num_entries) { - next_entry = this_entry->next; - sun4c_flush_segment(this_entry->vaddr); - for (ctx = 0; ctx < num_contexts; ctx++) { - sun4c_set_context(ctx); - sun4c_put_segmap(this_entry->vaddr, invalid_segment); - } - free_kernel_entry(this_entry, &sun4c_kernel_ring); - this_entry = next_entry; - } - sun4c_set_context(savectx); - local_irq_restore(flags); -} - -static void sun4c_flush_tlb_mm(struct mm_struct *mm) -{ - int new_ctx = mm->context; - - if (new_ctx != NO_CONTEXT) { - struct sun4c_mmu_entry *head = &sun4c_context_ring[new_ctx].ringhd; - unsigned long flags; - - local_irq_save(flags); - if (head->next != head) { - struct sun4c_mmu_entry *entry = head->next; - int savectx = sun4c_get_context(); - - sun4c_set_context(new_ctx); - sun4c_flush_context(); - do { - struct sun4c_mmu_entry *next = entry->next; - - sun4c_user_unmap(entry); - free_user_entry(new_ctx, entry); - - entry = next; - } while (entry != head); - sun4c_set_context(savectx); - } - local_irq_restore(flags); - } -} - -static void sun4c_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) -{ - struct mm_struct *mm = vma->vm_mm; - int new_ctx = mm->context; - - if (new_ctx != NO_CONTEXT) { - struct sun4c_mmu_entry *head = &sun4c_context_ring[new_ctx].ringhd; - struct sun4c_mmu_entry *entry; - unsigned long flags; - - local_irq_save(flags); - /* See commentary in sun4c_flush_cache_range(). 
*/ - for (entry = head->next; - (entry != head) && ((entry->vaddr+SUN4C_REAL_PGDIR_SIZE) < start); - entry = entry->next) - ; - - if ((entry != head) && (entry->vaddr < end)) { - int octx = sun4c_get_context(); - - sun4c_set_context(new_ctx); - do { - struct sun4c_mmu_entry *next = entry->next; - - sun4c_flush_segment(entry->vaddr); - sun4c_user_unmap(entry); - free_user_entry(new_ctx, entry); - - entry = next; - } while ((entry != head) && (entry->vaddr < end)); - sun4c_set_context(octx); - } - local_irq_restore(flags); - } -} - -static void sun4c_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) -{ - struct mm_struct *mm = vma->vm_mm; - int new_ctx = mm->context; - - if (new_ctx != NO_CONTEXT) { - int savectx = sun4c_get_context(); - unsigned long flags; - - local_irq_save(flags); - sun4c_set_context(new_ctx); - page &= PAGE_MASK; - sun4c_flush_page(page); - sun4c_put_pte(page, 0); - sun4c_set_context(savectx); - local_irq_restore(flags); - } -} - -static inline void sun4c_mapioaddr(unsigned long physaddr, unsigned long virt_addr) -{ - unsigned long page_entry, pg_iobits; - - pg_iobits = _SUN4C_PAGE_PRESENT | _SUN4C_READABLE | _SUN4C_WRITEABLE | - _SUN4C_PAGE_IO | _SUN4C_PAGE_NOCACHE; - - page_entry = ((physaddr >> PAGE_SHIFT) & SUN4C_PFN_MASK); - page_entry |= ((pg_iobits | _SUN4C_PAGE_PRIV) & ~(_SUN4C_PAGE_PRESENT)); - sun4c_put_pte(virt_addr, page_entry); -} - -static void sun4c_mapiorange(unsigned int bus, unsigned long xpa, - unsigned long xva, unsigned int len) -{ - while (len != 0) { - len -= PAGE_SIZE; - sun4c_mapioaddr(xpa, xva); - xva += PAGE_SIZE; - xpa += PAGE_SIZE; - } -} - -static void sun4c_unmapiorange(unsigned long virt_addr, unsigned int len) -{ - while (len != 0) { - len -= PAGE_SIZE; - sun4c_put_pte(virt_addr, 0); - virt_addr += PAGE_SIZE; - } -} - -static void sun4c_alloc_context(struct mm_struct *old_mm, struct mm_struct *mm) -{ - struct ctx_list *ctxp; - - ctxp = ctx_free.next; - if (ctxp != &ctx_free) { - 
remove_from_ctx_list(ctxp); - add_to_used_ctxlist(ctxp); - mm->context = ctxp->ctx_number; - ctxp->ctx_mm = mm; - return; - } - ctxp = ctx_used.next; - if (ctxp->ctx_mm == old_mm) - ctxp = ctxp->next; - remove_from_ctx_list(ctxp); - add_to_used_ctxlist(ctxp); - ctxp->ctx_mm->context = NO_CONTEXT; - ctxp->ctx_mm = mm; - mm->context = ctxp->ctx_number; - sun4c_demap_context(&sun4c_context_ring[ctxp->ctx_number], - ctxp->ctx_number); -} - -/* Switch the current MM context. */ -static void sun4c_switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, struct task_struct *tsk, int cpu) -{ - struct ctx_list *ctx; - int dirty = 0; - - if (mm->context == NO_CONTEXT) { - dirty = 1; - sun4c_alloc_context(old_mm, mm); - } else { - /* Update the LRU ring of contexts. */ - ctx = ctx_list_pool + mm->context; - remove_from_ctx_list(ctx); - add_to_used_ctxlist(ctx); - } - if (dirty || old_mm != mm) - sun4c_set_context(mm->context); -} - -static void sun4c_destroy_context(struct mm_struct *mm) -{ - struct ctx_list *ctx_old; - - if (mm->context != NO_CONTEXT) { - sun4c_demap_context(&sun4c_context_ring[mm->context], mm->context); - ctx_old = ctx_list_pool + mm->context; - remove_from_ctx_list(ctx_old); - add_to_free_ctxlist(ctx_old); - mm->context = NO_CONTEXT; - } -} - -static void sun4c_mmu_info(struct seq_file *m) -{ - int used_user_entries, i; - - used_user_entries = 0; - for (i = 0; i < num_contexts; i++) - used_user_entries += sun4c_context_ring[i].num_entries; - - seq_printf(m, - "vacsize\t\t: %d bytes\n" - "vachwflush\t: %s\n" - "vaclinesize\t: %d bytes\n" - "mmuctxs\t\t: %d\n" - "mmupsegs\t: %d\n" - "kernelpsegs\t: %d\n" - "kfreepsegs\t: %d\n" - "usedpsegs\t: %d\n" - "ufreepsegs\t: %d\n" - "user_taken\t: %d\n" - "max_taken\t: %d\n", - sun4c_vacinfo.num_bytes, - (sun4c_vacinfo.do_hwflushes ? 
"yes" : "no"), - sun4c_vacinfo.linesize, - num_contexts, - (invalid_segment + 1), - sun4c_kernel_ring.num_entries, - sun4c_kfree_ring.num_entries, - used_user_entries, - sun4c_ufree_ring.num_entries, - sun4c_user_taken_entries, - max_user_taken_entries); -} - -/* Nothing below here should touch the mmu hardware nor the mmu_entry - * data structures. - */ - -/* First the functions which the mid-level code uses to directly - * manipulate the software page tables. Some defines since we are - * emulating the i386 page directory layout. - */ -#define PGD_PRESENT 0x001 -#define PGD_RW 0x002 -#define PGD_USER 0x004 -#define PGD_ACCESSED 0x020 -#define PGD_DIRTY 0x040 -#define PGD_TABLE (PGD_PRESENT | PGD_RW | PGD_USER | PGD_ACCESSED | PGD_DIRTY) - -static void sun4c_set_pte(pte_t *ptep, pte_t pte) -{ - *ptep = pte; -} - -static void sun4c_pgd_set(pgd_t * pgdp, pmd_t * pmdp) -{ -} - -static void sun4c_pmd_set(pmd_t * pmdp, pte_t * ptep) -{ - pmdp->pmdv[0] = PGD_TABLE | (unsigned long) ptep; -} - -static void sun4c_pmd_populate(pmd_t * pmdp, struct page * ptep) -{ - if (page_address(ptep) == NULL) BUG(); /* No highmem on sun4c */ - pmdp->pmdv[0] = PGD_TABLE | (unsigned long) page_address(ptep); -} - -static int sun4c_pte_present(pte_t pte) -{ - return ((pte_val(pte) & (_SUN4C_PAGE_PRESENT | _SUN4C_PAGE_PRIV)) != 0); -} -static void sun4c_pte_clear(pte_t *ptep) { *ptep = __pte(0); } - -static int sun4c_pmd_bad(pmd_t pmd) -{ - return (((pmd_val(pmd) & ~PAGE_MASK) != PGD_TABLE) || - (!virt_addr_valid(pmd_val(pmd)))); -} - -static int sun4c_pmd_present(pmd_t pmd) -{ - return ((pmd_val(pmd) & PGD_PRESENT) != 0); -} - -#if 0 /* if PMD takes one word */ -static void sun4c_pmd_clear(pmd_t *pmdp) { *pmdp = __pmd(0); } -#else /* if pmd_t is a longish aggregate */ -static void sun4c_pmd_clear(pmd_t *pmdp) { - memset((void *)pmdp, 0, sizeof(pmd_t)); -} -#endif - -static int sun4c_pgd_none(pgd_t pgd) { return 0; } -static int sun4c_pgd_bad(pgd_t pgd) { return 0; } -static int 
sun4c_pgd_present(pgd_t pgd) { return 1; } -static void sun4c_pgd_clear(pgd_t * pgdp) { } - -/* - * The following only work if pte_present() is true. - * Undefined behaviour if not.. - */ -static pte_t sun4c_pte_mkwrite(pte_t pte) -{ - pte = __pte(pte_val(pte) | _SUN4C_PAGE_WRITE); - if (pte_val(pte) & _SUN4C_PAGE_MODIFIED) - pte = __pte(pte_val(pte) | _SUN4C_PAGE_SILENT_WRITE); - return pte; -} - -static pte_t sun4c_pte_mkdirty(pte_t pte) -{ - pte = __pte(pte_val(pte) | _SUN4C_PAGE_MODIFIED); - if (pte_val(pte) & _SUN4C_PAGE_WRITE) - pte = __pte(pte_val(pte) | _SUN4C_PAGE_SILENT_WRITE); - return pte; -} - -static pte_t sun4c_pte_mkyoung(pte_t pte) -{ - pte = __pte(pte_val(pte) | _SUN4C_PAGE_ACCESSED); - if (pte_val(pte) & _SUN4C_PAGE_READ) - pte = __pte(pte_val(pte) | _SUN4C_PAGE_SILENT_READ); - return pte; -} - -/* - * Conversion functions: convert a page and protection to a page entry, - * and a page entry and page directory to the page they refer to. - */ -static pte_t sun4c_mk_pte(struct page *page, pgprot_t pgprot) -{ - return __pte(page_to_pfn(page) | pgprot_val(pgprot)); -} - -static pte_t sun4c_mk_pte_phys(unsigned long phys_page, pgprot_t pgprot) -{ - return __pte((phys_page >> PAGE_SHIFT) | pgprot_val(pgprot)); -} - -static pte_t sun4c_mk_pte_io(unsigned long page, pgprot_t pgprot, int space) -{ - return __pte(((page - PAGE_OFFSET) >> PAGE_SHIFT) | pgprot_val(pgprot)); -} - -static unsigned long sun4c_pte_pfn(pte_t pte) -{ - return pte_val(pte) & SUN4C_PFN_MASK; -} - -static pte_t sun4c_pgoff_to_pte(unsigned long pgoff) -{ - return __pte(pgoff | _SUN4C_PAGE_FILE); -} - -static unsigned long sun4c_pte_to_pgoff(pte_t pte) -{ - return pte_val(pte) & ((1UL << PTE_FILE_MAX_BITS) - 1); -} - - -static inline unsigned long sun4c_pmd_page_v(pmd_t pmd) -{ - return (pmd_val(pmd) & PAGE_MASK); -} - -static struct page *sun4c_pmd_page(pmd_t pmd) -{ - return virt_to_page(sun4c_pmd_page_v(pmd)); -} - -static unsigned long sun4c_pgd_page(pgd_t pgd) { return 0; } - -/* 
to find an entry in a page-table-directory */ -static inline pgd_t *sun4c_pgd_offset(struct mm_struct * mm, unsigned long address) -{ - return mm->pgd + (address >> SUN4C_PGDIR_SHIFT); -} - -/* Find an entry in the second-level page table.. */ -static pmd_t *sun4c_pmd_offset(pgd_t * dir, unsigned long address) -{ - return (pmd_t *) dir; -} - -/* Find an entry in the third-level page table.. */ -pte_t *sun4c_pte_offset_kernel(pmd_t * dir, unsigned long address) -{ - return (pte_t *) sun4c_pmd_page_v(*dir) + - ((address >> PAGE_SHIFT) & (SUN4C_PTRS_PER_PTE - 1)); -} - -static unsigned long sun4c_swp_type(swp_entry_t entry) -{ - return (entry.val & SUN4C_SWP_TYPE_MASK); -} - -static unsigned long sun4c_swp_offset(swp_entry_t entry) -{ - return (entry.val >> SUN4C_SWP_OFF_SHIFT) & SUN4C_SWP_OFF_MASK; -} - -static swp_entry_t sun4c_swp_entry(unsigned long type, unsigned long offset) -{ - return (swp_entry_t) { - (offset & SUN4C_SWP_OFF_MASK) << SUN4C_SWP_OFF_SHIFT - | (type & SUN4C_SWP_TYPE_MASK) }; -} - -static void sun4c_free_pte_slow(pte_t *pte) -{ - free_page((unsigned long)pte); -} - -static void sun4c_free_pgd_slow(pgd_t *pgd) -{ - free_page((unsigned long)pgd); -} - -static pgd_t *sun4c_get_pgd_fast(void) -{ - unsigned long *ret; - - if ((ret = pgd_quicklist) != NULL) { - pgd_quicklist = (unsigned long *)(*ret); - ret[0] = ret[1]; - pgtable_cache_size--; - } else { - pgd_t *init; - - ret = (unsigned long *)__get_free_page(GFP_KERNEL); - memset (ret, 0, (KERNBASE / SUN4C_PGDIR_SIZE) * sizeof(pgd_t)); - init = sun4c_pgd_offset(&init_mm, 0); - memcpy (((pgd_t *)ret) + USER_PTRS_PER_PGD, init + USER_PTRS_PER_PGD, - (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); - } - return (pgd_t *)ret; -} - -static void sun4c_free_pgd_fast(pgd_t *pgd) -{ - *(unsigned long *)pgd = (unsigned long) pgd_quicklist; - pgd_quicklist = (unsigned long *) pgd; - pgtable_cache_size++; -} - - -static inline pte_t * -sun4c_pte_alloc_one_fast(struct mm_struct *mm, unsigned long address) -{ 
- unsigned long *ret; - - if ((ret = (unsigned long *)pte_quicklist) != NULL) { - pte_quicklist = (unsigned long *)(*ret); - ret[0] = ret[1]; - pgtable_cache_size--; - } - return (pte_t *)ret; -} - -static pte_t *sun4c_pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) -{ - pte_t *pte; - - if ((pte = sun4c_pte_alloc_one_fast(mm, address)) != NULL) - return pte; - - pte = (pte_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); - return pte; -} - -static pgtable_t sun4c_pte_alloc_one(struct mm_struct *mm, unsigned long address) -{ - pte_t *pte; - struct page *page; - - pte = sun4c_pte_alloc_one_kernel(mm, address); - if (pte == NULL) - return NULL; - page = virt_to_page(pte); - pgtable_page_ctor(page); - return page; -} - -static inline void sun4c_free_pte_fast(pte_t *pte) -{ - *(unsigned long *)pte = (unsigned long) pte_quicklist; - pte_quicklist = (unsigned long *) pte; - pgtable_cache_size++; -} - -static void sun4c_pte_free(pgtable_t pte) -{ - pgtable_page_dtor(pte); - sun4c_free_pte_fast(page_address(pte)); -} - -/* - * allocating and freeing a pmd is trivial: the 1-entry pmd is - * inside the pgd, so has no extra memory associated with it. - */ -static pmd_t *sun4c_pmd_alloc_one(struct mm_struct *mm, unsigned long address) -{ - BUG(); - return NULL; -} - -static void sun4c_free_pmd_fast(pmd_t * pmd) { } - -static void sun4c_check_pgt_cache(int low, int high) -{ - if (pgtable_cache_size > high) { - do { - if (pgd_quicklist) - sun4c_free_pgd_slow(sun4c_get_pgd_fast()); - if (pte_quicklist) - sun4c_free_pte_slow(sun4c_pte_alloc_one_fast(NULL, 0)); - } while (pgtable_cache_size > low); - } -} - -/* An experiment, turn off by default for now... 
-DaveM */ -#define SUN4C_PRELOAD_PSEG - -void sun4c_update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) -{ - unsigned long flags; - int pseg; - - if (vma->vm_mm->context == NO_CONTEXT) - return; - - local_irq_save(flags); - address &= PAGE_MASK; - if ((pseg = sun4c_get_segmap(address)) == invalid_segment) { - struct sun4c_mmu_entry *entry = sun4c_user_strategy(); - struct mm_struct *mm = vma->vm_mm; - unsigned long start, end; - - entry->vaddr = start = (address & SUN4C_REAL_PGDIR_MASK); - entry->ctx = mm->context; - add_ring_ordered(sun4c_context_ring + mm->context, entry); - sun4c_put_segmap(entry->vaddr, entry->pseg); - end = start + SUN4C_REAL_PGDIR_SIZE; - while (start < end) { -#ifdef SUN4C_PRELOAD_PSEG - pgd_t *pgdp = sun4c_pgd_offset(mm, start); - pte_t *ptep; - - if (!pgdp) - goto no_mapping; - ptep = sun4c_pte_offset_kernel((pmd_t *) pgdp, start); - if (!ptep || !(pte_val(*ptep) & _SUN4C_PAGE_PRESENT)) - goto no_mapping; - sun4c_put_pte(start, pte_val(*ptep)); - goto next; - - no_mapping: -#endif - sun4c_put_pte(start, 0); -#ifdef SUN4C_PRELOAD_PSEG - next: -#endif - start += PAGE_SIZE; - } -#ifndef SUN4C_PRELOAD_PSEG - sun4c_put_pte(address, pte_val(*ptep)); -#endif - local_irq_restore(flags); - return; - } else { - struct sun4c_mmu_entry *entry = &mmu_entry_pool[pseg]; - - remove_lru(entry); - add_lru(entry); - } - - sun4c_put_pte(address, pte_val(*ptep)); - local_irq_restore(flags); -} - -extern void sparc_context_init(int); -extern unsigned long bootmem_init(unsigned long *pages_avail); -extern unsigned long last_valid_pfn; - -void __init sun4c_paging_init(void) -{ - int i, cnt; - unsigned long kernel_end, vaddr; - extern struct resource sparc_iomap; - unsigned long end_pfn, pages_avail; - - kernel_end = (unsigned long) &_end; - kernel_end = SUN4C_REAL_PGDIR_ALIGN(kernel_end); - - pages_avail = 0; - last_valid_pfn = bootmem_init(&pages_avail); - end_pfn = last_valid_pfn; - - sun4c_probe_mmu(); - invalid_segment = 
(num_segmaps - 1); - sun4c_init_mmu_entry_pool(); - sun4c_init_rings(); - sun4c_init_map_kernelprom(kernel_end); - sun4c_init_clean_mmu(kernel_end); - sun4c_init_fill_kernel_ring(SUN4C_KERNEL_BUCKETS); - sun4c_init_lock_area(sparc_iomap.start, IOBASE_END); - sun4c_init_lock_area(DVMA_VADDR, DVMA_END); - sun4c_init_lock_areas(); - sun4c_init_fill_user_ring(); - - sun4c_set_context(0); - memset(swapper_pg_dir, 0, PAGE_SIZE); - memset(pg0, 0, PAGE_SIZE); - memset(pg1, 0, PAGE_SIZE); - memset(pg2, 0, PAGE_SIZE); - memset(pg3, 0, PAGE_SIZE); - - /* Save work later. */ - vaddr = VMALLOC_START; - swapper_pg_dir[vaddr>>SUN4C_PGDIR_SHIFT] = __pgd(PGD_TABLE | (unsigned long) pg0); - vaddr += SUN4C_PGDIR_SIZE; - swapper_pg_dir[vaddr>>SUN4C_PGDIR_SHIFT] = __pgd(PGD_TABLE | (unsigned long) pg1); - vaddr += SUN4C_PGDIR_SIZE; - swapper_pg_dir[vaddr>>SUN4C_PGDIR_SHIFT] = __pgd(PGD_TABLE | (unsigned long) pg2); - vaddr += SUN4C_PGDIR_SIZE; - swapper_pg_dir[vaddr>>SUN4C_PGDIR_SHIFT] = __pgd(PGD_TABLE | (unsigned long) pg3); - sun4c_init_ss2_cache_bug(); - sparc_context_init(num_contexts); - - { - unsigned long zones_size[MAX_NR_ZONES]; - unsigned long zholes_size[MAX_NR_ZONES]; - unsigned long npages; - int znum; - - for (znum = 0; znum < MAX_NR_ZONES; znum++) - zones_size[znum] = zholes_size[znum] = 0; - - npages = max_low_pfn - pfn_base; - - zones_size[ZONE_DMA] = npages; - zholes_size[ZONE_DMA] = npages - pages_avail; - - npages = highend_pfn - max_low_pfn; - zones_size[ZONE_HIGHMEM] = npages; - zholes_size[ZONE_HIGHMEM] = npages - calc_highpages(); - - free_area_init_node(0, zones_size, pfn_base, zholes_size); - } - - cnt = 0; - for (i = 0; i < num_segmaps; i++) - if (mmu_entry_pool[i].locked) - cnt++; - - max_user_taken_entries = num_segmaps - cnt - 40 - 1; - - printk("SUN4C: %d mmu entries for the kernel\n", cnt); -} - -static pgprot_t sun4c_pgprot_noncached(pgprot_t prot) -{ - prot |= __pgprot(_SUN4C_PAGE_IO | _SUN4C_PAGE_NOCACHE); - - return prot; -} - -/* Load up routines 
and constants for sun4c mmu */ -void __init ld_mmu_sun4c(void) -{ - extern void ___xchg32_sun4c(void); - - printk("Loading sun4c MMU routines\n"); - - /* First the constants */ - BTFIXUPSET_SIMM13(pgdir_shift, SUN4C_PGDIR_SHIFT); - BTFIXUPSET_SETHI(pgdir_size, SUN4C_PGDIR_SIZE); - BTFIXUPSET_SETHI(pgdir_mask, SUN4C_PGDIR_MASK); - - BTFIXUPSET_SIMM13(ptrs_per_pmd, SUN4C_PTRS_PER_PMD); - BTFIXUPSET_SIMM13(ptrs_per_pgd, SUN4C_PTRS_PER_PGD); - BTFIXUPSET_SIMM13(user_ptrs_per_pgd, KERNBASE / SUN4C_PGDIR_SIZE); - - BTFIXUPSET_INT(page_none, pgprot_val(SUN4C_PAGE_NONE)); - PAGE_SHARED = pgprot_val(SUN4C_PAGE_SHARED); - BTFIXUPSET_INT(page_copy, pgprot_val(SUN4C_PAGE_COPY)); - BTFIXUPSET_INT(page_readonly, pgprot_val(SUN4C_PAGE_READONLY)); - BTFIXUPSET_INT(page_kernel, pgprot_val(SUN4C_PAGE_KERNEL)); - page_kernel = pgprot_val(SUN4C_PAGE_KERNEL); - - /* Functions */ - BTFIXUPSET_CALL(pgprot_noncached, sun4c_pgprot_noncached, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(___xchg32, ___xchg32_sun4c, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(do_check_pgt_cache, sun4c_check_pgt_cache, BTFIXUPCALL_NORM); - - BTFIXUPSET_CALL(flush_cache_all, sun4c_flush_cache_all, BTFIXUPCALL_NORM); - - if (sun4c_vacinfo.do_hwflushes) { - BTFIXUPSET_CALL(sun4c_flush_page, sun4c_flush_page_hw, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(sun4c_flush_segment, sun4c_flush_segment_hw, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(sun4c_flush_context, sun4c_flush_context_hw, BTFIXUPCALL_NORM); - } else { - BTFIXUPSET_CALL(sun4c_flush_page, sun4c_flush_page_sw, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(sun4c_flush_segment, sun4c_flush_segment_sw, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(sun4c_flush_context, sun4c_flush_context_sw, BTFIXUPCALL_NORM); - } - - BTFIXUPSET_CALL(flush_tlb_mm, sun4c_flush_tlb_mm, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_cache_mm, sun4c_flush_cache_mm, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(destroy_context, sun4c_destroy_context, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(switch_mm, sun4c_switch_mm, BTFIXUPCALL_NORM); - 
BTFIXUPSET_CALL(flush_cache_page, sun4c_flush_cache_page, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_tlb_page, sun4c_flush_tlb_page, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_tlb_range, sun4c_flush_tlb_range, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_cache_range, sun4c_flush_cache_range, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(__flush_page_to_ram, sun4c_flush_page_to_ram, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_tlb_all, sun4c_flush_tlb_all, BTFIXUPCALL_NORM); - - BTFIXUPSET_CALL(flush_sig_insns, sun4c_flush_sig_insns, BTFIXUPCALL_NOP); - - BTFIXUPSET_CALL(set_pte, sun4c_set_pte, BTFIXUPCALL_STO1O0); - - BTFIXUPSET_CALL(pte_pfn, sun4c_pte_pfn, BTFIXUPCALL_NORM); -#if 0 /* PAGE_SHIFT <= 12 */ /* Eek. Investigate. XXX */ - BTFIXUPSET_CALL(pmd_page, sun4c_pmd_page, BTFIXUPCALL_ANDNINT(PAGE_SIZE - 1)); -#else - BTFIXUPSET_CALL(pmd_page, sun4c_pmd_page, BTFIXUPCALL_NORM); -#endif - BTFIXUPSET_CALL(pmd_set, sun4c_pmd_set, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(pmd_populate, sun4c_pmd_populate, BTFIXUPCALL_NORM); - - BTFIXUPSET_CALL(pte_present, sun4c_pte_present, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(pte_clear, sun4c_pte_clear, BTFIXUPCALL_STG0O0); - - BTFIXUPSET_CALL(pmd_bad, sun4c_pmd_bad, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(pmd_present, sun4c_pmd_present, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(pmd_clear, sun4c_pmd_clear, BTFIXUPCALL_STG0O0); - - BTFIXUPSET_CALL(pgd_none, sun4c_pgd_none, BTFIXUPCALL_RETINT(0)); - BTFIXUPSET_CALL(pgd_bad, sun4c_pgd_bad, BTFIXUPCALL_RETINT(0)); - BTFIXUPSET_CALL(pgd_present, sun4c_pgd_present, BTFIXUPCALL_RETINT(1)); - BTFIXUPSET_CALL(pgd_clear, sun4c_pgd_clear, BTFIXUPCALL_NOP); - - BTFIXUPSET_CALL(mk_pte, sun4c_mk_pte, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(mk_pte_phys, sun4c_mk_pte_phys, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(mk_pte_io, sun4c_mk_pte_io, BTFIXUPCALL_NORM); - - BTFIXUPSET_INT(pte_modify_mask, _SUN4C_PAGE_CHG_MASK); - BTFIXUPSET_CALL(pmd_offset, sun4c_pmd_offset, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(pte_offset_kernel, 
sun4c_pte_offset_kernel, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(free_pte_fast, sun4c_free_pte_fast, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(pte_free, sun4c_pte_free, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(pte_alloc_one_kernel, sun4c_pte_alloc_one_kernel, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(pte_alloc_one, sun4c_pte_alloc_one, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(free_pmd_fast, sun4c_free_pmd_fast, BTFIXUPCALL_NOP); - BTFIXUPSET_CALL(pmd_alloc_one, sun4c_pmd_alloc_one, BTFIXUPCALL_RETO0); - BTFIXUPSET_CALL(free_pgd_fast, sun4c_free_pgd_fast, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(get_pgd_fast, sun4c_get_pgd_fast, BTFIXUPCALL_NORM); - - BTFIXUPSET_HALF(pte_writei, _SUN4C_PAGE_WRITE); - BTFIXUPSET_HALF(pte_dirtyi, _SUN4C_PAGE_MODIFIED); - BTFIXUPSET_HALF(pte_youngi, _SUN4C_PAGE_ACCESSED); - BTFIXUPSET_HALF(pte_filei, _SUN4C_PAGE_FILE); - BTFIXUPSET_HALF(pte_wrprotecti, _SUN4C_PAGE_WRITE|_SUN4C_PAGE_SILENT_WRITE); - BTFIXUPSET_HALF(pte_mkcleani, _SUN4C_PAGE_MODIFIED|_SUN4C_PAGE_SILENT_WRITE); - BTFIXUPSET_HALF(pte_mkoldi, _SUN4C_PAGE_ACCESSED|_SUN4C_PAGE_SILENT_READ); - BTFIXUPSET_CALL(pte_mkwrite, sun4c_pte_mkwrite, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(pte_mkdirty, sun4c_pte_mkdirty, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(pte_mkyoung, sun4c_pte_mkyoung, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(update_mmu_cache, sun4c_update_mmu_cache, BTFIXUPCALL_NORM); - - BTFIXUPSET_CALL(pte_to_pgoff, sun4c_pte_to_pgoff, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(pgoff_to_pte, sun4c_pgoff_to_pte, BTFIXUPCALL_NORM); - - BTFIXUPSET_CALL(mmu_lockarea, sun4c_lockarea, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(mmu_unlockarea, sun4c_unlockarea, BTFIXUPCALL_NORM); - - BTFIXUPSET_CALL(mmu_get_scsi_one, sun4c_get_scsi_one, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(mmu_get_scsi_sgl, sun4c_get_scsi_sgl, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(mmu_release_scsi_one, sun4c_release_scsi_one, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(mmu_release_scsi_sgl, sun4c_release_scsi_sgl, BTFIXUPCALL_NORM); - - BTFIXUPSET_CALL(mmu_map_dma_area, 
sun4c_map_dma_area, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(mmu_unmap_dma_area, sun4c_unmap_dma_area, BTFIXUPCALL_NORM); - - BTFIXUPSET_CALL(sparc_mapiorange, sun4c_mapiorange, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(sparc_unmapiorange, sun4c_unmapiorange, BTFIXUPCALL_NORM); - - BTFIXUPSET_CALL(__swp_type, sun4c_swp_type, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(__swp_offset, sun4c_swp_offset, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(__swp_entry, sun4c_swp_entry, BTFIXUPCALL_NORM); - - BTFIXUPSET_CALL(alloc_thread_info_node, sun4c_alloc_thread_info_node, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(free_thread_info, sun4c_free_thread_info, BTFIXUPCALL_NORM); - - BTFIXUPSET_CALL(mmu_info, sun4c_mmu_info, BTFIXUPCALL_NORM); - - /* These should _never_ get called with two level tables. */ - BTFIXUPSET_CALL(pgd_set, sun4c_pgd_set, BTFIXUPCALL_NOP); - BTFIXUPSET_CALL(pgd_page_vaddr, sun4c_pgd_page, BTFIXUPCALL_RETO0); -} diff --git a/arch/sparc/prom/Makefile b/arch/sparc/prom/Makefile index 8287bbe8876..020300b18c0 100644 --- a/arch/sparc/prom/Makefile +++ b/arch/sparc/prom/Makefile @@ -10,7 +10,6 @@ lib-$(CONFIG_SPARC32) += memory.o lib-y += misc_$(BITS).o lib-$(CONFIG_SPARC32) += mp.o lib-$(CONFIG_SPARC32) += ranges.o -lib-$(CONFIG_SPARC32) += segment.o lib-y += console_$(BITS).o lib-y += printf.o lib-y += tree_$(BITS).o diff --git a/arch/sparc/prom/segment.c b/arch/sparc/prom/segment.c deleted file mode 100644 index 86a663f1d3c..00000000000 --- a/arch/sparc/prom/segment.c +++ /dev/null @@ -1,28 +0,0 @@ -/* - * segment.c: Prom routine to map segments in other contexts before - * a standalone is completely mapped. This is for sun4 and - * sun4c architectures only. - * - * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu) - */ - -#include -#include -#include -#include -#include - -extern void restore_current(void); - -/* Set physical segment 'segment' at virtual address 'vaddr' in - * context 'ctx'. 
- */ -void -prom_putsegment(int ctx, unsigned long vaddr, int segment) -{ - unsigned long flags; - spin_lock_irqsave(&prom_lock, flags); - (*(romvec->pv_setctxt))(ctx, (char *) vaddr, segment); - restore_current(); - spin_unlock_irqrestore(&prom_lock, flags); -} -- cgit v1.2.3-70-g09d2 From c68e5d39a502d01421cbc70d25c377e9215facef Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 13 May 2012 23:09:04 -0700 Subject: sparc32: Implement hard_smp_processor_id() via instruction patching. This is the last non-trivial user of btfixup. Like sparc64, use a special patch section to resolve the various implementations of how to read the current CPU's ID when we don't have current_thread_info()->cpu necessarily available. Signed-off-by: David S. Miller Tested-by: Sam Ravnborg --- arch/sparc/include/asm/smp_32.h | 43 +------------------------------- arch/sparc/include/asm/winmacro.h | 49 ++++++++++++++---------------------- arch/sparc/kernel/entry.S | 23 +++++++++++++++++ arch/sparc/kernel/kernel.h | 2 -- arch/sparc/kernel/leon_smp.c | 29 +--------------------- arch/sparc/kernel/setup_32.c | 51 ++++++++++++++++++++++++++++++++++++++ arch/sparc/kernel/sparc_ksyms_32.c | 5 ---- arch/sparc/kernel/sun4d_smp.c | 27 +++----------------- arch/sparc/kernel/sun4m_smp.c | 23 ----------------- arch/sparc/kernel/trampoline_32.S | 22 ++-------------- 10 files changed, 99 insertions(+), 175 deletions(-) (limited to 'arch/sparc/kernel/kernel.h') diff --git a/arch/sparc/include/asm/smp_32.h b/arch/sparc/include/asm/smp_32.h index 01c51c70434..f5b325e731d 100644 --- a/arch/sparc/include/asm/smp_32.h +++ b/arch/sparc/include/asm/smp_32.h @@ -59,12 +59,9 @@ void smp_bogo(struct seq_file *); void smp_info(struct seq_file *); BTFIXUPDEF_CALL(void, smp_cross_call, smpfunc_t, cpumask_t, unsigned long, unsigned long, unsigned long, unsigned long) -BTFIXUPDEF_CALL(int, __hard_smp_processor_id, void) BTFIXUPDEF_CALL(void, smp_ipi_resched, int); BTFIXUPDEF_CALL(void, smp_ipi_single, int); 
BTFIXUPDEF_CALL(void, smp_ipi_mask_one, int); -BTFIXUPDEF_BLACKBOX(hard_smp_processor_id) -BTFIXUPDEF_BLACKBOX(load_current) #define smp_cross_call(func,mask,arg1,arg2,arg3,arg4) BTFIXUP_CALL(smp_cross_call)(func,mask,arg1,arg2,arg3,arg4) @@ -117,45 +114,7 @@ extern inline int hard_smpleon_processor_id(void) return cpuid; } -#ifndef MODULE -static inline int hard_smp_processor_id(void) -{ - int cpuid; - - /* Black box - sun4m - __asm__ __volatile__("rd %%tbr, %0\n\t" - "srl %0, 12, %0\n\t" - "and %0, 3, %0\n\t" : - "=&r" (cpuid)); - - sun4d - __asm__ __volatile__("lda [%g0] ASI_M_VIKING_TMP1, %0\n\t" - "nop; nop" : - "=&r" (cpuid)); - - leon - __asm__ __volatile__( "rd %asr17, %0\n\t" - "srl %0, 0x1c, %0\n\t" - "nop\n\t" : - "=&r" (cpuid)); - See btfixup.h and btfixupprep.c to understand how a blackbox works. - */ - __asm__ __volatile__("sethi %%hi(___b_hard_smp_processor_id), %0\n\t" - "sethi %%hi(boot_cpu_id), %0\n\t" - "ldub [%0 + %%lo(boot_cpu_id)], %0\n\t" : - "=&r" (cpuid)); - return cpuid; -} -#else -static inline int hard_smp_processor_id(void) -{ - int cpuid; - - __asm__ __volatile__("mov %%o7, %%g1\n\t" - "call ___f___hard_smp_processor_id\n\t" - " nop\n\t" - "mov %%g2, %0\n\t" : "=r"(cpuid) : : "g1", "g2"); - return cpuid; -} -#endif +extern int hard_smp_processor_id(void); #define raw_smp_processor_id() (current_thread_info()->cpu) diff --git a/arch/sparc/include/asm/winmacro.h b/arch/sparc/include/asm/winmacro.h index a9be04b0d04..9b7b21764cd 100644 --- a/arch/sparc/include/asm/winmacro.h +++ b/arch/sparc/include/asm/winmacro.h @@ -103,37 +103,24 @@ st %scratch, [%cur_reg + TI_W_SAVED]; #ifdef CONFIG_SMP -/* Results of LOAD_CURRENT() after BTFIXUP for SUN4M, SUN4D & LEON (comments) */ -#define LOAD_CURRENT4M(dest_reg, idreg) \ - rd %tbr, %idreg; \ - sethi %hi(current_set), %dest_reg; \ - srl %idreg, 10, %idreg; \ - or %dest_reg, %lo(current_set), %dest_reg; \ - and %idreg, 0xc, %idreg; \ - ld [%idreg + %dest_reg], %dest_reg; - -#define 
LOAD_CURRENT4D(dest_reg, idreg) \ - lda [%g0] ASI_M_VIKING_TMP1, %idreg; \ - sethi %hi(C_LABEL(current_set)), %dest_reg; \ - sll %idreg, 2, %idreg; \ - or %dest_reg, %lo(C_LABEL(current_set)), %dest_reg; \ - ld [%idreg + %dest_reg], %dest_reg; - -#define LOAD_CURRENT_LEON(dest_reg, idreg) \ - rd %asr17, %idreg; \ - sethi %hi(current_set), %dest_reg; \ - srl %idreg, 0x1c, %idreg; \ - or %dest_reg, %lo(current_set), %dest_reg; \ - sll %idreg, 0x2, %idreg; \ - ld [%idreg + %dest_reg], %dest_reg; - -/* Blackbox - take care with this... - check smp4m and smp4d before changing this. */ -#define LOAD_CURRENT(dest_reg, idreg) \ - sethi %hi(___b_load_current), %idreg; \ - sethi %hi(current_set), %dest_reg; \ - sethi %hi(boot_cpu_id4), %idreg; \ - or %dest_reg, %lo(current_set), %dest_reg; \ - ldub [%idreg + %lo(boot_cpu_id4)], %idreg; \ +#define LOAD_CURRENT(dest_reg, idreg) \ +661: rd %tbr, %idreg; \ + srl %idreg, 10, %idreg; \ + and %idreg, 0xc, %idreg; \ + .section .cpuid_patch, "ax"; \ + /* Instruction location. */ \ + .word 661b; \ + /* SUN4D implementation. */ \ + lda [%g0] ASI_M_VIKING_TMP1, %idreg; \ + sll %idreg, 2, %idreg; \ + nop; \ + /* LEON implementation. */ \ + rd %asr17, %idreg; \ + srl %idreg, 0x1c, %idreg; \ + sll %idreg, 0x02, %idreg; \ + .previous; \ + sethi %hi(current_set), %dest_reg; \ + or %dest_reg, %lo(current_set), %dest_reg;\ ld [%idreg + %dest_reg], %dest_reg; #else #define LOAD_CURRENT(dest_reg, idreg) \ diff --git a/arch/sparc/kernel/entry.S b/arch/sparc/kernel/entry.S index d7143ba6a8f..773f3f05bf2 100644 --- a/arch/sparc/kernel/entry.S +++ b/arch/sparc/kernel/entry.S @@ -7,6 +7,7 @@ * Copyright (C) 1997 Anton Blanchard (anton@progsoc.uts.edu.au) */ +#include #include #include @@ -1347,4 +1348,26 @@ flushw_all: ret restore +#ifdef CONFIG_SMP +ENTRY(hard_smp_processor_id) +661: rd %tbr, %g1 + srl %g1, 12, %o0 + and %o0, 3, %o0 + .section .cpuid_patch, "ax" + /* Instruction location. */ + .word 661b + /* SUN4D implementation. 
*/ + lda [%g0] ASI_M_VIKING_TMP1, %o0 + nop + nop + /* LEON implementation. */ + rd %asr17, %o0 + srl %o0, 0x1c, %o0 + nop + .previous + retl + nop +ENDPROC(hard_smp_processor_id) +#endif + /* End of entry.S */ diff --git a/arch/sparc/kernel/kernel.h b/arch/sparc/kernel/kernel.h index 8278df5d4ce..1c1a7d39c7e 100644 --- a/arch/sparc/kernel/kernel.h +++ b/arch/sparc/kernel/kernel.h @@ -80,8 +80,6 @@ extern unsigned int patchme_maybe_smp_msg[]; extern void floppy_hardint(void); /* trampoline_32.S */ -extern int __smp4m_processor_id(void); -extern int __smp4d_processor_id(void); extern unsigned long sun4m_cpu_startup; extern unsigned long sun4d_cpu_startup; diff --git a/arch/sparc/kernel/leon_smp.c b/arch/sparc/kernel/leon_smp.c index f3e3630e31a..faff792d53c 100644 --- a/arch/sparc/kernel/leon_smp.c +++ b/arch/sparc/kernel/leon_smp.c @@ -73,7 +73,7 @@ static inline unsigned long do_swap(volatile unsigned long *ptr, void __cpuinit leon_callin(void) { - int cpuid = hard_smpleon_processor_id(); + int cpuid = hard_smp_processor_id(); local_ops->cache_all(); local_ops->tlb_all(); @@ -491,39 +491,12 @@ void leon_cross_call_irq(void) ccall_info.processors_out[i] = 1; } -void __init leon_blackbox_id(unsigned *addr) -{ - int rd = *addr & 0x3e000000; - int rs1 = rd >> 11; - - /* patch places where ___b_hard_smp_processor_id appears */ - addr[0] = 0x81444000 | rd; /* rd %asr17, reg */ - addr[1] = 0x8130201c | rd | rs1; /* srl reg, 0x1c, reg */ - addr[2] = 0x01000000; /* nop */ -} - -void __init leon_blackbox_current(unsigned *addr) -{ - int rd = *addr & 0x3e000000; - int rs1 = rd >> 11; - - /* patch LOAD_CURRENT macro where ___b_load_current appears */ - addr[0] = 0x81444000 | rd; /* rd %asr17, reg */ - addr[2] = 0x8130201c | rd | rs1; /* srl reg, 0x1c, reg */ - addr[4] = 0x81282002 | rd | rs1; /* sll reg, 0x2, reg */ - -} - void __init leon_init_smp(void) { /* Patch ipi15 trap table */ t_nmi[1] = t_nmi[1] + (linux_trap_ipi15_leon - linux_trap_ipi15_sun4m); - 
BTFIXUPSET_BLACKBOX(hard_smp_processor_id, leon_blackbox_id); - BTFIXUPSET_BLACKBOX(load_current, leon_blackbox_current); BTFIXUPSET_CALL(smp_cross_call, leon_cross_call, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(__hard_smp_processor_id, __leon_processor_id, - BTFIXUPCALL_NORM); BTFIXUPSET_CALL(smp_ipi_resched, leon_ipi_resched, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(smp_ipi_single, leon_ipi_single, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(smp_ipi_mask_one, leon_ipi_mask_one, BTFIXUPCALL_NORM); diff --git a/arch/sparc/kernel/setup_32.c b/arch/sparc/kernel/setup_32.c index 6f294f371d6..2f7cfb5f756 100644 --- a/arch/sparc/kernel/setup_32.c +++ b/arch/sparc/kernel/setup_32.c @@ -192,6 +192,52 @@ extern int root_mountflags; char reboot_command[COMMAND_LINE_SIZE]; +struct cpuid_patch_entry { + unsigned int addr; + unsigned int sun4d[3]; + unsigned int leon[3]; +}; +extern struct cpuid_patch_entry __cpuid_patch, __cpuid_patch_end; + +static void __init per_cpu_patch(void) +{ + struct cpuid_patch_entry *p; + + if (sparc_cpu_model == sun4m) { + /* Nothing to do, this is what the unpatched code + * targets. + */ + return; + } + + p = &__cpuid_patch; + while (p < &__cpuid_patch_end) { + unsigned long addr = p->addr; + unsigned int *insns; + + switch (sparc_cpu_model) { + case sun4d: + insns = &p->sun4d[0]; + break; + + case sparc_leon: + insns = &p->leon[0]; + break; + default: + prom_printf("Unknown cpu type, halting.\n"); + prom_halt(); + } + *(unsigned int *) (addr + 0) = insns[0]; + *(unsigned int *) (addr + 4) = insns[1]; + *(unsigned int *) (addr + 8) = insns[2]; + } +#ifdef CONFIG_SMP + local_ops->cache_all(); +#else + sparc32_cachetlb_ops->cache_all(); +#endif +} + enum sparc_cpu sparc_cpu_model; EXPORT_SYMBOL(sparc_cpu_model); @@ -295,6 +341,11 @@ void __init setup_arch(char **cmdline_p) paging_init(); + /* Now that we have the cache ops hooked up, we can patch + * instructions. 
+ */ + per_cpu_patch(); + smp_setup_cpu_possible_map(); } diff --git a/arch/sparc/kernel/sparc_ksyms_32.c b/arch/sparc/kernel/sparc_ksyms_32.c index 4ad7377e2d0..e521c54560f 100644 --- a/arch/sparc/kernel/sparc_ksyms_32.c +++ b/arch/sparc/kernel/sparc_ksyms_32.c @@ -28,10 +28,5 @@ EXPORT_SYMBOL(__ndelay); EXPORT_SYMBOL(__ret_efault); EXPORT_SYMBOL(empty_zero_page); -/* Defined using magic */ -#ifdef CONFIG_SMP -EXPORT_SYMBOL(BTFIXUP_CALL(__hard_smp_processor_id)); -#endif - /* Exporting a symbol from /init/main.c */ EXPORT_SYMBOL(saved_command_line); diff --git a/arch/sparc/kernel/sun4d_smp.c b/arch/sparc/kernel/sun4d_smp.c index f17fd287bf7..38ca0aac2ef 100644 --- a/arch/sparc/kernel/sun4d_smp.c +++ b/arch/sparc/kernel/sun4d_smp.c @@ -52,7 +52,7 @@ static inline void show_leds(int cpuid) void __cpuinit smp4d_callin(void) { - int cpuid = hard_smp4d_processor_id(); + int cpuid = hard_smp_processor_id(); unsigned long flags; /* Show we are alive */ @@ -354,7 +354,7 @@ static void smp4d_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1, /* Running cross calls. 
*/ void smp4d_cross_call_irq(void) { - int i = hard_smp4d_processor_id(); + int i = hard_smp_processor_id(); ccall_info.processors_in[i] = 1; ccall_info.func(ccall_info.arg1, ccall_info.arg2, ccall_info.arg3, @@ -365,7 +365,7 @@ void smp4d_cross_call_irq(void) void smp4d_percpu_timer_interrupt(struct pt_regs *regs) { struct pt_regs *old_regs; - int cpu = hard_smp4d_processor_id(); + int cpu = hard_smp_processor_id(); struct clock_event_device *ce; static int cpu_tick[NR_CPUS]; static char led_mask[] = { 0xe, 0xd, 0xb, 0x7, 0xb, 0xd }; @@ -391,24 +391,6 @@ void smp4d_percpu_timer_interrupt(struct pt_regs *regs) set_irq_regs(old_regs); } -void __init smp4d_blackbox_id(unsigned *addr) -{ - int rd = *addr & 0x3e000000; - - addr[0] = 0xc0800800 | rd; /* lda [%g0] ASI_M_VIKING_TMP1, reg */ - addr[1] = 0x01000000; /* nop */ - addr[2] = 0x01000000; /* nop */ -} - -void __init smp4d_blackbox_current(unsigned *addr) -{ - int rd = *addr & 0x3e000000; - - addr[0] = 0xc0800800 | rd; /* lda [%g0] ASI_M_VIKING_TMP1, reg */ - addr[2] = 0x81282002 | rd | (rd >> 11); /* sll reg, 2, reg */ - addr[4] = 0x01000000; /* nop */ -} - void __init sun4d_init_smp(void) { int i; @@ -417,10 +399,7 @@ void __init sun4d_init_smp(void) t_nmi[1] = t_nmi[1] + (linux_trap_ipi15_sun4d - linux_trap_ipi15_sun4m); /* And set btfixup... 
*/ - BTFIXUPSET_BLACKBOX(hard_smp_processor_id, smp4d_blackbox_id); - BTFIXUPSET_BLACKBOX(load_current, smp4d_blackbox_current); BTFIXUPSET_CALL(smp_cross_call, smp4d_cross_call, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(__hard_smp_processor_id, __smp4d_processor_id, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(smp_ipi_resched, smp4d_ipi_resched, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(smp_ipi_single, smp4d_ipi_single, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(smp_ipi_mask_one, smp4d_ipi_mask_one, BTFIXUPCALL_NORM); diff --git a/arch/sparc/kernel/sun4m_smp.c b/arch/sparc/kernel/sun4m_smp.c index afcf6743f0e..ff74d33e270 100644 --- a/arch/sparc/kernel/sun4m_smp.c +++ b/arch/sparc/kernel/sun4m_smp.c @@ -281,32 +281,9 @@ void smp4m_percpu_timer_interrupt(struct pt_regs *regs) set_irq_regs(old_regs); } -static void __init smp4m_blackbox_id(unsigned *addr) -{ - int rd = *addr & 0x3e000000; - int rs1 = rd >> 11; - - addr[0] = 0x81580000 | rd; /* rd %tbr, reg */ - addr[1] = 0x8130200c | rd | rs1; /* srl reg, 0xc, reg */ - addr[2] = 0x80082003 | rd | rs1; /* and reg, 3, reg */ -} - -static void __init smp4m_blackbox_current(unsigned *addr) -{ - int rd = *addr & 0x3e000000; - int rs1 = rd >> 11; - - addr[0] = 0x81580000 | rd; /* rd %tbr, reg */ - addr[2] = 0x8130200a | rd | rs1; /* srl reg, 0xa, reg */ - addr[4] = 0x8008200c | rd | rs1; /* and reg, 0xc, reg */ -} - void __init sun4m_init_smp(void) { - BTFIXUPSET_BLACKBOX(hard_smp_processor_id, smp4m_blackbox_id); - BTFIXUPSET_BLACKBOX(load_current, smp4m_blackbox_current); BTFIXUPSET_CALL(smp_cross_call, smp4m_cross_call, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(__hard_smp_processor_id, __smp4m_processor_id, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(smp_ipi_resched, smp4m_ipi_resched, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(smp_ipi_single, smp4m_ipi_single, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(smp_ipi_mask_one, smp4m_ipi_mask_one, BTFIXUPCALL_NORM); diff --git a/arch/sparc/kernel/trampoline_32.S b/arch/sparc/kernel/trampoline_32.S index 691f484e03b..7364ddc9e5a 100644 
--- a/arch/sparc/kernel/trampoline_32.S +++ b/arch/sparc/kernel/trampoline_32.S @@ -15,8 +15,8 @@ #include #include - .globl sun4m_cpu_startup, __smp4m_processor_id, __leon_processor_id - .globl sun4d_cpu_startup, __smp4d_processor_id + .globl sun4m_cpu_startup + .globl sun4d_cpu_startup __CPUINIT .align 4 @@ -94,24 +94,6 @@ smp_do_cpu_idle: call cpu_panic nop -__smp4m_processor_id: - rd %tbr, %g2 - srl %g2, 12, %g2 - and %g2, 3, %g2 - retl - mov %g1, %o7 - -__smp4d_processor_id: - lda [%g0] ASI_M_VIKING_TMP1, %g2 - retl - mov %g1, %o7 - -__leon_processor_id: - rd %asr17,%g2 - srl %g2,28,%g2 - retl - mov %g1, %o7 - /* CPUID in bootbus can be found at PA 0xff0140000 */ #define SUN4D_BOOTBUS_CPUID 0xf0140000 -- cgit v1.2.3-70-g09d2 From 1b35a57b1c1781f0fc8fc554f732b3a5408c5244 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 15 May 2012 11:23:01 -0700 Subject: sparc32: Kill off software 32-bit multiply/divide routines. For the explicit calls to .udiv/.umul in assembler, I made a mechanical (read as: safe) transformation. I didn't attempt to make any simplifications. In particular, __ndelay and __udelay can be simplified significantly. Some of the %y reads are unnecessary and these routines have no need any longer for allocating a register window, they can be leaf functions. Signed-off-by: David S. 
Miller --- arch/sparc/kernel/entry.S | 27 +-- arch/sparc/kernel/head_32.S | 45 ----- arch/sparc/kernel/kernel.h | 3 - arch/sparc/kernel/module.c | 21 +-- arch/sparc/kernel/muldiv.c | 238 --------------------------- arch/sparc/kernel/traps_32.c | 2 - arch/sparc/lib/Makefile | 2 +- arch/sparc/lib/divdi3.S | 4 +- arch/sparc/lib/ksyms.c | 17 -- arch/sparc/lib/mul.S | 137 --------------- arch/sparc/lib/muldi3.S | 4 +- arch/sparc/lib/rem.S | 384 ------------------------------------------- arch/sparc/lib/sdiv.S | 381 ------------------------------------------ arch/sparc/lib/udiv.S | 357 ---------------------------------------- arch/sparc/lib/udivdi3.S | 3 +- arch/sparc/lib/umul.S | 171 ------------------- arch/sparc/lib/urem.S | 357 ---------------------------------------- 17 files changed, 24 insertions(+), 2129 deletions(-) delete mode 100644 arch/sparc/kernel/muldiv.c delete mode 100644 arch/sparc/lib/mul.S delete mode 100644 arch/sparc/lib/rem.S delete mode 100644 arch/sparc/lib/sdiv.S delete mode 100644 arch/sparc/lib/udiv.S delete mode 100644 arch/sparc/lib/umul.S delete mode 100644 arch/sparc/lib/urem.S (limited to 'arch/sparc/kernel/kernel.h') diff --git a/arch/sparc/kernel/entry.S b/arch/sparc/kernel/entry.S index 773f3f05bf2..3f3976e0e98 100644 --- a/arch/sparc/kernel/entry.S +++ b/arch/sparc/kernel/entry.S @@ -1161,11 +1161,13 @@ fpload: .globl __ndelay __ndelay: save %sp, -STACKFRAME_SZ, %sp - mov %i0, %o0 - call .umul ! round multiplier up so large ns ok - mov 0x1ae, %o1 ! 2**32 / (1 000 000 000 / HZ) - call .umul - mov %i1, %o1 ! udelay_val + mov %i0, %o0 ! round multiplier up so large ns ok + mov 0x1ae, %o1 ! 2**32 / (1 000 000 000 / HZ) + umul %o0, %o1, %o0 + rd %y, %o1 + mov %i1, %o1 ! udelay_val + umul %o0, %o1, %o0 + rd %y, %o1 ba delay_continue mov %o1, %o0 ! >>32 later for better resolution @@ -1174,18 +1176,21 @@ __udelay: save %sp, -STACKFRAME_SZ, %sp mov %i0, %o0 sethi %hi(0x10c7), %o1 ! 
round multiplier up so large us ok - call .umul - or %o1, %lo(0x10c7), %o1 ! 2**32 / 1 000 000 - call .umul - mov %i1, %o1 ! udelay_val + or %o1, %lo(0x10c7), %o1 ! 2**32 / 1 000 000 + umul %o0, %o1, %o0 + rd %y, %o1 + mov %i1, %o1 ! udelay_val + umul %o0, %o1, %o0 + rd %y, %o1 sethi %hi(0x028f4b62), %l0 ! Add in rounding constant * 2**32, or %g0, %lo(0x028f4b62), %l0 addcc %o0, %l0, %o0 ! 2**32 * 0.009 999 bcs,a 3f add %o1, 0x01, %o1 3: - call .umul - mov HZ, %o0 ! >>32 earlier for wider range + mov HZ, %o0 ! >>32 earlier for wider range + umul %o0, %o1, %o0 + rd %y, %o1 delay_continue: cmp %o0, 0x0 diff --git a/arch/sparc/kernel/head_32.S b/arch/sparc/kernel/head_32.S index 6c95e9ff871..69645cac54b 100644 --- a/arch/sparc/kernel/head_32.S +++ b/arch/sparc/kernel/head_32.S @@ -746,51 +746,6 @@ sun4d_init: /* Fall through to sun4m_init */ sun4m_init: - -#define PATCH_IT(dst, src) \ - set (dst), %g5; \ - set (src), %g4; \ - ld [%g4], %g3; \ - st %g3, [%g5]; \ - ld [%g4+0x4], %g3; \ - st %g3, [%g5+0x4]; - - /* Signed multiply. */ - PATCH_IT(.mul, .mul_patch) - PATCH_IT(.mul+0x08, .mul_patch+0x08) - - /* Signed remainder. */ - PATCH_IT(.rem, .rem_patch) - PATCH_IT(.rem+0x08, .rem_patch+0x08) - PATCH_IT(.rem+0x10, .rem_patch+0x10) - PATCH_IT(.rem+0x18, .rem_patch+0x18) - PATCH_IT(.rem+0x20, .rem_patch+0x20) - PATCH_IT(.rem+0x28, .rem_patch+0x28) - - /* Signed division. */ - PATCH_IT(.div, .div_patch) - PATCH_IT(.div+0x08, .div_patch+0x08) - PATCH_IT(.div+0x10, .div_patch+0x10) - PATCH_IT(.div+0x18, .div_patch+0x18) - PATCH_IT(.div+0x20, .div_patch+0x20) - - /* Unsigned multiply. */ - PATCH_IT(.umul, .umul_patch) - PATCH_IT(.umul+0x08, .umul_patch+0x08) - - /* Unsigned remainder. */ - PATCH_IT(.urem, .urem_patch) - PATCH_IT(.urem+0x08, .urem_patch+0x08) - PATCH_IT(.urem+0x10, .urem_patch+0x10) - PATCH_IT(.urem+0x18, .urem_patch+0x18) - - /* Unsigned division. 
*/ - PATCH_IT(.udiv, .udiv_patch) - PATCH_IT(.udiv+0x08, .udiv_patch+0x08) - PATCH_IT(.udiv+0x10, .udiv_patch+0x10) - -#undef PATCH_IT - /* Ok, the PROM could have done funny things and apple cider could still * be sitting in the fault status/address registers. Read them all to * clear them so we don't get magic faults later on. diff --git a/arch/sparc/kernel/kernel.h b/arch/sparc/kernel/kernel.h index 1c1a7d39c7e..a86372d3458 100644 --- a/arch/sparc/kernel/kernel.h +++ b/arch/sparc/kernel/kernel.h @@ -32,9 +32,6 @@ extern void cpu_probe(void); /* traps_32.c */ extern void handle_hw_divzero(struct pt_regs *regs, unsigned long pc, unsigned long npc, unsigned long psr); -/* muldiv.c */ -extern int do_user_muldiv (struct pt_regs *, unsigned long); - /* irq_32.c */ extern struct irqaction static_irqaction[]; extern int static_irq_count; diff --git a/arch/sparc/kernel/module.c b/arch/sparc/kernel/module.c index 276359e1ff5..15e0a169397 100644 --- a/arch/sparc/kernel/module.c +++ b/arch/sparc/kernel/module.c @@ -32,26 +32,11 @@ static void *module_map(unsigned long size) GFP_KERNEL, PAGE_KERNEL, -1, __builtin_return_address(0)); } - -static char *dot2underscore(char *name) -{ - return name; -} #else static void *module_map(unsigned long size) { return vmalloc(size); } - -/* Replace references to .func with _Func */ -static char *dot2underscore(char *name) -{ - if (name[0] == '.') { - name[0] = '_'; - name[1] = toupper(name[1]); - } - return name; -} #endif /* CONFIG_SPARC64 */ void *module_alloc(unsigned long size) @@ -93,12 +78,8 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, for (i = 1; i < sechdrs[symidx].sh_size / sizeof(Elf_Sym); i++) { if (sym[i].st_shndx == SHN_UNDEF) { - if (ELF_ST_TYPE(sym[i].st_info) == STT_REGISTER) { + if (ELF_ST_TYPE(sym[i].st_info) == STT_REGISTER) sym[i].st_shndx = SHN_ABS; - } else { - char *name = strtab + sym[i].st_name; - dot2underscore(name); - } } } return 0; diff --git a/arch/sparc/kernel/muldiv.c b/arch/sparc/kernel/muldiv.c 
deleted file mode 100644 index f7db516b07d..00000000000 --- a/arch/sparc/kernel/muldiv.c +++ /dev/null @@ -1,238 +0,0 @@ -/* - * muldiv.c: Hardware multiply/division illegal instruction trap - * for sun4c/sun4 (which do not have those instructions) - * - * Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz) - * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) - * - * 2004-12-25 Krzysztof Helt (krzysztof.h1@wp.pl) - * - fixed registers constrains in inline assembly declarations - */ - -#include -#include -#include -#include -#include -#include - -#include "kernel.h" - -/* #define DEBUG_MULDIV */ - -static inline int has_imm13(int insn) -{ - return (insn & 0x2000); -} - -static inline int is_foocc(int insn) -{ - return (insn & 0x800000); -} - -static inline int sign_extend_imm13(int imm) -{ - return imm << 19 >> 19; -} - -static inline void advance(struct pt_regs *regs) -{ - regs->pc = regs->npc; - regs->npc += 4; -} - -static inline void maybe_flush_windows(unsigned int rs1, unsigned int rs2, - unsigned int rd) -{ - if(rs2 >= 16 || rs1 >= 16 || rd >= 16) { - /* Wheee... */ - __asm__ __volatile__("save %sp, -0x40, %sp\n\t" - "save %sp, -0x40, %sp\n\t" - "save %sp, -0x40, %sp\n\t" - "save %sp, -0x40, %sp\n\t" - "save %sp, -0x40, %sp\n\t" - "save %sp, -0x40, %sp\n\t" - "save %sp, -0x40, %sp\n\t" - "restore; restore; restore; restore;\n\t" - "restore; restore; restore;\n\t"); - } -} - -#define fetch_reg(reg, regs) ({ \ - struct reg_window32 __user *win; \ - register unsigned long ret; \ - \ - if (!(reg)) ret = 0; \ - else if ((reg) < 16) { \ - ret = regs->u_regs[(reg)]; \ - } else { \ - /* Ho hum, the slightly complicated case. 
*/ \ - win = (struct reg_window32 __user *)regs->u_regs[UREG_FP];\ - if (get_user (ret, &win->locals[(reg) - 16])) return -1;\ - } \ - ret; \ -}) - -static inline int -store_reg(unsigned int result, unsigned int reg, struct pt_regs *regs) -{ - struct reg_window32 __user *win; - - if (!reg) - return 0; - if (reg < 16) { - regs->u_regs[reg] = result; - return 0; - } else { - /* need to use put_user() in this case: */ - win = (struct reg_window32 __user *) regs->u_regs[UREG_FP]; - return (put_user(result, &win->locals[reg - 16])); - } -} - -/* Should return 0 if mul/div emulation succeeded and SIGILL should - * not be issued. - */ -int do_user_muldiv(struct pt_regs *regs, unsigned long pc) -{ - unsigned int insn; - int inst; - unsigned int rs1, rs2, rdv; - - if (!pc) - return -1; /* This happens to often, I think */ - if (get_user (insn, (unsigned int __user *)pc)) - return -1; - if ((insn & 0xc1400000) != 0x80400000) - return -1; - inst = ((insn >> 19) & 0xf); - if ((inst & 0xe) != 10 && (inst & 0xe) != 14) - return -1; - - /* Now we know we have to do something with umul, smul, udiv or sdiv */ - rs1 = (insn >> 14) & 0x1f; - rs2 = insn & 0x1f; - rdv = (insn >> 25) & 0x1f; - if (has_imm13(insn)) { - maybe_flush_windows(rs1, 0, rdv); - rs2 = sign_extend_imm13(insn); - } else { - maybe_flush_windows(rs1, rs2, rdv); - rs2 = fetch_reg(rs2, regs); - } - rs1 = fetch_reg(rs1, regs); - switch (inst) { - case 10: /* umul */ -#ifdef DEBUG_MULDIV - printk ("unsigned muldiv: 0x%x * 0x%x = ", rs1, rs2); -#endif - __asm__ __volatile__ ("\n\t" - "mov %0, %%o0\n\t" - "call .umul\n\t" - " mov %1, %%o1\n\t" - "mov %%o0, %0\n\t" - "mov %%o1, %1\n\t" - : "=r" (rs1), "=r" (rs2) - : "0" (rs1), "1" (rs2) - : "o0", "o1", "o2", "o3", "o4", "o5", "o7", "cc"); -#ifdef DEBUG_MULDIV - printk ("0x%x%08x\n", rs2, rs1); -#endif - if (store_reg(rs1, rdv, regs)) - return -1; - regs->y = rs2; - break; - case 11: /* smul */ -#ifdef DEBUG_MULDIV - printk ("signed muldiv: 0x%x * 0x%x = ", rs1, rs2); 
-#endif - __asm__ __volatile__ ("\n\t" - "mov %0, %%o0\n\t" - "call .mul\n\t" - " mov %1, %%o1\n\t" - "mov %%o0, %0\n\t" - "mov %%o1, %1\n\t" - : "=r" (rs1), "=r" (rs2) - : "0" (rs1), "1" (rs2) - : "o0", "o1", "o2", "o3", "o4", "o5", "o7", "cc"); -#ifdef DEBUG_MULDIV - printk ("0x%x%08x\n", rs2, rs1); -#endif - if (store_reg(rs1, rdv, regs)) - return -1; - regs->y = rs2; - break; - case 14: /* udiv */ -#ifdef DEBUG_MULDIV - printk ("unsigned muldiv: 0x%x%08x / 0x%x = ", regs->y, rs1, rs2); -#endif - if (!rs2) { -#ifdef DEBUG_MULDIV - printk ("DIVISION BY ZERO\n"); -#endif - handle_hw_divzero (regs, pc, regs->npc, regs->psr); - return 0; - } - __asm__ __volatile__ ("\n\t" - "mov %2, %%o0\n\t" - "mov %0, %%o1\n\t" - "mov %%g0, %%o2\n\t" - "call __udivdi3\n\t" - " mov %1, %%o3\n\t" - "mov %%o1, %0\n\t" - "mov %%o0, %1\n\t" - : "=r" (rs1), "=r" (rs2) - : "r" (regs->y), "0" (rs1), "1" (rs2) - : "o0", "o1", "o2", "o3", "o4", "o5", "o7", - "g1", "g2", "g3", "cc"); -#ifdef DEBUG_MULDIV - printk ("0x%x\n", rs1); -#endif - if (store_reg(rs1, rdv, regs)) - return -1; - break; - case 15: /* sdiv */ -#ifdef DEBUG_MULDIV - printk ("signed muldiv: 0x%x%08x / 0x%x = ", regs->y, rs1, rs2); -#endif - if (!rs2) { -#ifdef DEBUG_MULDIV - printk ("DIVISION BY ZERO\n"); -#endif - handle_hw_divzero (regs, pc, regs->npc, regs->psr); - return 0; - } - __asm__ __volatile__ ("\n\t" - "mov %2, %%o0\n\t" - "mov %0, %%o1\n\t" - "mov %%g0, %%o2\n\t" - "call __divdi3\n\t" - " mov %1, %%o3\n\t" - "mov %%o1, %0\n\t" - "mov %%o0, %1\n\t" - : "=r" (rs1), "=r" (rs2) - : "r" (regs->y), "0" (rs1), "1" (rs2) - : "o0", "o1", "o2", "o3", "o4", "o5", "o7", - "g1", "g2", "g3", "cc"); -#ifdef DEBUG_MULDIV - printk ("0x%x\n", rs1); -#endif - if (store_reg(rs1, rdv, regs)) - return -1; - break; - } - if (is_foocc (insn)) { - regs->psr &= ~PSR_ICC; - if ((inst & 0xe) == 14) { - /* ?div */ - if (rs2) regs->psr |= PSR_V; - } - if (!rs1) regs->psr |= PSR_Z; - if (((int)rs1) < 0) regs->psr |= PSR_N; -#ifdef 
DEBUG_MULDIV - printk ("psr muldiv: %08x\n", regs->psr); -#endif - } - advance(regs); - return 0; -} diff --git a/arch/sparc/kernel/traps_32.c b/arch/sparc/kernel/traps_32.c index d2de2133314..a5785ea2a85 100644 --- a/arch/sparc/kernel/traps_32.c +++ b/arch/sparc/kernel/traps_32.c @@ -120,8 +120,6 @@ void do_illegal_instruction(struct pt_regs *regs, unsigned long pc, unsigned lon printk("Ill instr. at pc=%08lx instruction is %08lx\n", regs->pc, *(unsigned long *)regs->pc); #endif - if (!do_user_muldiv (regs, pc)) - return; info.si_signo = SIGILL; info.si_errno = 0; diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index 33d8d85ad59..ead6df25054 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -4,7 +4,7 @@ asflags-y := -ansi -DST_DIV0=0x02 ccflags-y := -Werror -lib-$(CONFIG_SPARC32) += mul.o rem.o sdiv.o udiv.o umul.o urem.o ashrdi3.o +lib-$(CONFIG_SPARC32) += ashrdi3.o lib-$(CONFIG_SPARC32) += memcpy.o memset.o lib-y += strlen.o lib-y += checksum_$(BITS).o diff --git a/arch/sparc/lib/divdi3.S b/arch/sparc/lib/divdi3.S index d74bc0925f2..9614b48b6ef 100644 --- a/arch/sparc/lib/divdi3.S +++ b/arch/sparc/lib/divdi3.S @@ -19,7 +19,6 @@ Boston, MA 02111-1307, USA. */ .text .align 4 - .global .udiv .globl __divdi3 __divdi3: save %sp,-104,%sp @@ -83,8 +82,9 @@ __divdi3: bne .LL85 mov %i0,%o2 mov 1,%o0 - call .udiv,0 mov 0,%o1 + wr %g0, 0, %y + udiv %o0, %o1, %o0 mov %o0,%o4 mov %i0,%o2 .LL85: diff --git a/arch/sparc/lib/ksyms.c b/arch/sparc/lib/ksyms.c index 1bc8972f029..2dc30875c8b 100644 --- a/arch/sparc/lib/ksyms.c +++ b/arch/sparc/lib/ksyms.c @@ -61,16 +61,6 @@ extern void ___rw_read_try(void); extern void ___rw_read_exit(void); extern void ___rw_write_enter(void); -/* Alias functions whose names begin with "." and export the aliases. - * The module references will be fixed up by module_frob_arch_sections. 
- */ -extern int _Div(int, int); -extern int _Mul(int, int); -extern int _Rem(int, int); -extern unsigned _Udiv(unsigned, unsigned); -extern unsigned _Umul(unsigned, unsigned); -extern unsigned _Urem(unsigned, unsigned); - /* Networking helper routines. */ EXPORT_SYMBOL(__csum_partial_copy_sparc_generic); @@ -95,13 +85,6 @@ EXPORT_SYMBOL(__ashldi3); EXPORT_SYMBOL(__lshrdi3); EXPORT_SYMBOL(__muldi3); EXPORT_SYMBOL(__divdi3); - -EXPORT_SYMBOL(_Rem); -EXPORT_SYMBOL(_Urem); -EXPORT_SYMBOL(_Mul); -EXPORT_SYMBOL(_Umul); -EXPORT_SYMBOL(_Div); -EXPORT_SYMBOL(_Udiv); #endif /* diff --git a/arch/sparc/lib/mul.S b/arch/sparc/lib/mul.S deleted file mode 100644 index c45470d0b0c..00000000000 --- a/arch/sparc/lib/mul.S +++ /dev/null @@ -1,137 +0,0 @@ -/* - * mul.S: This routine was taken from glibc-1.09 and is covered - * by the GNU Library General Public License Version 2. - */ - -/* - * Signed multiply, from Appendix E of the Sparc Version 8 - * Architecture Manual. - */ - -/* - * Returns %o0 * %o1 in %o1%o0 (i.e., %o1 holds the upper 32 bits of - * the 64-bit product). - * - * This code optimizes short (less than 13-bit) multiplies. - */ - - .globl .mul - .globl _Mul -.mul: -_Mul: /* needed for export */ - mov %o0, %y ! multiplier -> Y - andncc %o0, 0xfff, %g0 ! test bits 12..31 - be Lmul_shortway ! if zero, can do it the short way - andcc %g0, %g0, %o4 ! zero the partial product and clear N and V - - /* - * Long multiply. 32 steps, followed by a final shift step. - */ - mulscc %o4, %o1, %o4 ! 1 - mulscc %o4, %o1, %o4 ! 2 - mulscc %o4, %o1, %o4 ! 3 - mulscc %o4, %o1, %o4 ! 4 - mulscc %o4, %o1, %o4 ! 5 - mulscc %o4, %o1, %o4 ! 6 - mulscc %o4, %o1, %o4 ! 7 - mulscc %o4, %o1, %o4 ! 8 - mulscc %o4, %o1, %o4 ! 9 - mulscc %o4, %o1, %o4 ! 10 - mulscc %o4, %o1, %o4 ! 11 - mulscc %o4, %o1, %o4 ! 12 - mulscc %o4, %o1, %o4 ! 13 - mulscc %o4, %o1, %o4 ! 14 - mulscc %o4, %o1, %o4 ! 15 - mulscc %o4, %o1, %o4 ! 16 - mulscc %o4, %o1, %o4 ! 17 - mulscc %o4, %o1, %o4 ! 
18 - mulscc %o4, %o1, %o4 ! 19 - mulscc %o4, %o1, %o4 ! 20 - mulscc %o4, %o1, %o4 ! 21 - mulscc %o4, %o1, %o4 ! 22 - mulscc %o4, %o1, %o4 ! 23 - mulscc %o4, %o1, %o4 ! 24 - mulscc %o4, %o1, %o4 ! 25 - mulscc %o4, %o1, %o4 ! 26 - mulscc %o4, %o1, %o4 ! 27 - mulscc %o4, %o1, %o4 ! 28 - mulscc %o4, %o1, %o4 ! 29 - mulscc %o4, %o1, %o4 ! 30 - mulscc %o4, %o1, %o4 ! 31 - mulscc %o4, %o1, %o4 ! 32 - mulscc %o4, %g0, %o4 ! final shift - - ! If %o0 was negative, the result is - ! (%o0 * %o1) + (%o1 << 32)) - ! We fix that here. - -#if 0 - tst %o0 - bge 1f - rd %y, %o0 - - ! %o0 was indeed negative; fix upper 32 bits of result by subtracting - ! %o1 (i.e., return %o4 - %o1 in %o1). - retl - sub %o4, %o1, %o1 - -1: - retl - mov %o4, %o1 -#else - /* Faster code adapted from tege@sics.se's code for umul.S. */ - sra %o0, 31, %o2 ! make mask from sign bit - and %o1, %o2, %o2 ! %o2 = 0 or %o1, depending on sign of %o0 - rd %y, %o0 ! get lower half of product - retl - sub %o4, %o2, %o1 ! subtract compensation - ! and put upper half in place -#endif - -Lmul_shortway: - /* - * Short multiply. 12 steps, followed by a final shift step. - * The resulting bits are off by 12 and (32-12) = 20 bit positions, - * but there is no problem with %o0 being negative (unlike above). - */ - mulscc %o4, %o1, %o4 ! 1 - mulscc %o4, %o1, %o4 ! 2 - mulscc %o4, %o1, %o4 ! 3 - mulscc %o4, %o1, %o4 ! 4 - mulscc %o4, %o1, %o4 ! 5 - mulscc %o4, %o1, %o4 ! 6 - mulscc %o4, %o1, %o4 ! 7 - mulscc %o4, %o1, %o4 ! 8 - mulscc %o4, %o1, %o4 ! 9 - mulscc %o4, %o1, %o4 ! 10 - mulscc %o4, %o1, %o4 ! 11 - mulscc %o4, %o1, %o4 ! 12 - mulscc %o4, %g0, %o4 ! final shift - - /* - * %o4 has 20 of the bits that should be in the low part of the - * result; %y has the bottom 12 (as %y's top 12). 
That is: - * - * %o4 %y - * +----------------+----------------+ - * | -12- | -20- | -12- | -20- | - * +------(---------+------)---------+ - * --hi-- ----low-part---- - * - * The upper 12 bits of %o4 should be sign-extended to form the - * high part of the product (i.e., highpart = %o4 >> 20). - */ - - rd %y, %o5 - sll %o4, 12, %o0 ! shift middle bits left 12 - srl %o5, 20, %o5 ! shift low bits right 20, zero fill at left - or %o5, %o0, %o0 ! construct low part of result - retl - sra %o4, 20, %o1 ! ... and extract high part of result - - .globl .mul_patch -.mul_patch: - smul %o0, %o1, %o0 - retl - rd %y, %o1 - nop diff --git a/arch/sparc/lib/muldi3.S b/arch/sparc/lib/muldi3.S index 7f17872d060..9794939d1c1 100644 --- a/arch/sparc/lib/muldi3.S +++ b/arch/sparc/lib/muldi3.S @@ -63,12 +63,12 @@ __muldi3: rd %y, %o1 mov %o1, %l3 mov %i1, %o0 - call .umul mov %i2, %o1 + umul %o0, %o1, %o0 mov %o0, %l0 mov %i0, %o0 - call .umul mov %i3, %o1 + umul %o0, %o1, %o0 add %l0, %o0, %l0 mov %l2, %i0 add %l2, %l0, %i0 diff --git a/arch/sparc/lib/rem.S b/arch/sparc/lib/rem.S deleted file mode 100644 index 42fb8625281..00000000000 --- a/arch/sparc/lib/rem.S +++ /dev/null @@ -1,384 +0,0 @@ -/* - * rem.S: This routine was taken from glibc-1.09 and is covered - * by the GNU Library General Public License Version 2. - */ - - -/* This file is generated from divrem.m4; DO NOT EDIT! */ -/* - * Division and remainder, from Appendix E of the Sparc Version 8 - * Architecture Manual, with fixes from Gordon Irlam. - */ - -/* - * Input: dividend and divisor in %o0 and %o1 respectively. 
- * - * m4 parameters: - * .rem name of function to generate - * rem rem=div => %o0 / %o1; rem=rem => %o0 % %o1 - * true true=true => signed; true=false => unsigned - * - * Algorithm parameters: - * N how many bits per iteration we try to get (4) - * WORDSIZE total number of bits (32) - * - * Derived constants: - * TOPBITS number of bits in the top decade of a number - * - * Important variables: - * Q the partial quotient under development (initially 0) - * R the remainder so far, initially the dividend - * ITER number of main division loop iterations required; - * equal to ceil(log2(quotient) / N). Note that this - * is the log base (2^N) of the quotient. - * V the current comparand, initially divisor*2^(ITER*N-1) - * - * Cost: - * Current estimate for non-large dividend is - * ceil(log2(quotient) / N) * (10 + 7N/2) + C - * A large dividend is one greater than 2^(31-TOPBITS) and takes a - * different path, as the upper bits of the quotient must be developed - * one bit at a time. - */ - - - .globl .rem - .globl _Rem -.rem: -_Rem: /* needed for export */ - ! compute sign of result; if neither is negative, no problem - orcc %o1, %o0, %g0 ! either negative? - bge 2f ! no, go do the divide - mov %o0, %g2 ! compute sign in any case - - tst %o1 - bge 1f - tst %o0 - ! %o1 is definitely negative; %o0 might also be negative - bge 2f ! if %o0 not negative... - sub %g0, %o1, %o1 ! in any case, make %o1 nonneg -1: ! %o0 is negative, %o1 is nonnegative - sub %g0, %o0, %o0 ! make %o0 nonnegative -2: - - ! Ready to divide. Compute size of quotient; scale comparand. - orcc %o1, %g0, %o5 - bne 1f - mov %o0, %o3 - - ! Divide by zero trap. If it returns, return 0 (about as - ! wrong as possible, but that is what SunOS does...). - ta ST_DIV0 - retl - clr %o0 - -1: - cmp %o3, %o5 ! if %o1 exceeds %o0, done - blu Lgot_result ! (and algorithm fails otherwise) - clr %o2 - - sethi %hi(1 << (32 - 4 - 1)), %g1 - - cmp %o3, %g1 - blu Lnot_really_big - clr %o4 - - ! 
Here the dividend is >= 2**(31-N) or so. We must be careful here, - ! as our usual N-at-a-shot divide step will cause overflow and havoc. - ! The number of bits in the result here is N*ITER+SC, where SC <= N. - ! Compute ITER in an unorthodox manner: know we need to shift V into - ! the top decade: so do not even bother to compare to R. - 1: - cmp %o5, %g1 - bgeu 3f - mov 1, %g7 - - sll %o5, 4, %o5 - - b 1b - add %o4, 1, %o4 - - ! Now compute %g7. - 2: - addcc %o5, %o5, %o5 - - bcc Lnot_too_big - add %g7, 1, %g7 - - ! We get here if the %o1 overflowed while shifting. - ! This means that %o3 has the high-order bit set. - ! Restore %o5 and subtract from %o3. - sll %g1, 4, %g1 ! high order bit - srl %o5, 1, %o5 ! rest of %o5 - add %o5, %g1, %o5 - - b Ldo_single_div - sub %g7, 1, %g7 - - Lnot_too_big: - 3: - cmp %o5, %o3 - blu 2b - nop - - be Ldo_single_div - nop - /* NB: these are commented out in the V8-Sparc manual as well */ - /* (I do not understand this) */ - ! %o5 > %o3: went too far: back up 1 step - ! srl %o5, 1, %o5 - ! dec %g7 - ! do single-bit divide steps - ! - ! We have to be careful here. We know that %o3 >= %o5, so we can do the - ! first divide step without thinking. BUT, the others are conditional, - ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high- - ! order bit set in the first step, just falling into the regular - ! division loop will mess up the first time around. - ! So we unroll slightly... - Ldo_single_div: - subcc %g7, 1, %g7 - bl Lend_regular_divide - nop - - sub %o3, %o5, %o3 - mov 1, %o2 - - b Lend_single_divloop - nop - Lsingle_divloop: - sll %o2, 1, %o2 - - bl 1f - srl %o5, 1, %o5 - ! %o3 >= 0 - sub %o3, %o5, %o3 - - b 2f - add %o2, 1, %o2 - 1: ! 
%o3 < 0 - add %o3, %o5, %o3 - sub %o2, 1, %o2 - 2: - Lend_single_divloop: - subcc %g7, 1, %g7 - bge Lsingle_divloop - tst %o3 - - b,a Lend_regular_divide - -Lnot_really_big: -1: - sll %o5, 4, %o5 - cmp %o5, %o3 - bleu 1b - addcc %o4, 1, %o4 - be Lgot_result - sub %o4, 1, %o4 - - tst %o3 ! set up for initial iteration -Ldivloop: - sll %o2, 4, %o2 - ! depth 1, accumulated bits 0 - bl L.1.16 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 2, accumulated bits 1 - bl L.2.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 3, accumulated bits 3 - bl L.3.19 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits 7 - bl L.4.23 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - - b 9f - add %o2, (7*2+1), %o2 - -L.4.23: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (7*2-1), %o2 - -L.3.19: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits 5 - bl L.4.21 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (5*2+1), %o2 - -L.4.21: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (5*2-1), %o2 - -L.2.17: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 3, accumulated bits 1 - bl L.3.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits 3 - bl L.4.19 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (3*2+1), %o2 - -L.4.19: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (3*2-1), %o2 - -L.3.17: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits 1 - bl L.4.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (1*2+1), %o2 - -L.4.17: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (1*2-1), %o2 - -L.1.16: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 2, accumulated bits -1 - bl L.2.15 - srl %o5,1,%o5 - ! 
remainder is positive - subcc %o3,%o5,%o3 - ! depth 3, accumulated bits -1 - bl L.3.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits -1 - bl L.4.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-1*2+1), %o2 - -L.4.15: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-1*2-1), %o2 - -L.3.15: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits -3 - bl L.4.13 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-3*2+1), %o2 - -L.4.13: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-3*2-1), %o2 - -L.2.15: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 3, accumulated bits -3 - bl L.3.13 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits -5 - bl L.4.11 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-5*2+1), %o2 - -L.4.11: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-5*2-1), %o2 - - -L.3.13: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits -7 - bl L.4.9 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-7*2+1), %o2 - -L.4.9: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-7*2-1), %o2 - - 9: -Lend_regular_divide: - subcc %o4, 1, %o4 - bge Ldivloop - tst %o3 - - bl,a Lgot_result - ! non-restoring fixup here (one instruction only!) - add %o3, %o1, %o3 - -Lgot_result: - ! 
check to see if answer should be < 0 - tst %g2 - bl,a 1f - sub %g0, %o3, %o3 -1: - retl - mov %o3, %o0 - - .globl .rem_patch -.rem_patch: - sra %o0, 0x1f, %o4 - wr %o4, 0x0, %y - nop - nop - nop - sdivcc %o0, %o1, %o2 - bvs,a 1f - xnor %o2, %g0, %o2 -1: smul %o2, %o1, %o2 - retl - sub %o0, %o2, %o0 - nop diff --git a/arch/sparc/lib/sdiv.S b/arch/sparc/lib/sdiv.S deleted file mode 100644 index f0a0d4e4db7..00000000000 --- a/arch/sparc/lib/sdiv.S +++ /dev/null @@ -1,381 +0,0 @@ -/* - * sdiv.S: This routine was taken from glibc-1.09 and is covered - * by the GNU Library General Public License Version 2. - */ - - -/* This file is generated from divrem.m4; DO NOT EDIT! */ -/* - * Division and remainder, from Appendix E of the Sparc Version 8 - * Architecture Manual, with fixes from Gordon Irlam. - */ - -/* - * Input: dividend and divisor in %o0 and %o1 respectively. - * - * m4 parameters: - * .div name of function to generate - * div div=div => %o0 / %o1; div=rem => %o0 % %o1 - * true true=true => signed; true=false => unsigned - * - * Algorithm parameters: - * N how many bits per iteration we try to get (4) - * WORDSIZE total number of bits (32) - * - * Derived constants: - * TOPBITS number of bits in the top decade of a number - * - * Important variables: - * Q the partial quotient under development (initially 0) - * R the remainder so far, initially the dividend - * ITER number of main division loop iterations required; - * equal to ceil(log2(quotient) / N). Note that this - * is the log base (2^N) of the quotient. - * V the current comparand, initially divisor*2^(ITER*N-1) - * - * Cost: - * Current estimate for non-large dividend is - * ceil(log2(quotient) / N) * (10 + 7N/2) + C - * A large dividend is one greater than 2^(31-TOPBITS) and takes a - * different path, as the upper bits of the quotient must be developed - * one bit at a time. - */ - - - .globl .div - .globl _Div -.div: -_Div: /* needed for export */ - ! 
compute sign of result; if neither is negative, no problem - orcc %o1, %o0, %g0 ! either negative? - bge 2f ! no, go do the divide - xor %o1, %o0, %g2 ! compute sign in any case - - tst %o1 - bge 1f - tst %o0 - ! %o1 is definitely negative; %o0 might also be negative - bge 2f ! if %o0 not negative... - sub %g0, %o1, %o1 ! in any case, make %o1 nonneg -1: ! %o0 is negative, %o1 is nonnegative - sub %g0, %o0, %o0 ! make %o0 nonnegative -2: - - ! Ready to divide. Compute size of quotient; scale comparand. - orcc %o1, %g0, %o5 - bne 1f - mov %o0, %o3 - - ! Divide by zero trap. If it returns, return 0 (about as - ! wrong as possible, but that is what SunOS does...). - ta ST_DIV0 - retl - clr %o0 - -1: - cmp %o3, %o5 ! if %o1 exceeds %o0, done - blu Lgot_result ! (and algorithm fails otherwise) - clr %o2 - - sethi %hi(1 << (32 - 4 - 1)), %g1 - - cmp %o3, %g1 - blu Lnot_really_big - clr %o4 - - ! Here the dividend is >= 2**(31-N) or so. We must be careful here, - ! as our usual N-at-a-shot divide step will cause overflow and havoc. - ! The number of bits in the result here is N*ITER+SC, where SC <= N. - ! Compute ITER in an unorthodox manner: know we need to shift V into - ! the top decade: so do not even bother to compare to R. - 1: - cmp %o5, %g1 - bgeu 3f - mov 1, %g7 - - sll %o5, 4, %o5 - - b 1b - add %o4, 1, %o4 - - ! Now compute %g7. - 2: - addcc %o5, %o5, %o5 - bcc Lnot_too_big - add %g7, 1, %g7 - - ! We get here if the %o1 overflowed while shifting. - ! This means that %o3 has the high-order bit set. - ! Restore %o5 and subtract from %o3. - sll %g1, 4, %g1 ! high order bit - srl %o5, 1, %o5 ! rest of %o5 - add %o5, %g1, %o5 - - b Ldo_single_div - sub %g7, 1, %g7 - - Lnot_too_big: - 3: - cmp %o5, %o3 - blu 2b - nop - - be Ldo_single_div - nop - /* NB: these are commented out in the V8-Sparc manual as well */ - /* (I do not understand this) */ - ! %o5 > %o3: went too far: back up 1 step - ! srl %o5, 1, %o5 - ! dec %g7 - ! do single-bit divide steps - ! - ! 
We have to be careful here. We know that %o3 >= %o5, so we can do the - ! first divide step without thinking. BUT, the others are conditional, - ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high- - ! order bit set in the first step, just falling into the regular - ! division loop will mess up the first time around. - ! So we unroll slightly... - Ldo_single_div: - subcc %g7, 1, %g7 - bl Lend_regular_divide - nop - - sub %o3, %o5, %o3 - mov 1, %o2 - - b Lend_single_divloop - nop - Lsingle_divloop: - sll %o2, 1, %o2 - - bl 1f - srl %o5, 1, %o5 - ! %o3 >= 0 - sub %o3, %o5, %o3 - - b 2f - add %o2, 1, %o2 - 1: ! %o3 < 0 - add %o3, %o5, %o3 - sub %o2, 1, %o2 - 2: - Lend_single_divloop: - subcc %g7, 1, %g7 - bge Lsingle_divloop - tst %o3 - - b,a Lend_regular_divide - -Lnot_really_big: -1: - sll %o5, 4, %o5 - cmp %o5, %o3 - bleu 1b - addcc %o4, 1, %o4 - - be Lgot_result - sub %o4, 1, %o4 - - tst %o3 ! set up for initial iteration -Ldivloop: - sll %o2, 4, %o2 - ! depth 1, accumulated bits 0 - bl L.1.16 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 2, accumulated bits 1 - bl L.2.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 3, accumulated bits 3 - bl L.3.19 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits 7 - bl L.4.23 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (7*2+1), %o2 - -L.4.23: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (7*2-1), %o2 - -L.3.19: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits 5 - bl L.4.21 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (5*2+1), %o2 - -L.4.21: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (5*2-1), %o2 - -L.2.17: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 3, accumulated bits 1 - bl L.3.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! 
depth 4, accumulated bits 3 - bl L.4.19 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (3*2+1), %o2 - -L.4.19: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (3*2-1), %o2 - - -L.3.17: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits 1 - bl L.4.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (1*2+1), %o2 - -L.4.17: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (1*2-1), %o2 - -L.1.16: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 2, accumulated bits -1 - bl L.2.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 3, accumulated bits -1 - bl L.3.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits -1 - bl L.4.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-1*2+1), %o2 - -L.4.15: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-1*2-1), %o2 - -L.3.15: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits -3 - bl L.4.13 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-3*2+1), %o2 - -L.4.13: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-3*2-1), %o2 - -L.2.15: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 3, accumulated bits -3 - bl L.3.13 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits -5 - bl L.4.11 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-5*2+1), %o2 - -L.4.11: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-5*2-1), %o2 - -L.3.13: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits -7 - bl L.4.9 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-7*2+1), %o2 - -L.4.9: - ! 
remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-7*2-1), %o2 - - 9: -Lend_regular_divide: - subcc %o4, 1, %o4 - bge Ldivloop - tst %o3 - - bl,a Lgot_result - ! non-restoring fixup here (one instruction only!) - sub %o2, 1, %o2 - -Lgot_result: - ! check to see if answer should be < 0 - tst %g2 - bl,a 1f - sub %g0, %o2, %o2 -1: - retl - mov %o2, %o0 - - .globl .div_patch -.div_patch: - sra %o0, 0x1f, %o2 - wr %o2, 0x0, %y - nop - nop - nop - sdivcc %o0, %o1, %o0 - bvs,a 1f - xnor %o0, %g0, %o0 -1: retl - nop diff --git a/arch/sparc/lib/udiv.S b/arch/sparc/lib/udiv.S deleted file mode 100644 index 2101405bdfc..00000000000 --- a/arch/sparc/lib/udiv.S +++ /dev/null @@ -1,357 +0,0 @@ -/* - * udiv.S: This routine was taken from glibc-1.09 and is covered - * by the GNU Library General Public License Version 2. - */ - - -/* This file is generated from divrem.m4; DO NOT EDIT! */ -/* - * Division and remainder, from Appendix E of the Sparc Version 8 - * Architecture Manual, with fixes from Gordon Irlam. - */ - -/* - * Input: dividend and divisor in %o0 and %o1 respectively. - * - * m4 parameters: - * .udiv name of function to generate - * div div=div => %o0 / %o1; div=rem => %o0 % %o1 - * false false=true => signed; false=false => unsigned - * - * Algorithm parameters: - * N how many bits per iteration we try to get (4) - * WORDSIZE total number of bits (32) - * - * Derived constants: - * TOPBITS number of bits in the top decade of a number - * - * Important variables: - * Q the partial quotient under development (initially 0) - * R the remainder so far, initially the dividend - * ITER number of main division loop iterations required; - * equal to ceil(log2(quotient) / N). Note that this - * is the log base (2^N) of the quotient. 
- * V the current comparand, initially divisor*2^(ITER*N-1) - * - * Cost: - * Current estimate for non-large dividend is - * ceil(log2(quotient) / N) * (10 + 7N/2) + C - * A large dividend is one greater than 2^(31-TOPBITS) and takes a - * different path, as the upper bits of the quotient must be developed - * one bit at a time. - */ - - - .globl .udiv - .globl _Udiv -.udiv: -_Udiv: /* needed for export */ - - ! Ready to divide. Compute size of quotient; scale comparand. - orcc %o1, %g0, %o5 - bne 1f - mov %o0, %o3 - - ! Divide by zero trap. If it returns, return 0 (about as - ! wrong as possible, but that is what SunOS does...). - ta ST_DIV0 - retl - clr %o0 - -1: - cmp %o3, %o5 ! if %o1 exceeds %o0, done - blu Lgot_result ! (and algorithm fails otherwise) - clr %o2 - - sethi %hi(1 << (32 - 4 - 1)), %g1 - - cmp %o3, %g1 - blu Lnot_really_big - clr %o4 - - ! Here the dividend is >= 2**(31-N) or so. We must be careful here, - ! as our usual N-at-a-shot divide step will cause overflow and havoc. - ! The number of bits in the result here is N*ITER+SC, where SC <= N. - ! Compute ITER in an unorthodox manner: know we need to shift V into - ! the top decade: so do not even bother to compare to R. - 1: - cmp %o5, %g1 - bgeu 3f - mov 1, %g7 - - sll %o5, 4, %o5 - - b 1b - add %o4, 1, %o4 - - ! Now compute %g7. - 2: - addcc %o5, %o5, %o5 - bcc Lnot_too_big - add %g7, 1, %g7 - - ! We get here if the %o1 overflowed while shifting. - ! This means that %o3 has the high-order bit set. - ! Restore %o5 and subtract from %o3. - sll %g1, 4, %g1 ! high order bit - srl %o5, 1, %o5 ! rest of %o5 - add %o5, %g1, %o5 - - b Ldo_single_div - sub %g7, 1, %g7 - - Lnot_too_big: - 3: - cmp %o5, %o3 - blu 2b - nop - - be Ldo_single_div - nop - /* NB: these are commented out in the V8-Sparc manual as well */ - /* (I do not understand this) */ - ! %o5 > %o3: went too far: back up 1 step - ! srl %o5, 1, %o5 - ! dec %g7 - ! do single-bit divide steps - ! - ! We have to be careful here. 
We know that %o3 >= %o5, so we can do the - ! first divide step without thinking. BUT, the others are conditional, - ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high- - ! order bit set in the first step, just falling into the regular - ! division loop will mess up the first time around. - ! So we unroll slightly... - Ldo_single_div: - subcc %g7, 1, %g7 - bl Lend_regular_divide - nop - - sub %o3, %o5, %o3 - mov 1, %o2 - - b Lend_single_divloop - nop - Lsingle_divloop: - sll %o2, 1, %o2 - bl 1f - srl %o5, 1, %o5 - ! %o3 >= 0 - sub %o3, %o5, %o3 - b 2f - add %o2, 1, %o2 - 1: ! %o3 < 0 - add %o3, %o5, %o3 - sub %o2, 1, %o2 - 2: - Lend_single_divloop: - subcc %g7, 1, %g7 - bge Lsingle_divloop - tst %o3 - - b,a Lend_regular_divide - -Lnot_really_big: -1: - sll %o5, 4, %o5 - - cmp %o5, %o3 - bleu 1b - addcc %o4, 1, %o4 - - be Lgot_result - sub %o4, 1, %o4 - - tst %o3 ! set up for initial iteration -Ldivloop: - sll %o2, 4, %o2 - ! depth 1, accumulated bits 0 - bl L.1.16 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 2, accumulated bits 1 - bl L.2.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 3, accumulated bits 3 - bl L.3.19 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits 7 - bl L.4.23 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (7*2+1), %o2 - -L.4.23: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (7*2-1), %o2 - -L.3.19: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits 5 - bl L.4.21 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (5*2+1), %o2 - -L.4.21: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (5*2-1), %o2 - -L.2.17: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 3, accumulated bits 1 - bl L.3.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! 
depth 4, accumulated bits 3 - bl L.4.19 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (3*2+1), %o2 - -L.4.19: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (3*2-1), %o2 - -L.3.17: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits 1 - bl L.4.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (1*2+1), %o2 - -L.4.17: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (1*2-1), %o2 - -L.1.16: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 2, accumulated bits -1 - bl L.2.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 3, accumulated bits -1 - bl L.3.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits -1 - bl L.4.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-1*2+1), %o2 - -L.4.15: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-1*2-1), %o2 - -L.3.15: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits -3 - bl L.4.13 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-3*2+1), %o2 - -L.4.13: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-3*2-1), %o2 - -L.2.15: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 3, accumulated bits -3 - bl L.3.13 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits -5 - bl L.4.11 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-5*2+1), %o2 - -L.4.11: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-5*2-1), %o2 - -L.3.13: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits -7 - bl L.4.9 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-7*2+1), %o2 - -L.4.9: - ! 
remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-7*2-1), %o2 - - 9: -Lend_regular_divide: - subcc %o4, 1, %o4 - bge Ldivloop - tst %o3 - - bl,a Lgot_result - ! non-restoring fixup here (one instruction only!) - sub %o2, 1, %o2 - -Lgot_result: - - retl - mov %o2, %o0 - - .globl .udiv_patch -.udiv_patch: - wr %g0, 0x0, %y - nop - nop - retl - udiv %o0, %o1, %o0 - nop diff --git a/arch/sparc/lib/udivdi3.S b/arch/sparc/lib/udivdi3.S index b430f1f0ef6..24e0a355e2e 100644 --- a/arch/sparc/lib/udivdi3.S +++ b/arch/sparc/lib/udivdi3.S @@ -60,8 +60,9 @@ __udivdi3: bne .LL77 mov %i0,%o2 mov 1,%o0 - call .udiv,0 mov 0,%o1 + wr %g0, 0, %y + udiv %o0, %o1, %o0 mov %o0,%o3 mov %i0,%o2 .LL77: diff --git a/arch/sparc/lib/umul.S b/arch/sparc/lib/umul.S deleted file mode 100644 index 1f36ae68252..00000000000 --- a/arch/sparc/lib/umul.S +++ /dev/null @@ -1,171 +0,0 @@ -/* - * umul.S: This routine was taken from glibc-1.09 and is covered - * by the GNU Library General Public License Version 2. - */ - - -/* - * Unsigned multiply. Returns %o0 * %o1 in %o1%o0 (i.e., %o1 holds the - * upper 32 bits of the 64-bit product). - * - * This code optimizes short (less than 13-bit) multiplies. Short - * multiplies require 25 instruction cycles, and long ones require - * 45 instruction cycles. - * - * On return, overflow has occurred (%o1 is not zero) if and only if - * the Z condition code is clear, allowing, e.g., the following: - * - * call .umul - * nop - * bnz overflow (or tnz) - */ - - .globl .umul - .globl _Umul -.umul: -_Umul: /* needed for export */ - or %o0, %o1, %o4 - mov %o0, %y ! multiplier -> Y - - andncc %o4, 0xfff, %g0 ! test bits 12..31 of *both* args - be Lmul_shortway ! if zero, can do it the short way - andcc %g0, %g0, %o4 ! zero the partial product and clear N and V - - /* - * Long multiply. 32 steps, followed by a final shift step. - */ - mulscc %o4, %o1, %o4 ! 1 - mulscc %o4, %o1, %o4 ! 2 - mulscc %o4, %o1, %o4 ! 3 - mulscc %o4, %o1, %o4 ! 
4 - mulscc %o4, %o1, %o4 ! 5 - mulscc %o4, %o1, %o4 ! 6 - mulscc %o4, %o1, %o4 ! 7 - mulscc %o4, %o1, %o4 ! 8 - mulscc %o4, %o1, %o4 ! 9 - mulscc %o4, %o1, %o4 ! 10 - mulscc %o4, %o1, %o4 ! 11 - mulscc %o4, %o1, %o4 ! 12 - mulscc %o4, %o1, %o4 ! 13 - mulscc %o4, %o1, %o4 ! 14 - mulscc %o4, %o1, %o4 ! 15 - mulscc %o4, %o1, %o4 ! 16 - mulscc %o4, %o1, %o4 ! 17 - mulscc %o4, %o1, %o4 ! 18 - mulscc %o4, %o1, %o4 ! 19 - mulscc %o4, %o1, %o4 ! 20 - mulscc %o4, %o1, %o4 ! 21 - mulscc %o4, %o1, %o4 ! 22 - mulscc %o4, %o1, %o4 ! 23 - mulscc %o4, %o1, %o4 ! 24 - mulscc %o4, %o1, %o4 ! 25 - mulscc %o4, %o1, %o4 ! 26 - mulscc %o4, %o1, %o4 ! 27 - mulscc %o4, %o1, %o4 ! 28 - mulscc %o4, %o1, %o4 ! 29 - mulscc %o4, %o1, %o4 ! 30 - mulscc %o4, %o1, %o4 ! 31 - mulscc %o4, %o1, %o4 ! 32 - mulscc %o4, %g0, %o4 ! final shift - - - /* - * Normally, with the shift-and-add approach, if both numbers are - * positive you get the correct result. With 32-bit two's-complement - * numbers, -x is represented as - * - * x 32 - * ( 2 - ------ ) mod 2 * 2 - * 32 - * 2 - * - * (the `mod 2' subtracts 1 from 1.bbbb). To avoid lots of 2^32s, - * we can treat this as if the radix point were just to the left - * of the sign bit (multiply by 2^32), and get - * - * -x = (2 - x) mod 2 - * - * Then, ignoring the `mod 2's for convenience: - * - * x * y = xy - * -x * y = 2y - xy - * x * -y = 2x - xy - * -x * -y = 4 - 2x - 2y + xy - * - * For signed multiplies, we subtract (x << 32) from the partial - * product to fix this problem for negative multipliers (see mul.s). - * Because of the way the shift into the partial product is calculated - * (N xor V), this term is automatically removed for the multiplicand, - * so we don't have to adjust. - * - * But for unsigned multiplies, the high order bit wasn't a sign bit, - * and the correction is wrong. So for unsigned multiplies where the - * high order bit is one, we end up with xy - (y << 32). To fix it - * we add y << 32. - */ -#if 0 - tst %o1 - bl,a 1f ! 
if %o1 < 0 (high order bit = 1), - add %o4, %o0, %o4 ! %o4 += %o0 (add y to upper half) - -1: - rd %y, %o0 ! get lower half of product - retl - addcc %o4, %g0, %o1 ! put upper half in place and set Z for %o1==0 -#else - /* Faster code from tege@sics.se. */ - sra %o1, 31, %o2 ! make mask from sign bit - and %o0, %o2, %o2 ! %o2 = 0 or %o0, depending on sign of %o1 - rd %y, %o0 ! get lower half of product - retl - addcc %o4, %o2, %o1 ! add compensation and put upper half in place -#endif - -Lmul_shortway: - /* - * Short multiply. 12 steps, followed by a final shift step. - * The resulting bits are off by 12 and (32-12) = 20 bit positions, - * but there is no problem with %o0 being negative (unlike above), - * and overflow is impossible (the answer is at most 24 bits long). - */ - mulscc %o4, %o1, %o4 ! 1 - mulscc %o4, %o1, %o4 ! 2 - mulscc %o4, %o1, %o4 ! 3 - mulscc %o4, %o1, %o4 ! 4 - mulscc %o4, %o1, %o4 ! 5 - mulscc %o4, %o1, %o4 ! 6 - mulscc %o4, %o1, %o4 ! 7 - mulscc %o4, %o1, %o4 ! 8 - mulscc %o4, %o1, %o4 ! 9 - mulscc %o4, %o1, %o4 ! 10 - mulscc %o4, %o1, %o4 ! 11 - mulscc %o4, %o1, %o4 ! 12 - mulscc %o4, %g0, %o4 ! final shift - - /* - * %o4 has 20 of the bits that should be in the result; %y has - * the bottom 12 (as %y's top 12). That is: - * - * %o4 %y - * +----------------+----------------+ - * | -12- | -20- | -12- | -20- | - * +------(---------+------)---------+ - * -----result----- - * - * The 12 bits of %o4 left of the `result' area are all zero; - * in fact, all top 20 bits of %o4 are zero. - */ - - rd %y, %o5 - sll %o4, 12, %o0 ! shift middle bits left 12 - srl %o5, 20, %o5 ! shift low bits right 20 - or %o5, %o0, %o0 - retl - addcc %g0, %g0, %o1 ! 
%o1 = zero, and set Z - - .globl .umul_patch -.umul_patch: - umul %o0, %o1, %o0 - retl - rd %y, %o1 - nop diff --git a/arch/sparc/lib/urem.S b/arch/sparc/lib/urem.S deleted file mode 100644 index 77123eb83c4..00000000000 --- a/arch/sparc/lib/urem.S +++ /dev/null @@ -1,357 +0,0 @@ -/* - * urem.S: This routine was taken from glibc-1.09 and is covered - * by the GNU Library General Public License Version 2. - */ - -/* This file is generated from divrem.m4; DO NOT EDIT! */ -/* - * Division and remainder, from Appendix E of the Sparc Version 8 - * Architecture Manual, with fixes from Gordon Irlam. - */ - -/* - * Input: dividend and divisor in %o0 and %o1 respectively. - * - * m4 parameters: - * .urem name of function to generate - * rem rem=div => %o0 / %o1; rem=rem => %o0 % %o1 - * false false=true => signed; false=false => unsigned - * - * Algorithm parameters: - * N how many bits per iteration we try to get (4) - * WORDSIZE total number of bits (32) - * - * Derived constants: - * TOPBITS number of bits in the top decade of a number - * - * Important variables: - * Q the partial quotient under development (initially 0) - * R the remainder so far, initially the dividend - * ITER number of main division loop iterations required; - * equal to ceil(log2(quotient) / N). Note that this - * is the log base (2^N) of the quotient. - * V the current comparand, initially divisor*2^(ITER*N-1) - * - * Cost: - * Current estimate for non-large dividend is - * ceil(log2(quotient) / N) * (10 + 7N/2) + C - * A large dividend is one greater than 2^(31-TOPBITS) and takes a - * different path, as the upper bits of the quotient must be developed - * one bit at a time. - */ - - .globl .urem - .globl _Urem -.urem: -_Urem: /* needed for export */ - - ! Ready to divide. Compute size of quotient; scale comparand. - orcc %o1, %g0, %o5 - bne 1f - mov %o0, %o3 - - ! Divide by zero trap. If it returns, return 0 (about as - ! wrong as possible, but that is what SunOS does...). 
- ta ST_DIV0 - retl - clr %o0 - -1: - cmp %o3, %o5 ! if %o1 exceeds %o0, done - blu Lgot_result ! (and algorithm fails otherwise) - clr %o2 - - sethi %hi(1 << (32 - 4 - 1)), %g1 - - cmp %o3, %g1 - blu Lnot_really_big - clr %o4 - - ! Here the dividend is >= 2**(31-N) or so. We must be careful here, - ! as our usual N-at-a-shot divide step will cause overflow and havoc. - ! The number of bits in the result here is N*ITER+SC, where SC <= N. - ! Compute ITER in an unorthodox manner: know we need to shift V into - ! the top decade: so do not even bother to compare to R. - 1: - cmp %o5, %g1 - bgeu 3f - mov 1, %g7 - - sll %o5, 4, %o5 - - b 1b - add %o4, 1, %o4 - - ! Now compute %g7. - 2: - addcc %o5, %o5, %o5 - bcc Lnot_too_big - add %g7, 1, %g7 - - ! We get here if the %o1 overflowed while shifting. - ! This means that %o3 has the high-order bit set. - ! Restore %o5 and subtract from %o3. - sll %g1, 4, %g1 ! high order bit - srl %o5, 1, %o5 ! rest of %o5 - add %o5, %g1, %o5 - - b Ldo_single_div - sub %g7, 1, %g7 - - Lnot_too_big: - 3: - cmp %o5, %o3 - blu 2b - nop - - be Ldo_single_div - nop - /* NB: these are commented out in the V8-Sparc manual as well */ - /* (I do not understand this) */ - ! %o5 > %o3: went too far: back up 1 step - ! srl %o5, 1, %o5 - ! dec %g7 - ! do single-bit divide steps - ! - ! We have to be careful here. We know that %o3 >= %o5, so we can do the - ! first divide step without thinking. BUT, the others are conditional, - ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high- - ! order bit set in the first step, just falling into the regular - ! division loop will mess up the first time around. - ! So we unroll slightly... - Ldo_single_div: - subcc %g7, 1, %g7 - bl Lend_regular_divide - nop - - sub %o3, %o5, %o3 - mov 1, %o2 - - b Lend_single_divloop - nop - Lsingle_divloop: - sll %o2, 1, %o2 - bl 1f - srl %o5, 1, %o5 - ! %o3 >= 0 - sub %o3, %o5, %o3 - b 2f - add %o2, 1, %o2 - 1: ! 
%o3 < 0 - add %o3, %o5, %o3 - sub %o2, 1, %o2 - 2: - Lend_single_divloop: - subcc %g7, 1, %g7 - bge Lsingle_divloop - tst %o3 - - b,a Lend_regular_divide - -Lnot_really_big: -1: - sll %o5, 4, %o5 - - cmp %o5, %o3 - bleu 1b - addcc %o4, 1, %o4 - - be Lgot_result - sub %o4, 1, %o4 - - tst %o3 ! set up for initial iteration -Ldivloop: - sll %o2, 4, %o2 - ! depth 1, accumulated bits 0 - bl L.1.16 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 2, accumulated bits 1 - bl L.2.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 3, accumulated bits 3 - bl L.3.19 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits 7 - bl L.4.23 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (7*2+1), %o2 - -L.4.23: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (7*2-1), %o2 - -L.3.19: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits 5 - bl L.4.21 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (5*2+1), %o2 - -L.4.21: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (5*2-1), %o2 - -L.2.17: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 3, accumulated bits 1 - bl L.3.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits 3 - bl L.4.19 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (3*2+1), %o2 - -L.4.19: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (3*2-1), %o2 - -L.3.17: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits 1 - bl L.4.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (1*2+1), %o2 - -L.4.17: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (1*2-1), %o2 - -L.1.16: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 2, accumulated bits -1 - bl L.2.15 - srl %o5,1,%o5 - ! 
remainder is positive - subcc %o3,%o5,%o3 - ! depth 3, accumulated bits -1 - bl L.3.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits -1 - bl L.4.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-1*2+1), %o2 - -L.4.15: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-1*2-1), %o2 - -L.3.15: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits -3 - bl L.4.13 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-3*2+1), %o2 - -L.4.13: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-3*2-1), %o2 - -L.2.15: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 3, accumulated bits -3 - bl L.3.13 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits -5 - bl L.4.11 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-5*2+1), %o2 - -L.4.11: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-5*2-1), %o2 - -L.3.13: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits -7 - bl L.4.9 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-7*2+1), %o2 - -L.4.9: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-7*2-1), %o2 - - 9: -Lend_regular_divide: - subcc %o4, 1, %o4 - bge Ldivloop - tst %o3 - - bl,a Lgot_result - ! non-restoring fixup here (one instruction only!) - add %o3, %o1, %o3 - -Lgot_result: - - retl - mov %o3, %o0 - - .globl .urem_patch -.urem_patch: - wr %g0, 0x0, %y - nop - nop - nop - udiv %o0, %o1, %o2 - umul %o2, %o1, %o2 - retl - sub %o0, %o2, %o0 -- cgit v1.2.3-70-g09d2