diff options
author | Thomas Gleixner <tglx@linutronix.de> | 2007-10-11 11:16:33 +0200 |
---|---|---|
committer | Thomas Gleixner <tglx@linutronix.de> | 2007-10-11 11:16:33 +0200 |
commit | 44f0257fc316ff4b33aa3438dd8d891b7d6d72b9 (patch) | |
tree | c1a9a571db37d631489f18e1dfe5554874b19027 /arch/x86/lib | |
parent | da957e111bb0c189a4a3bf8a00caaecb59ed94ca (diff) |
i386: move lib
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/lib')
-rw-r--r-- | arch/x86/lib/Makefile | 5 | ||||
-rw-r--r-- | arch/x86/lib/Makefile_32 | 11 | ||||
-rw-r--r-- | arch/x86/lib/bitops_32.c | 70 | ||||
-rw-r--r-- | arch/x86/lib/checksum_32.S | 546 | ||||
-rw-r--r-- | arch/x86/lib/delay_32.c | 103 | ||||
-rw-r--r-- | arch/x86/lib/getuser_32.S | 78 | ||||
-rw-r--r-- | arch/x86/lib/memcpy_32.c | 43 | ||||
-rw-r--r-- | arch/x86/lib/mmx_32.c | 403 | ||||
-rw-r--r-- | arch/x86/lib/msr-on-cpu.c | 119 | ||||
-rw-r--r-- | arch/x86/lib/putuser_32.S | 98 | ||||
-rw-r--r-- | arch/x86/lib/semaphore_32.S | 219 | ||||
-rw-r--r-- | arch/x86/lib/string_32.c | 257 | ||||
-rw-r--r-- | arch/x86/lib/strstr_32.c | 31 | ||||
-rw-r--r-- | arch/x86/lib/usercopy_32.c | 882 |
14 files changed, 2865 insertions, 0 deletions
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile new file mode 100644 index 00000000000..2d7d724a2a6 --- /dev/null +++ b/arch/x86/lib/Makefile @@ -0,0 +1,5 @@ +ifeq ($(CONFIG_X86_32),y) +include ${srctree}/arch/x86/lib/Makefile_32 +else +include ${srctree}/arch/x86_64/lib/Makefile_64 +endif diff --git a/arch/x86/lib/Makefile_32 b/arch/x86/lib/Makefile_32 new file mode 100644 index 00000000000..98d1f1e2e2e --- /dev/null +++ b/arch/x86/lib/Makefile_32 @@ -0,0 +1,11 @@ +# +# Makefile for i386-specific library files.. +# + + +lib-y = checksum_32.o delay_32.o usercopy_32.o getuser_32.o putuser_32.o memcpy_32.o strstr_32.o \ + bitops_32.o semaphore_32.o string_32.o + +lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o + +obj-$(CONFIG_SMP) += msr-on-cpu.o diff --git a/arch/x86/lib/bitops_32.c b/arch/x86/lib/bitops_32.c new file mode 100644 index 00000000000..afd0045595d --- /dev/null +++ b/arch/x86/lib/bitops_32.c @@ -0,0 +1,70 @@ +#include <linux/bitops.h> +#include <linux/module.h> + +/** + * find_next_bit - find the first set bit in a memory region + * @addr: The address to base the search on + * @offset: The bitnumber to start searching at + * @size: The maximum size to search + */ +int find_next_bit(const unsigned long *addr, int size, int offset) +{ + const unsigned long *p = addr + (offset >> 5); + int set = 0, bit = offset & 31, res; + + if (bit) { + /* + * Look for nonzero in the first 32 bits: + */ + __asm__("bsfl %1,%0\n\t" + "jne 1f\n\t" + "movl $32, %0\n" + "1:" + : "=r" (set) + : "r" (*p >> bit)); + if (set < (32 - bit)) + return set + offset; + set = 32 - bit; + p++; + } + /* + * No set bit yet, search remaining full words for a bit + */ + res = find_first_bit (p, size - 32 * (p - addr)); + return (offset + set + res); +} +EXPORT_SYMBOL(find_next_bit); + +/** + * find_next_zero_bit - find the first zero bit in a memory region + * @addr: The address to base the search on + * @offset: The bitnumber to start searching at + * @size: The maximum size to search + */ +int find_next_zero_bit(const unsigned long *addr, int size, int offset) +{ + const unsigned long *p = addr + (offset >> 5); + int set = 0, bit = offset & 31, res; + + if (bit) { + /* + * Look for zero in the first 32 bits. + */ + __asm__("bsfl %1,%0\n\t" + "jne 1f\n\t" + "movl $32, %0\n" + "1:" + : "=r" (set) + : "r" (~(*p >> bit))); + if (set < (32 - bit)) + return set + offset; + set = 32 - bit; + p++; + } + /* + * No zero yet, search remaining full bytes for a zero + */ + res = find_first_zero_bit(p, size - 32 * (p - addr)); + return (offset + set + res); +} +EXPORT_SYMBOL(find_next_zero_bit); diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S new file mode 100644 index 00000000000..adbccd0bbb7 --- /dev/null +++ b/arch/x86/lib/checksum_32.S @@ -0,0 +1,546 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * IP/TCP/UDP checksumming routines + * + * Authors: Jorge Cwik, <jorge@laser.satlink.net> + * Arnt Gulbrandsen, <agulbra@nvg.unit.no> + * Tom May, <ftom@netcom.com> + * Pentium Pro/II routines: + * Alexander Kjeldaas <astor@guardian.no> + * Finn Arne Gangstad <finnag@guardian.no> + * Lots of code moved from tcp.c and ip.c; see those files + * for more names. + * + * Changes: Ingo Molnar, converted csum_partial_copy() to 2.1 exception + * handling. + * Andi Kleen, add zeroing on error + * converted to pure assembler + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/linkage.h> +#include <asm/dwarf2.h> +#include <asm/errno.h> + +/* + * computes a partial checksum, e.g. for TCP/UDP fragments + */ + +/* +unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum) + */ + +.text + +#ifndef CONFIG_X86_USE_PPRO_CHECKSUM + + /* + * Experiments with Ethernet and SLIP connections show that buff + * is aligned on either a 2-byte or 4-byte boundary. We get at + * least a twofold speedup on 486 and Pentium if it is 4-byte aligned. + * Fortunately, it is easy to convert 2-byte alignment to 4-byte + * alignment for the unrolled loop. + */ +ENTRY(csum_partial) + CFI_STARTPROC + pushl %esi + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET esi, 0 + pushl %ebx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ebx, 0 + movl 20(%esp),%eax # Function arg: unsigned int sum + movl 16(%esp),%ecx # Function arg: int len + movl 12(%esp),%esi # Function arg: unsigned char *buff + testl $3, %esi # Check alignment. + jz 2f # Jump if alignment is ok. + testl $1, %esi # Check alignment. + jz 10f # Jump if alignment is boundary of 2bytes. + + # buf is odd + dec %ecx + jl 8f + movzbl (%esi), %ebx + adcl %ebx, %eax + roll $8, %eax + inc %esi + testl $2, %esi + jz 2f +10: + subl $2, %ecx # Alignment uses up two bytes. + jae 1f # Jump if we had at least two bytes. + addl $2, %ecx # ecx was < 2. Deal with it. + jmp 4f +1: movw (%esi), %bx + addl $2, %esi + addw %bx, %ax + adcl $0, %eax +2: + movl %ecx, %edx + shrl $5, %ecx + jz 2f + testl %esi, %esi +1: movl (%esi), %ebx + adcl %ebx, %eax + movl 4(%esi), %ebx + adcl %ebx, %eax + movl 8(%esi), %ebx + adcl %ebx, %eax + movl 12(%esi), %ebx + adcl %ebx, %eax + movl 16(%esi), %ebx + adcl %ebx, %eax + movl 20(%esi), %ebx + adcl %ebx, %eax + movl 24(%esi), %ebx + adcl %ebx, %eax + movl 28(%esi), %ebx + adcl %ebx, %eax + lea 32(%esi), %esi + dec %ecx + jne 1b + adcl $0, %eax +2: movl %edx, %ecx + andl $0x1c, %edx + je 4f + shrl $2, %edx # This clears CF +3: adcl (%esi), %eax + lea 4(%esi), %esi + dec %edx + jne 3b + adcl $0, %eax +4: andl $3, %ecx + jz 7f + cmpl $2, %ecx + jb 5f + movw (%esi),%cx + leal 2(%esi),%esi + je 6f + shll $16,%ecx +5: movb (%esi),%cl +6: addl %ecx,%eax + adcl $0, %eax +7: + testl $1, 12(%esp) + jz 8f + roll $8, %eax +8: + popl %ebx + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE ebx + popl %esi + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE esi + ret + CFI_ENDPROC +ENDPROC(csum_partial) + +#else + +/* Version for PentiumII/PPro */ + +ENTRY(csum_partial) + CFI_STARTPROC + pushl %esi + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET esi, 0 + pushl %ebx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ebx, 0 + movl 20(%esp),%eax # Function arg: unsigned int sum + movl 16(%esp),%ecx # Function arg: int len + movl 12(%esp),%esi # Function arg: const unsigned char *buf + + testl $3, %esi + jnz 25f +10: + movl %ecx, %edx + movl %ecx, %ebx + andl $0x7c, %ebx + shrl $7, %ecx + addl %ebx,%esi + shrl $2, %ebx + negl %ebx + lea 45f(%ebx,%ebx,2), %ebx + testl %esi, %esi + jmp *%ebx + + # Handle 2-byte-aligned regions +20: addw (%esi), %ax + lea 2(%esi), %esi + adcl $0, %eax + jmp 10b +25: + testl $1, %esi + jz 30f + # buf is odd + dec %ecx + jl 90f + movzbl (%esi), %ebx + addl %ebx, %eax + adcl $0, %eax + roll $8, %eax + inc %esi + testl $2, %esi + jz 10b + +30: subl $2, %ecx + ja 20b + je 32f + addl $2, %ecx + jz 80f + movzbl (%esi),%ebx # csumming 1 byte, 2-aligned + addl %ebx, %eax + adcl $0, %eax + jmp 80f +32: + addw (%esi), %ax # csumming 2 bytes, 2-aligned + adcl $0, %eax + jmp 80f + +40: + addl -128(%esi), %eax + adcl -124(%esi), %eax + adcl -120(%esi), %eax + adcl -116(%esi), %eax + adcl -112(%esi), %eax + adcl -108(%esi), %eax + adcl -104(%esi), %eax + adcl -100(%esi), %eax + adcl -96(%esi), %eax + adcl -92(%esi), %eax + adcl -88(%esi), %eax + adcl -84(%esi), %eax + adcl -80(%esi), %eax + adcl -76(%esi), %eax + adcl -72(%esi), %eax + adcl -68(%esi), %eax + adcl -64(%esi), %eax + adcl -60(%esi), %eax + adcl -56(%esi), %eax + adcl -52(%esi), %eax + adcl -48(%esi), %eax + adcl -44(%esi), %eax + adcl -40(%esi), %eax + adcl -36(%esi), %eax + adcl -32(%esi), %eax + adcl -28(%esi), %eax + adcl -24(%esi), %eax + adcl -20(%esi), %eax + adcl -16(%esi), %eax + adcl -12(%esi), %eax + adcl -8(%esi), %eax + adcl -4(%esi), %eax +45: + lea 128(%esi), %esi + adcl $0, %eax + dec %ecx + jge 40b + movl %edx, %ecx +50: andl $3, %ecx + jz 80f + + # Handle the last 1-3 bytes without jumping + notl %ecx # 1->2, 2->1, 3->0, higher bits are masked + movl $0xffffff,%ebx # by the shll and shrl instructions + shll $3,%ecx + shrl %cl,%ebx + andl -128(%esi),%ebx # esi is 4-aligned so should be ok + addl %ebx,%eax + adcl $0,%eax +80: + testl $1, 12(%esp) + jz 90f + roll $8, %eax +90: + popl %ebx + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE ebx + popl %esi + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE esi + ret + CFI_ENDPROC +ENDPROC(csum_partial) + +#endif + +/* +unsigned int csum_partial_copy_generic (const char *src, char *dst, + int len, int sum, int *src_err_ptr, int *dst_err_ptr) + */ + +/* + * Copy from ds while checksumming, otherwise like csum_partial + * + * The macros SRC and DST specify the type of access for the instruction. + * thus we can call a custom exception handler for all access types. + * + * FIXME: could someone double-check whether I haven't mixed up some SRC and + * DST definitions? It's damn hard to trigger all cases. I hope I got + * them all but there's no guarantee. + */ + +#define SRC(y...) \ + 9999: y; \ + .section __ex_table, "a"; \ + .long 9999b, 6001f ; \ + .previous + +#define DST(y...) \ + 9999: y; \ + .section __ex_table, "a"; \ + .long 9999b, 6002f ; \ + .previous + +#ifndef CONFIG_X86_USE_PPRO_CHECKSUM + +#define ARGBASE 16 +#define FP 12 + +ENTRY(csum_partial_copy_generic) + CFI_STARTPROC + subl $4,%esp + CFI_ADJUST_CFA_OFFSET 4 + pushl %edi + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET edi, 0 + pushl %esi + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET esi, 0 + pushl %ebx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ebx, 0 + movl ARGBASE+16(%esp),%eax # sum + movl ARGBASE+12(%esp),%ecx # len + movl ARGBASE+4(%esp),%esi # src + movl ARGBASE+8(%esp),%edi # dst + + testl $2, %edi # Check alignment. + jz 2f # Jump if alignment is ok. + subl $2, %ecx # Alignment uses up two bytes. + jae 1f # Jump if we had at least two bytes. + addl $2, %ecx # ecx was < 2. Deal with it. + jmp 4f +SRC(1: movw (%esi), %bx ) + addl $2, %esi +DST( movw %bx, (%edi) ) + addl $2, %edi + addw %bx, %ax + adcl $0, %eax +2: + movl %ecx, FP(%esp) + shrl $5, %ecx + jz 2f + testl %esi, %esi +SRC(1: movl (%esi), %ebx ) +SRC( movl 4(%esi), %edx ) + adcl %ebx, %eax +DST( movl %ebx, (%edi) ) + adcl %edx, %eax +DST( movl %edx, 4(%edi) ) + +SRC( movl 8(%esi), %ebx ) +SRC( movl 12(%esi), %edx ) + adcl %ebx, %eax +DST( movl %ebx, 8(%edi) ) + adcl %edx, %eax +DST( movl %edx, 12(%edi) ) + +SRC( movl 16(%esi), %ebx ) +SRC( movl 20(%esi), %edx ) + adcl %ebx, %eax +DST( movl %ebx, 16(%edi) ) + adcl %edx, %eax +DST( movl %edx, 20(%edi) ) + +SRC( movl 24(%esi), %ebx ) +SRC( movl 28(%esi), %edx ) + adcl %ebx, %eax +DST( movl %ebx, 24(%edi) ) + adcl %edx, %eax +DST( movl %edx, 28(%edi) ) + + lea 32(%esi), %esi + lea 32(%edi), %edi + dec %ecx + jne 1b + adcl $0, %eax +2: movl FP(%esp), %edx + movl %edx, %ecx + andl $0x1c, %edx + je 4f + shrl $2, %edx # This clears CF +SRC(3: movl (%esi), %ebx ) + adcl %ebx, %eax +DST( movl %ebx, (%edi) ) + lea 4(%esi), %esi + lea 4(%edi), %edi + dec %edx + jne 3b + adcl $0, %eax +4: andl $3, %ecx + jz 7f + cmpl $2, %ecx + jb 5f +SRC( movw (%esi), %cx ) + leal 2(%esi), %esi +DST( movw %cx, (%edi) ) + leal 2(%edi), %edi + je 6f + shll $16,%ecx +SRC(5: movb (%esi), %cl ) +DST( movb %cl, (%edi) ) +6: addl %ecx, %eax + adcl $0, %eax +7: +5000: + +# Exception handler: +.section .fixup, "ax" + +6001: + movl ARGBASE+20(%esp), %ebx # src_err_ptr + movl $-EFAULT, (%ebx) + + # zero the complete destination - computing the rest + # is too much work + movl ARGBASE+8(%esp), %edi # dst + movl ARGBASE+12(%esp), %ecx # len + xorl %eax,%eax + rep ; stosb + + jmp 5000b + +6002: + movl ARGBASE+24(%esp), %ebx # dst_err_ptr + movl $-EFAULT,(%ebx) + jmp 5000b + +.previous + + popl %ebx + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE ebx + popl %esi + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE esi + popl %edi + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE edi + popl %ecx # equivalent to addl $4,%esp + CFI_ADJUST_CFA_OFFSET -4 + ret + CFI_ENDPROC +ENDPROC(csum_partial_copy_generic) + +#else + +/* Version for PentiumII/PPro */ + +#define ROUND1(x) \ + SRC(movl x(%esi), %ebx ) ; \ + addl %ebx, %eax ; \ + DST(movl %ebx, x(%edi) ) ; + +#define ROUND(x) \ + SRC(movl x(%esi), %ebx ) ; \ + adcl %ebx, %eax ; \ + DST(movl %ebx, x(%edi) ) ; + +#define ARGBASE 12 + +ENTRY(csum_partial_copy_generic) + CFI_STARTPROC + pushl %ebx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ebx, 0 + pushl %edi + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET edi, 0 + pushl %esi + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET esi, 0 + movl ARGBASE+4(%esp),%esi #src + movl ARGBASE+8(%esp),%edi #dst + movl ARGBASE+12(%esp),%ecx #len + movl ARGBASE+16(%esp),%eax #sum +# movl %ecx, %edx + movl %ecx, %ebx + movl %esi, %edx + shrl $6, %ecx + andl $0x3c, %ebx + negl %ebx + subl %ebx, %esi + subl %ebx, %edi + lea -1(%esi),%edx + andl $-32,%edx + lea 3f(%ebx,%ebx), %ebx + testl %esi, %esi + jmp *%ebx +1: addl $64,%esi + addl $64,%edi + SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl) + ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52) + ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36) + ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20) + ROUND (-16) ROUND(-12) ROUND(-8) ROUND(-4) +3: adcl $0,%eax + addl $64, %edx + dec %ecx + jge 1b +4: movl ARGBASE+12(%esp),%edx #len + andl $3, %edx + jz 7f + cmpl $2, %edx + jb 5f +SRC( movw (%esi), %dx ) + leal 2(%esi), %esi +DST( movw %dx, (%edi) ) + leal 2(%edi), %edi + je 6f + shll $16,%edx +5: +SRC( movb (%esi), %dl ) +DST( movb %dl, (%edi) ) +6: addl %edx, %eax + adcl $0, %eax +7: +.section .fixup, "ax" +6001: movl ARGBASE+20(%esp), %ebx # src_err_ptr + movl $-EFAULT, (%ebx) + # zero the complete destination (computing the rest is too much work) + movl ARGBASE+8(%esp),%edi # dst + movl ARGBASE+12(%esp),%ecx # len + xorl %eax,%eax + rep; stosb + jmp 7b +6002: movl ARGBASE+24(%esp), %ebx # dst_err_ptr + movl $-EFAULT, (%ebx) + jmp 7b +.previous + + popl %esi + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE esi + popl %edi + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE edi + popl %ebx + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE ebx + ret + CFI_ENDPROC +ENDPROC(csum_partial_copy_generic) + +#undef ROUND +#undef ROUND1 + +#endif diff --git a/arch/x86/lib/delay_32.c b/arch/x86/lib/delay_32.c new file mode 100644 index 00000000000..f6edb11364d --- /dev/null +++ b/arch/x86/lib/delay_32.c @@ -0,0 +1,103 @@ +/* + * Precise Delay Loops for i386 + * + * Copyright (C) 1993 Linus Torvalds + * Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz> + * + * The __delay function must _NOT_ be inlined as its execution time + * depends wildly on alignment on many x86 processors. The additional + * jump magic is needed to get the timing stable on all the CPU's + * we have to worry about. + */ + +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/delay.h> + +#include <asm/processor.h> +#include <asm/delay.h> +#include <asm/timer.h> + +#ifdef CONFIG_SMP +# include <asm/smp.h> +#endif + +/* simple loop based delay: */ +static void delay_loop(unsigned long loops) +{ + int d0; + + __asm__ __volatile__( + "\tjmp 1f\n" + ".align 16\n" + "1:\tjmp 2f\n" + ".align 16\n" + "2:\tdecl %0\n\tjns 2b" + :"=&a" (d0) + :"0" (loops)); +} + +/* TSC based delay: */ +static void delay_tsc(unsigned long loops) +{ + unsigned long bclock, now; + + rdtscl(bclock); + do { + rep_nop(); + rdtscl(now); + } while ((now-bclock) < loops); +} + +/* + * Since we calibrate only once at boot, this + * function should be set once at boot and not changed + */ +static void (*delay_fn)(unsigned long) = delay_loop; + +void use_tsc_delay(void) +{ + delay_fn = delay_tsc; +} + +int read_current_timer(unsigned long *timer_val) +{ + if (delay_fn == delay_tsc) { + rdtscl(*timer_val); + return 0; + } + return -1; +} + +void __delay(unsigned long loops) +{ + delay_fn(loops); +} + +inline void __const_udelay(unsigned long xloops) +{ + int d0; + + xloops *= 4; + __asm__("mull %0" + :"=d" (xloops), "=&a" (d0) + :"1" (xloops), "0" + (cpu_data[raw_smp_processor_id()].loops_per_jiffy * (HZ/4))); + + __delay(++xloops); +} + +void __udelay(unsigned long usecs) +{ + __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */ +} + +void __ndelay(unsigned long nsecs) +{ + __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */ +} + +EXPORT_SYMBOL(__delay); +EXPORT_SYMBOL(__const_udelay); +EXPORT_SYMBOL(__udelay); +EXPORT_SYMBOL(__ndelay); diff --git a/arch/x86/lib/getuser_32.S b/arch/x86/lib/getuser_32.S new file mode 100644 index 00000000000..6d84b53f12a --- /dev/null +++ b/arch/x86/lib/getuser_32.S @@ -0,0 +1,78 @@ +/* + * __get_user functions. + * + * (C) Copyright 1998 Linus Torvalds + * + * These functions have a non-standard call interface + * to make them more efficient, especially as they + * return an error value in addition to the "real" + * return value. + */ +#include <linux/linkage.h> +#include <asm/dwarf2.h> +#include <asm/thread_info.h> + + +/* + * __get_user_X + * + * Inputs: %eax contains the address + * + * Outputs: %eax is error code (0 or -EFAULT) + * %edx contains zero-extended value + * + * These functions should not modify any other registers, + * as they get called from within inline assembly. + */ + +.text +ENTRY(__get_user_1) + CFI_STARTPROC + GET_THREAD_INFO(%edx) + cmpl TI_addr_limit(%edx),%eax + jae bad_get_user +1: movzbl (%eax),%edx + xorl %eax,%eax + ret + CFI_ENDPROC +ENDPROC(__get_user_1) + +ENTRY(__get_user_2) + CFI_STARTPROC + addl $1,%eax + jc bad_get_user + GET_THREAD_INFO(%edx) + cmpl TI_addr_limit(%edx),%eax + jae bad_get_user +2: movzwl -1(%eax),%edx + xorl %eax,%eax + ret + CFI_ENDPROC +ENDPROC(__get_user_2) + +ENTRY(__get_user_4) + CFI_STARTPROC + addl $3,%eax + jc bad_get_user + GET_THREAD_INFO(%edx) + cmpl TI_addr_limit(%edx),%eax + jae bad_get_user +3: movl -3(%eax),%edx + xorl %eax,%eax + ret + CFI_ENDPROC +ENDPROC(__get_user_4) + +bad_get_user: + CFI_STARTPROC + xorl %edx,%edx + movl $-14,%eax + ret + CFI_ENDPROC +END(bad_get_user) + +.section __ex_table,"a" + .long 1b,bad_get_user + .long 2b,bad_get_user + .long 3b,bad_get_user +.previous diff --git a/arch/x86/lib/memcpy_32.c b/arch/x86/lib/memcpy_32.c new file mode 100644 index 00000000000..8ac51b82a63 --- /dev/null +++ b/arch/x86/lib/memcpy_32.c @@ -0,0 +1,43 @@ +#include <linux/string.h> +#include <linux/module.h> + +#undef memcpy +#undef memset + +void *memcpy(void *to, const void *from, size_t n) +{ +#ifdef CONFIG_X86_USE_3DNOW + return __memcpy3d(to, from, n); +#else + return __memcpy(to, from, n); +#endif +} +EXPORT_SYMBOL(memcpy); + +void *memset(void *s, int c, size_t count) +{ + return __memset(s, c, count); +} +EXPORT_SYMBOL(memset); + +void *memmove(void *dest, const void *src, size_t n) +{ + int d0, d1, d2; + + if (dest < src) { + memcpy(dest,src,n); + } else { + __asm__ __volatile__( + "std\n\t" + "rep\n\t" + "movsb\n\t" + "cld" + : "=&c" (d0), "=&S" (d1), "=&D" (d2) + :"0" (n), + "1" (n-1+(const char *)src), + "2" (n-1+(char *)dest) + :"memory"); + } + return dest; +} +EXPORT_SYMBOL(memmove); diff --git a/arch/x86/lib/mmx_32.c b/arch/x86/lib/mmx_32.c new file mode 100644 index 00000000000..28084d2e8dd --- /dev/null +++ b/arch/x86/lib/mmx_32.c @@ -0,0 +1,403 @@ +#include <linux/types.h> +#include <linux/string.h> +#include <linux/sched.h> +#include <linux/hardirq.h> +#include <linux/module.h> + +#include <asm/i387.h> + + +/* + * MMX 3DNow! library helper functions + * + * To do: + * We can use MMX just for prefetch in IRQ's. This may be a win. + * (reported so on K6-III) + * We should use a better code neutral filler for the short jump + * leal ebx. [ebx] is apparently best for K6-2, but Cyrix ?? + * We also want to clobber the filler register so we don't get any + * register forwarding stalls on the filler. + * + * Add *user handling. Checksums are not a win with MMX on any CPU + * tested so far for any MMX solution figured. + * + * 22/09/2000 - Arjan van de Ven + * Improved for non-egineering-sample Athlons + * + */ + +void *_mmx_memcpy(void *to, const void *from, size_t len) +{ + void *p; + int i; + + if (unlikely(in_interrupt())) + return __memcpy(to, from, len); + + p = to; + i = len >> 6; /* len/64 */ + + kernel_fpu_begin(); + + __asm__ __volatile__ ( + "1: prefetch (%0)\n" /* This set is 28 bytes */ + " prefetch 64(%0)\n" + " prefetch 128(%0)\n" + " prefetch 192(%0)\n" + " prefetch 256(%0)\n" + "2: \n" + ".section .fixup, \"ax\"\n" + "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ + " jmp 2b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 1b, 3b\n" + ".previous" + : : "r" (from) ); + + + for(; i>5; i--) + { + __asm__ __volatile__ ( + "1: prefetch 320(%0)\n" + "2: movq (%0), %%mm0\n" + " movq 8(%0), %%mm1\n" + " movq 16(%0), %%mm2\n" + " movq 24(%0), %%mm3\n" + " movq %%mm0, (%1)\n" + " movq %%mm1, 8(%1)\n" + " movq %%mm2, 16(%1)\n" + " movq %%mm3, 24(%1)\n" + " movq 32(%0), %%mm0\n" + " movq 40(%0), %%mm1\n" + " movq 48(%0), %%mm2\n" + " movq 56(%0), %%mm3\n" + " movq %%mm0, 32(%1)\n" + " movq %%mm1, 40(%1)\n" + " movq %%mm2, 48(%1)\n" + " movq %%mm3, 56(%1)\n" + ".section .fixup, \"ax\"\n" + "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ + " jmp 2b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 1b, 3b\n" + ".previous" + : : "r" (from), "r" (to) : "memory"); + from+=64; + to+=64; + } + + for(; i>0; i--) + { + __asm__ __volatile__ ( + " movq (%0), %%mm0\n" + " movq 8(%0), %%mm1\n" + " movq 16(%0), %%mm2\n" + " movq 24(%0), %%mm3\n" + " movq %%mm0, (%1)\n" + " movq %%mm1, 8(%1)\n" + " movq %%mm2, 16(%1)\n" + " movq %%mm3, 24(%1)\n" + " movq 32(%0), %%mm0\n" + " movq 40(%0), %%mm1\n" + " movq 48(%0), %%mm2\n" + " movq 56(%0), %%mm3\n" + " movq %%mm0, 32(%1)\n" + " movq %%mm1, 40(%1)\n" + " movq %%mm2, 48(%1)\n" + " movq %%mm3, 56(%1)\n" + : : "r" (from), "r" (to) : "memory"); + from+=64; + to+=64; + } + /* + * Now do the tail of the block + */ + __memcpy(to, from, len&63); + kernel_fpu_end(); + return p; +} + +#ifdef CONFIG_MK7 + +/* + * The K7 has streaming cache bypass load/store. The Cyrix III, K6 and + * other MMX using processors do not. + */ + +static void fast_clear_page(void *page) +{ + int i; + + kernel_fpu_begin(); + + __asm__ __volatile__ ( + " pxor %%mm0, %%mm0\n" : : + ); + + for(i=0;i<4096/64;i++) + { + __asm__ __volatile__ ( + " movntq %%mm0, (%0)\n" + " movntq %%mm0, 8(%0)\n" + " movntq %%mm0, 16(%0)\n" + " movntq %%mm0, 24(%0)\n" + " movntq %%mm0, 32(%0)\n" + " movntq %%mm0, 40(%0)\n" + " movntq %%mm0, 48(%0)\n" + " movntq %%mm0, 56(%0)\n" + : : "r" (page) : "memory"); + page+=64; + } + /* since movntq is weakly-ordered, a "sfence" is needed to become + * ordered again. + */ + __asm__ __volatile__ ( + " sfence \n" : : + ); + kernel_fpu_end(); +} + +static void fast_copy_page(void *to, void *from) +{ + int i; + + kernel_fpu_begin(); + + /* maybe the prefetch stuff can go before the expensive fnsave... + * but that is for later. -AV + */ + __asm__ __volatile__ ( + "1: prefetch (%0)\n" + " prefetch 64(%0)\n" + " prefetch 128(%0)\n" + " prefetch 192(%0)\n" + " prefetch 256(%0)\n" + "2: \n" + ".section .fixup, \"ax\"\n" + "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ + " jmp 2b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 1b, 3b\n" + ".previous" + : : "r" (from) ); + + for(i=0; i<(4096-320)/64; i++) + { + __asm__ __volatile__ ( + "1: prefetch 320(%0)\n" + "2: movq (%0), %%mm0\n" + " movntq %%mm0, (%1)\n" + " movq 8(%0), %%mm1\n" + " movntq %%mm1, 8(%1)\n" + " movq 16(%0), %%mm2\n" + " movntq %%mm2, 16(%1)\n" + " movq 24(%0), %%mm3\n" + " movntq %%mm3, 24(%1)\n" + " movq 32(%0), %%mm4\n" + " movntq %%mm4, 32(%1)\n" + " movq 40(%0), %%mm5\n" + " movntq %%mm5, 40(%1)\n" + " movq 48(%0), %%mm6\n" + " movntq %%mm6, 48(%1)\n" + " movq 56(%0), %%mm7\n" + " movntq %%mm7, 56(%1)\n" + ".section .fixup, \"ax\"\n" + "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ + " jmp 2b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 1b, 3b\n" + ".previous" + : : "r" (from), "r" (to) : "memory"); + from+=64; + to+=64; + } + for(i=(4096-320)/64; i<4096/64; i++) + { + __asm__ __volatile__ ( + "2: movq (%0), %%mm0\n" + " movntq %%mm0, (%1)\n" + " movq 8(%0), %%mm1\n" + " movntq %%mm1, 8(%1)\n" + " movq 16(%0), %%mm2\n" + " movntq %%mm2, 16(%1)\n" + " movq 24(%0), %%mm3\n" + " movntq %%mm3, 24(%1)\n" + " movq 32(%0), %%mm4\n" + " movntq %%mm4, 32(%1)\n" + " movq 40(%0), %%mm5\n" + " movntq %%mm5, 40(%1)\n" + " movq 48(%0), %%mm6\n" + " movntq %%mm6, 48(%1)\n" + " movq 56(%0), %%mm7\n" + " movntq %%mm7, 56(%1)\n" + : : "r" (from), "r" (to) : "memory"); + from+=64; + to+=64; + } + /* since movntq is weakly-ordered, a "sfence" is needed to become + * ordered again. + */ + __asm__ __volatile__ ( + " sfence \n" : : + ); + kernel_fpu_end(); +} + +#else + +/* + * Generic MMX implementation without K7 specific streaming + */ + +static void fast_clear_page(void *page) +{ + int i; + + kernel_fpu_begin(); + + __asm__ __volatile__ ( + " pxor %%mm0, %%mm0\n" : : + ); + + for(i=0;i<4096/128;i++) + { + __asm__ __volatile__ ( + " movq %%mm0, (%0)\n" + " movq %%mm0, 8(%0)\n" + " movq %%mm0, 16(%0)\n" + " movq %%mm0, 24(%0)\n" + " movq %%mm0, 32(%0)\n" + " movq %%mm0, 40(%0)\n" + " movq %%mm0, 48(%0)\n" + " movq %%mm0, 56(%0)\n" + " movq %%mm0, 64(%0)\n" + " movq %%mm0, 72(%0)\n" + " movq %%mm0, 80(%0)\n" + " movq %%mm0, 88(%0)\n" + " movq %%mm0, 96(%0)\n" + " movq %%mm0, 104(%0)\n" + " movq %%mm0, 112(%0)\n" + " movq %%mm0, 120(%0)\n" + : : "r" (page) : "memory"); + page+=128; + } + + kernel_fpu_end(); +} + +static void fast_copy_page(void *to, void *from) +{ + int i; + + + kernel_fpu_begin(); + + __asm__ __volatile__ ( + "1: prefetch (%0)\n" + " prefetch 64(%0)\n" + " prefetch 128(%0)\n" + " prefetch 192(%0)\n" + " prefetch 256(%0)\n" + "2: \n" + ".section .fixup, \"ax\"\n" + "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ + " jmp 2b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 1b, 3b\n" + ".previous" + : : "r" (from) ); + + for(i=0; i<4096/64; i++) + { + __asm__ __volatile__ ( + "1: prefetch 320(%0)\n" + "2: movq (%0), %%mm0\n" + " movq 8(%0), %%mm1\n" + " movq 16(%0), %%mm2\n" + " movq 24(%0), %%mm3\n" + " movq %%mm0, (%1)\n" + " movq %%mm1, 8(%1)\n" + " movq %%mm2, 16(%1)\n" + " movq %%mm3, 24(%1)\n" + " movq 32(%0), %%mm0\n" + " movq 40(%0), %%mm1\n" + " movq 48(%0), %%mm2\n" + " movq 56(%0), %%mm3\n" + " movq %%mm0, 32(%1)\n" + " movq %%mm1, 40(%1)\n" + " movq %%mm2, 48(%1)\n" + " movq %%mm3, 56(%1)\n" + ".section .fixup, \"ax\"\n" + "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ + " jmp 2b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 1b, 3b\n" + ".previous" + : : "r" (from), "r" (to) : "memory"); + from+=64; + to+=64; + } + kernel_fpu_end(); +} + + +#endif + +/* + * Favour MMX for page clear and copy. + */ + +static void slow_zero_page(void * page) +{ + int d0, d1; + __asm__ __volatile__( \ + "cld\n\t" \ + "rep ; stosl" \ + : "=&c" (d0), "=&D" (d1) + :"a" (0),"1" (page),"0" (1024) + :"memory"); +} + +void mmx_clear_page(void * page) +{ + if(unlikely(in_interrupt())) + slow_zero_page(page); + else + fast_clear_page(page); +} + +static void slow_copy_page(void *to, void *from) +{ + int d0, d1, d2; + __asm__ __volatile__( \ + "cld\n\t" \ + "rep ; movsl" \ + : "=&c" (d0), "=&D" (d1), "=&S" (d2) \ + : "0" (1024),"1" ((long) to),"2" ((long) from) \ + : "memory"); +} + + +void mmx_copy_page(void *to, void *from) +{ + if(unlikely(in_interrupt())) + slow_copy_page(to, from); + else + fast_copy_page(to, from); +} + +EXPORT_SYMBOL(_mmx_memcpy); +EXPORT_SYMBOL(mmx_clear_page); +EXPORT_SYMBOL(mmx_copy_page); diff --git a/arch/x86/lib/msr-on-cpu.c b/arch/x86/lib/msr-on-cpu.c new file mode 100644 index 00000000000..7767962f25d --- /dev/null +++ b/arch/x86/lib/msr-on-cpu.c @@ -0,0 +1,119 @@ +#include <linux/module.h> +#include <linux/preempt.h> +#include <linux/smp.h> +#include <asm/msr.h> + +struct msr_info { + u32 msr_no; + u32 l, h; + int err; +}; + +static void __rdmsr_on_cpu(void *info) +{ + struct msr_info *rv = info; + + rdmsr(rv->msr_no, rv->l, rv->h); +} + +static void __rdmsr_safe_on_cpu(void *info) +{ + struct msr_info *rv = info; + + rv->err = rdmsr_safe(rv->msr_no, &rv->l, &rv->h); +} + +static int _rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h, int safe) +{ + int err = 0; + preempt_disable(); + if (smp_processor_id() == cpu) + if (safe) + err = rdmsr_safe(msr_no, l, h); + else + rdmsr(msr_no, *l, *h); + else { + struct msr_info rv; + + rv.msr_no = msr_no; + if (safe) { + smp_call_function_single(cpu, __rdmsr_safe_on_cpu, + &rv, 0, 1); + err = rv.err; + } else { + smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 0, 1); + } + *l = rv.l; + *h = rv.h; + } + preempt_enable(); + return err; +} + +static void __wrmsr_on_cpu(void *info) +{ + struct msr_info *rv = info; + + wrmsr(rv->msr_no, rv->l, rv->h); +} + +static void __wrmsr_safe_on_cpu(void *info) +{ + struct msr_info *rv = info; + + rv->err = wrmsr_safe(rv->msr_no, rv->l, rv->h); +} + +static int _wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h, int safe) +{ + int err = 0; + preempt_disable(); + if (smp_processor_id() == cpu) + if (safe) + err = wrmsr_safe(msr_no, l, h); + else + wrmsr(msr_no, l, h); + else { + struct msr_info rv; + + rv.msr_no = msr_no; + rv.l = l; + rv.h = h; + if (safe) { + smp_call_function_single(cpu, __wrmsr_safe_on_cpu, + &rv, 0, 1); + err = rv.err; + } else { + smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 0, 1); + } + } + preempt_enable(); + return err; +} + +void wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) +{ + _wrmsr_on_cpu(cpu, msr_no, l, h, 0); +} + +void rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) +{ + _rdmsr_on_cpu(cpu, msr_no, l, h, 0); +} + +/* These "safe" variants are slower and should be used when the target MSR + may not actually exist. */ +int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) +{ + return _wrmsr_on_cpu(cpu, msr_no, l, h, 1); +} + +int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) +{ + return _rdmsr_on_cpu(cpu, msr_no, l, h, 1); +} + +EXPORT_SYMBOL(rdmsr_on_cpu); +EXPORT_SYMBOL(wrmsr_on_cpu); +EXPORT_SYMBOL(rdmsr_safe_on_cpu); +EXPORT_SYMBOL(wrmsr_safe_on_cpu); diff --git a/arch/x86/lib/putuser_32.S b/arch/x86/lib/putuser_32.S new file mode 100644 index 00000000000..f58fba109d1 --- /dev/null +++ b/arch/x86/lib/putuser_32.S @@ -0,0 +1,98 @@ +/* + * __put_user functions. + * + * (C) Copyright 2005 Linus Torvalds + * + * These functions have a non-standard call interface + * to make them more efficient, especially as they + * return an error value in addition to the "real" + * return value. + */ +#include <linux/linkage.h> +#include <asm/dwarf2.h> +#include <asm/thread_info.h> + + +/* + * __put_user_X + * + * Inputs: %eax[:%edx] contains the data + * %ecx contains the address + * + * Outputs: %eax is error code (0 or -EFAULT) + * + * These functions should not modify any other registers, + * as they get called from within inline assembly. + */ + +#define ENTER CFI_STARTPROC ; \ + pushl %ebx ; \ + CFI_ADJUST_CFA_OFFSET 4 ; \ + CFI_REL_OFFSET ebx, 0 ; \ + GET_THREAD_INFO(%ebx) +#define EXIT popl %ebx ; \ + CFI_ADJUST_CFA_OFFSET -4 ; \ + CFI_RESTORE ebx ; \ + ret ; \ + CFI_ENDPROC + +.text +ENTRY(__put_user_1) + ENTER + cmpl TI_addr_limit(%ebx),%ecx + jae bad_put_user +1: movb %al,(%ecx) + xorl %eax,%eax + EXIT +ENDPROC(__put_user_1) + +ENTRY(__put_user_2) + ENTER + movl TI_addr_limit(%ebx),%ebx + subl $1,%ebx + cmpl %ebx,%ecx + jae bad_put_user +2: movw %ax,(%ecx) + xorl %eax,%eax + EXIT +ENDPROC(__put_user_2) + +ENTRY(__put_user_4) + ENTER + movl TI_addr_limit(%ebx),%ebx + subl $3,%ebx + cmpl %ebx,%ecx + jae bad_put_user +3: movl %eax,(%ecx) + xorl %eax,%eax + EXIT +ENDPROC(__put_user_4) + +ENTRY(__put_user_8) + ENTER + movl TI_addr_limit(%ebx),%ebx + subl $7,%ebx + cmpl %ebx,%ecx + jae bad_put_user +4: movl %eax,(%ecx) +5: movl %edx,4(%ecx) + xorl %eax,%eax + EXIT +ENDPROC(__put_user_8) + +bad_put_user: + CFI_STARTPROC simple + CFI_DEF_CFA esp, 2*4 + CFI_OFFSET eip, -1*4 + CFI_OFFSET ebx, -2*4 + movl $-14,%eax + EXIT +END(bad_put_user) + +.section __ex_table,"a" + .long 1b,bad_put_user + .long 2b,bad_put_user + .long 3b,bad_put_user + .long 4b,bad_put_user + .long 5b,bad_put_user +.previous diff --git a/arch/x86/lib/semaphore_32.S b/arch/x86/lib/semaphore_32.S new file mode 100644 index 00000000000..c01eb39c0b4 --- /dev/null +++ b/arch/x86/lib/semaphore_32.S @@ -0,0 +1,219 @@ +/* + * i386 semaphore implementation. + * + * (C) Copyright 1999 Linus Torvalds + * + * Portions Copyright 1999 Red Hat, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * rw semaphores implemented November 1999 by Benjamin LaHaise <bcrl@kvack.org> + */ + +#include <linux/linkage.h> +#include <asm/rwlock.h> +#include <asm/alternative-asm.i> +#include <asm/frame.i> +#include <asm/dwarf2.h> + +/* + * The semaphore operations have a special calling sequence that + * allow us to do a simpler in-line version of them. These routines + * need to convert that sequence back into the C sequence when + * there is contention on the semaphore. + * + * %eax contains the semaphore pointer on entry. Save the C-clobbered + * registers (%eax, %edx and %ecx) except %eax whish is either a return + * value or just clobbered.. + */ + .section .sched.text +ENTRY(__down_failed) + CFI_STARTPROC + FRAME + pushl %edx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET edx,0 + pushl %ecx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ecx,0 + call __down + popl %ecx + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE ecx + popl %edx + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE edx + ENDFRAME + ret + CFI_ENDPROC + END(__down_failed) + +ENTRY(__down_failed_interruptible) + CFI_STARTPROC + FRAME + pushl %edx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET edx,0 + pushl %ecx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ecx,0 + call __down_interruptible + popl %ecx + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE ecx + popl %edx + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE edx + ENDFRAME + ret + CFI_ENDPROC + END(__down_failed_interruptible) + +ENTRY(__down_failed_trylock) + CFI_STARTPROC + FRAME + pushl %edx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET edx,0 + pushl %ecx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ecx,0 + call __down_trylock + popl %ecx + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE ecx + popl %edx + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE edx + ENDFRAME + ret + CFI_ENDPROC + END(__down_failed_trylock) + +ENTRY(__up_wakeup) + CFI_STARTPROC + FRAME + pushl %edx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET edx,0 + pushl %ecx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ecx,0 + call __up + popl %ecx + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE ecx + popl %edx + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE edx + ENDFRAME + ret + CFI_ENDPROC + END(__up_wakeup) + +/* + * rw spinlock fallbacks + */ +#ifdef CONFIG_SMP +ENTRY(__write_lock_failed) + CFI_STARTPROC simple + FRAME +2: LOCK_PREFIX + addl $ RW_LOCK_BIAS,(%eax) +1: rep; nop + cmpl $ RW_LOCK_BIAS,(%eax) + jne 1b + LOCK_PREFIX + subl $ RW_LOCK_BIAS,(%eax) + jnz 2b + ENDFRAME + ret + CFI_ENDPROC + END(__write_lock_failed) + +ENTRY(__read_lock_failed) + CFI_STARTPROC + FRAME +2: LOCK_PREFIX + incl (%eax) +1: rep; nop + cmpl $1,(%eax) + js 1b + LOCK_PREFIX + decl (%eax) + js 2b + ENDFRAME + ret + CFI_ENDPROC + END(__read_lock_failed) + +#endif + +#ifdef CONFIG_RWSEM_XCHGADD_ALGORITHM + +/* Fix up special calling conventions */ +ENTRY(call_rwsem_down_read_failed) + CFI_STARTPROC + push %ecx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ecx,0 + push %edx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET edx,0 + call rwsem_down_read_failed + pop %edx + CFI_ADJUST_CFA_OFFSET -4 + pop %ecx + CFI_ADJUST_CFA_OFFSET -4 + ret + CFI_ENDPROC + END(call_rwsem_down_read_failed) + +ENTRY(call_rwsem_down_write_failed) + CFI_STARTPROC + push %ecx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ecx,0 + calll rwsem_down_write_failed + pop %ecx + CFI_ADJUST_CFA_OFFSET -4 + ret + CFI_ENDPROC + END(call_rwsem_down_write_failed) + +ENTRY(call_rwsem_wake) + CFI_STARTPROC + decw %dx /* do nothing if still outstanding active readers */ + jnz 1f + push %ecx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ecx,0 + call rwsem_wake + pop %ecx + CFI_ADJUST_CFA_OFFSET -4 +1: ret + CFI_ENDPROC + END(call_rwsem_wake) + +/* Fix up special calling conventions */ +ENTRY(call_rwsem_downgrade_wake) + CFI_STARTPROC + push %ecx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ecx,0 + push %edx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET edx,0 + call rwsem_downgrade_wake + pop %edx + CFI_ADJUST_CFA_OFFSET -4 + pop %ecx + CFI_ADJUST_CFA_OFFSET -4 + ret + CFI_ENDPROC + END(call_rwsem_downgrade_wake) + +#endif diff --git a/arch/x86/lib/string_32.c b/arch/x86/lib/string_32.c new file mode 100644 index 00000000000..2c773fefa3d --- /dev/null +++ b/arch/x86/lib/string_32.c @@ -0,0 +1,257 @@ +/* + * Most of the string-functions are rather heavily hand-optimized, + * see especially strsep,strstr,str[c]spn. They should work, but are not + * very easy to understand. Everything is done entirely within the register + * set, making the functions fast and clean. String instructions have been + * used through-out, making for "slightly" unclear code :-) + * + * AK: On P4 and K7 using non string instruction implementations might be faster + * for large memory blocks. But most of them are unlikely to be used on large + * strings. + */ + +#include <linux/string.h> +#include <linux/module.h> + +#ifdef __HAVE_ARCH_STRCPY +char *strcpy(char * dest,const char *src) +{ + int d0, d1, d2; + asm volatile( "1:\tlodsb\n\t" + "stosb\n\t" + "testb %%al,%%al\n\t" + "jne 1b" + : "=&S" (d0), "=&D" (d1), "=&a" (d2) + :"0" (src),"1" (dest) : "memory"); + return dest; +} +EXPORT_SYMBOL(strcpy); +#endif + +#ifdef __HAVE_ARCH_STRNCPY +char *strncpy(char * dest,const char *src,size_t count) +{ + int d0, d1, d2, d3; + asm volatile( "1:\tdecl %2\n\t" + "js 2f\n\t" + "lodsb\n\t" + "stosb\n\t" + "testb %%al,%%al\n\t" + "jne 1b\n\t" + "rep\n\t" + "stosb\n" + "2:" + : "=&S" (d0), "=&D" (d1), "=&c" (d2), "=&a" (d3) + :"0" (src),"1" (dest),"2" (count) : "memory"); + return dest; +} +EXPORT_SYMBOL(strncpy); +#endif + +#ifdef __HAVE_ARCH_STRCAT +char *strcat(char * dest,const char * src) +{ + int d0, d1, d2, d3; + asm volatile( "repne\n\t" + "scasb\n\t" + "decl %1\n" + "1:\tlodsb\n\t" + "stosb\n\t" + "testb %%al,%%al\n\t" + "jne 1b" + : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3) + : "0" (src), "1" (dest), "2" (0), "3" (0xffffffffu): "memory"); + return dest; +} +EXPORT_SYMBOL(strcat); +#endif + +#ifdef __HAVE_ARCH_STRNCAT +char *strncat(char * dest,const char * src,size_t count) +{ + int d0, d1, d2, d3; + asm volatile( "repne\n\t" + "scasb\n\t" + "decl %1\n\t" + "movl %8,%3\n" + "1:\tdecl %3\n\t" + "js 2f\n\t" + "lodsb\n\t" + "stosb\n\t" + "testb %%al,%%al\n\t" + "jne 1b\n" + "2:\txorl %2,%2\n\t" + "stosb" + : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3) + : "0" (src),"1" (dest),"2" (0),"3" (0xffffffffu), "g" (count) + : "memory"); + return dest; +} +EXPORT_SYMBOL(strncat); +#endif + +#ifdef __HAVE_ARCH_STRCMP +int strcmp(const char * cs,const char * ct) +{ + int d0, d1; + int res; + asm volatile( "1:\tlodsb\n\t" + "scasb\n\t" + "jne 2f\n\t" + "testb %%al,%%al\n\t" + "jne 1b\n\t" + "xorl %%eax,%%eax\n\t" + "jmp 3f\n" + "2:\tsbbl %%eax,%%eax\n\t" + "orb $1,%%al\n" + "3:" + :"=a" (res), "=&S" (d0), "=&D" (d1) + :"1" (cs),"2" (ct) + :"memory"); + return res; +} +EXPORT_SYMBOL(strcmp); +#endif + +#ifdef __HAVE_ARCH_STRNCMP +int strncmp(const char * cs,const char * ct,size_t count) +{ + int res; + int d0, d1, d2; + asm volatile( "1:\tdecl %3\n\t" + "js 2f\n\t" + "lodsb\n\t" + "scasb\n\t" + "jne 3f\n\t" + "testb %%al,%%al\n\t" + "jne 1b\n" + "2:\txorl %%eax,%%eax\n\t" + "jmp 4f\n" + "3:\tsbbl %%eax,%%eax\n\t" + "orb $1,%%al\n" + "4:" + :"=a" (res), "=&S" (d0), "=&D" (d1), "=&c" (d2) + :"1" (cs),"2" (ct),"3" (count) + :"memory"); + return res; +} +EXPORT_SYMBOL(strncmp); +#endif + +#ifdef __HAVE_ARCH_STRCHR +char *strchr(const char * s, int c) +{ + int d0; + char * res; + asm volatile( "movb %%al,%%ah\n" + "1:\tlodsb\n\t" + "cmpb %%ah,%%al\n\t" + "je 2f\n\t" + "testb %%al,%%al\n\t" + "jne 1b\n\t" + "movl $1,%1\n" + "2:\tmovl %1,%0\n\t" + "decl %0" + :"=a" (res), "=&S" (d0) + :"1" (s),"0" (c) + :"memory"); + return res; +} +EXPORT_SYMBOL(strchr); +#endif + +#ifdef __HAVE_ARCH_STRRCHR +char *strrchr(const char * s, int c) +{ + int d0, d1; + char * res; + asm volatile( "movb %%al,%%ah\n" + "1:\tlodsb\n\t" + "cmpb %%ah,%%al\n\t" + "jne 2f\n\t" + "leal -1(%%esi),%0\n" + "2:\ttestb %%al,%%al\n\t" + "jne 1b" + :"=g" (res), "=&S" (d0), "=&a" (d1) + :"0" (0),"1" (s),"2" (c) + :"memory"); + return res; +} +EXPORT_SYMBOL(strrchr); +#endif + +#ifdef __HAVE_ARCH_STRLEN +size_t strlen(const char * s) +{ + int d0; + int res; + asm volatile( "repne\n\t" + "scasb\n\t" + "notl %0\n\t" + "decl %0" + :"=c" (res), "=&D" (d0) + :"1" (s),"a" (0), "0" (0xffffffffu) + :"memory"); + return res; +} +EXPORT_SYMBOL(strlen); +#endif + +#ifdef __HAVE_ARCH_MEMCHR +void *memchr(const void *cs,int c,size_t count) +{ + int d0; + void *res; + if (!count) + return NULL; + asm volatile( "repne\n\t" + "scasb\n\t" + "je 1f\n\t" + "movl $1,%0\n" + "1:\tdecl %0" + :"=D" (res), "=&c" (d0) + :"a" (c),"0" (cs),"1" (count) + :"memory"); + return res; +} +EXPORT_SYMBOL(memchr); +#endif + +#ifdef __HAVE_ARCH_MEMSCAN +void *memscan(void * addr, int c, size_t size) +{ + if (!size) + return addr; + asm volatile("repnz; scasb\n\t" + "jnz 1f\n\t" + "dec %%edi\n" + "1:" + : "=D" (addr), "=c" (size) + : "0" (addr), "1" (size), "a" (c) + : "memory"); + return addr; +} +EXPORT_SYMBOL(memscan); +#endif + +#ifdef __HAVE_ARCH_STRNLEN +size_t strnlen(const char *s, size_t count) +{ + int d0; + int res; + asm volatile( "movl %2,%0\n\t" + "jmp 2f\n" + "1:\tcmpb $0,(%0)\n\t" + "je 3f\n\t" + "incl %0\n" + "2:\tdecl %1\n\t" + "cmpl $-1,%1\n\t" + "jne 1b\n" + "3:\tsubl %2,%0" + :"=a" (res), "=&d" (d0) + :"c" (s),"1" (count) + :"memory"); + return res; +} +EXPORT_SYMBOL(strnlen); +#endif diff --git a/arch/x86/lib/strstr_32.c b/arch/x86/lib/strstr_32.c new file mode 100644 index 00000000000..a3dafbf59da --- /dev/null +++ b/arch/x86/lib/strstr_32.c @@ -0,0 +1,31 @@ +#include <linux/string.h> + +char * strstr(const char * cs,const char * ct) +{ +int d0, d1; +register char * __res; +__asm__ __volatile__( + "movl %6,%%edi\n\t" + "repne\n\t" + "scasb\n\t" + "notl %%ecx\n\t" + "decl %%ecx\n\t" /* NOTE! This also sets Z if searchstring='' */ + "movl %%ecx,%%edx\n" + "1:\tmovl %6,%%edi\n\t" + "movl %%esi,%%eax\n\t" + "movl %%edx,%%ecx\n\t" + "repe\n\t" + "cmpsb\n\t" + "je 2f\n\t" /* also works for empty string, see above */ + "xchgl %%eax,%%esi\n\t" + "incl %%esi\n\t" + "cmpb $0,-1(%%eax)\n\t" + "jne 1b\n\t" + "xorl %%eax,%%eax\n\t" + "2:" + :"=a" (__res), "=&c" (d0), "=&S" (d1) + :"0" (0), "1" (0xffffffff), "2" (cs), "g" (ct) + :"dx", "di"); +return __res; +} + diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c new file mode 100644 index 00000000000..9f38b12b4af --- /dev/null +++ b/arch/x86/lib/usercopy_32.c @@ -0,0 +1,882 @@ +/* + * User address space access functions. + * The non inlined parts of asm-i386/uaccess.h are here. + * + * Copyright 1997 Andi Kleen <ak@muc.de> + * Copyright 1997 Linus Torvalds + */ +#include <linux/mm.h> +#include <linux/highmem.h> +#include <linux/blkdev.h> +#include <linux/module.h> +#include <linux/backing-dev.h> +#include <linux/interrupt.h> +#include <asm/uaccess.h> +#include <asm/mmx.h> + +static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned long n) +{ +#ifdef CONFIG_X86_INTEL_USERCOPY + if (n >= 64 && ((a1 ^ a2) & movsl_mask.mask)) + return 0; +#endif + return 1; +} +#define movsl_is_ok(a1,a2,n) \ + __movsl_is_ok((unsigned long)(a1),(unsigned long)(a2),(n)) + +/* + * Copy a null terminated string from userspace. + */ + +#define __do_strncpy_from_user(dst,src,count,res) \ +do { \ + int __d0, __d1, __d2; \ + might_sleep(); \ + __asm__ __volatile__( \ + " testl %1,%1\n" \ + " jz 2f\n" \ + "0: lodsb\n" \ + " stosb\n" \ + " testb %%al,%%al\n" \ + " jz 1f\n" \ + " decl %1\n" \ + " jnz 0b\n" \ + "1: subl %1,%0\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: movl %5,%0\n" \ + " jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 0b,3b\n" \ + ".previous" \ + : "=d"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1), \ + "=&D" (__d2) \ + : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \ + : "memory"); \ +} while (0) + +/** + * __strncpy_from_user: - Copy a NUL terminated string from userspace, with less checking. + * @dst: Destination address, in kernel space. This buffer must be at + * least @count bytes long. + * @src: Source address, in user space. + * @count: Maximum number of bytes to copy, including the trailing NUL. + * + * Copies a NUL-terminated string from userspace to kernel space. + * Caller must check the specified block with access_ok() before calling + * this function. + * + * On success, returns the length of the string (not including the trailing + * NUL). + * + * If access to userspace fails, returns -EFAULT (some data may have been + * copied). + * + * If @count is smaller than the length of the string, copies @count bytes + * and returns @count. + */ +long +__strncpy_from_user(char *dst, const char __user *src, long count) +{ + long res; + __do_strncpy_from_user(dst, src, count, res); + return res; +} +EXPORT_SYMBOL(__strncpy_from_user); + +/** + * strncpy_from_user: - Copy a NUL terminated string from userspace. + * @dst: Destination address, in kernel space. This buffer must be at + * least @count bytes long. + * @src: Source address, in user space. + * @count: Maximum number of bytes to copy, including the trailing NUL. + * + * Copies a NUL-terminated string from userspace to kernel space. + * + * On success, returns the length of the string (not including the trailing + * NUL). + * + * If access to userspace fails, returns -EFAULT (some data may have been + * copied). + * + * If @count is smaller than the length of the string, copies @count bytes + * and returns @count. + */ +long +strncpy_from_user(char *dst, const char __user *src, long count) +{ + long res = -EFAULT; + if (access_ok(VERIFY_READ, src, 1)) + __do_strncpy_from_user(dst, src, count, res); + return res; +} +EXPORT_SYMBOL(strncpy_from_user); + +/* + * Zero Userspace + */ + +#define __do_clear_user(addr,size) \ +do { \ + int __d0; \ + might_sleep(); \ + __asm__ __volatile__( \ + "0: rep; stosl\n" \ + " movl %2,%0\n" \ + "1: rep; stosb\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: lea 0(%2,%0,4),%0\n" \ + " jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 0b,3b\n" \ + " .long 1b,2b\n" \ + ".previous" \ + : "=&c"(size), "=&D" (__d0) \ + : "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0)); \ +} while (0) + +/** + * clear_user: - Zero a block of memory in user space. + * @to: Destination address, in user space. + * @n: Number of bytes to zero. + * + * Zero a block of memory in user space. + * + * Returns number of bytes that could not be cleared. + * On success, this will be zero. + */ +unsigned long +clear_user(void __user *to, unsigned long n) +{ + might_sleep(); + if (access_ok(VERIFY_WRITE, to, n)) + __do_clear_user(to, n); + return n; +} +EXPORT_SYMBOL(clear_user); + +/** + * __clear_user: - Zero a block of memory in user space, with less checking. + * @to: Destination address, in user space. + * @n: Number of bytes to zero. + * + * Zero a block of memory in user space. Caller must check + * the specified block with access_ok() before calling this function. + * + * Returns number of bytes that could not be cleared. + * On success, this will be zero. + */ +unsigned long +__clear_user(void __user *to, unsigned long n) +{ + __do_clear_user(to, n); + return n; +} +EXPORT_SYMBOL(__clear_user); + +/** + * strnlen_user: - Get the size of a string in user space. + * @s: The string to measure. + * @n: The maximum valid length + * + * Get the size of a NUL-terminated string in user space. + * + * Returns the size of the string INCLUDING the terminating NUL. + * On exception, returns 0. + * If the string is too long, returns a value greater than @n. + */ +long strnlen_user(const char __user *s, long n) +{ + unsigned long mask = -__addr_ok(s); + unsigned long res, tmp; + + might_sleep(); + + __asm__ __volatile__( + " testl %0, %0\n" + " jz 3f\n" + " andl %0,%%ecx\n" + "0: repne; scasb\n" + " setne %%al\n" + " subl %%ecx,%0\n" + " addl %0,%%eax\n" + "1:\n" + ".section .fixup,\"ax\"\n" + "2: xorl %%eax,%%eax\n" + " jmp 1b\n" + "3: movb $1,%%al\n" + " jmp 1b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 0b,2b\n" + ".previous" + :"=r" (n), "=D" (s), "=a" (res), "=c" (tmp) + :"0" (n), "1" (s), "2" (0), "3" (mask) + :"cc"); + return res & mask; +} +EXPORT_SYMBOL(strnlen_user); + +#ifdef CONFIG_X86_INTEL_USERCOPY +static unsigned long +__copy_user_intel(void __user *to, const void *from, unsigned long size) +{ + int d0, d1; + __asm__ __volatile__( + " .align 2,0x90\n" + "1: movl 32(%4), %%eax\n" + " cmpl $67, %0\n" + " jbe 3f\n" + "2: movl 64(%4), %%eax\n" + " .align 2,0x90\n" + "3: movl 0(%4), %%eax\n" + "4: movl 4(%4), %%edx\n" + "5: movl %%eax, 0(%3)\n" + "6: movl %%edx, 4(%3)\n" + "7: movl 8(%4), %%eax\n" + "8: movl 12(%4),%%edx\n" + "9: movl %%eax, 8(%3)\n" + "10: movl %%edx, 12(%3)\n" + "11: movl 16(%4), %%eax\n" + "12: movl 20(%4), %%edx\n" + "13: movl %%eax, 16(%3)\n" + "14: movl %%edx, 20(%3)\n" + "15: movl 24(%4), %%eax\n" + "16: movl 28(%4), %%edx\n" + "17: movl %%eax, 24(%3)\n" + "18: movl %%edx, 28(%3)\n" + "19: movl 32(%4), %%eax\n" + "20: movl 36(%4), %%edx\n" + "21: movl %%eax, 32(%3)\n" + "22: movl %%edx, 36(%3)\n" + "23: movl 40(%4), %%eax\n" + "24: movl 44(%4), %%edx\n" + "25: movl %%eax, 40(%3)\n" + "26: movl %%edx, 44(%3)\n" + "27: movl 48(%4), %%eax\n" + "28: movl 52(%4), %%edx\n" + "29: movl %%eax, 48(%3)\n" + "30: movl %%edx, 52(%3)\n" + "31: movl 56(%4), %%eax\n" + "32: movl 60(%4), %%edx\n" + "33: movl %%eax, 56(%3)\n" + "34: movl %%edx, 60(%3)\n" + " addl $-64, %0\n" + " addl $64, %4\n" + " addl $64, %3\n" + " cmpl $63, %0\n" + " ja 1b\n" + "35: movl %0, %%eax\n" + " shrl $2, %0\n" + " andl $3, %%eax\n" + " cld\n" + "99: rep; movsl\n" + "36: movl %%eax, %0\n" + "37: rep; movsb\n" + "100:\n" + ".section .fixup,\"ax\"\n" + "101: lea 0(%%eax,%0,4),%0\n" + " jmp 100b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 1b,100b\n" + " .long 2b,100b\n" + " .long 3b,100b\n" + " .long 4b,100b\n" + " .long 5b,100b\n" + " .long 6b,100b\n" + " .long 7b,100b\n" + " .long 8b,100b\n" + " .long 9b,100b\n" + " .long 10b,100b\n" + " .long 11b,100b\n" + " .long 12b,100b\n" + " .long 13b,100b\n" + " .long 14b,100b\n" + " .long 15b,100b\n" + " .long 16b,100b\n" + " .long 17b,100b\n" + " .long 18b,100b\n" + " .long 19b,100b\n" + " .long 20b,100b\n" + " .long 21b,100b\n" + " .long 22b,100b\n" + " .long 23b,100b\n" + " .long 24b,100b\n" + " .long 25b,100b\n" + " .long 26b,100b\n" + " .long 27b,100b\n" + " .long 28b,100b\n" + " .long 29b,100b\n" + " .long 30b,100b\n" + " .long 31b,100b\n" + " .long 32b,100b\n" + " .long 33b,100b\n" + " .long 34b,100b\n" + " .long 35b,100b\n" + " .long 36b,100b\n" + " .long 37b,100b\n" + " .long 99b,101b\n" + ".previous" + : "=&c"(size), "=&D" (d0), "=&S" (d1) + : "1"(to), "2"(from), "0"(size) + : "eax", "edx", "memory"); + return size; +} + +static unsigned long +__copy_user_zeroing_intel(void *to, const void __user *from, unsigned long size) +{ + int d0, d1; + __asm__ __volatile__( + " .align 2,0x90\n" + "0: movl 32(%4), %%eax\n" + " cmpl $67, %0\n" + " jbe 2f\n" + "1: movl 64(%4), %%eax\n" + " .align 2,0x90\n" + "2: movl 0(%4), %%eax\n" + "21: movl 4(%4), %%edx\n" + " movl %%eax, 0(%3)\n" + " movl %%edx, 4(%3)\n" + "3: movl 8(%4), %%eax\n" + "31: movl 12(%4),%%edx\n" + " movl %%eax, 8(%3)\n" + " movl %%edx, 12(%3)\n" + "4: movl 16(%4), %%eax\n" + "41: movl 20(%4), %%edx\n" + " movl %%eax, 16(%3)\n" + " movl %%edx, 20(%3)\n" + "10: movl 24(%4), %%eax\n" + "51: movl 28(%4), %%edx\n" + " movl %%eax, 24(%3)\n" + " movl %%edx, 28(%3)\n" + "11: movl 32(%4), %%eax\n" + "61: movl 36(%4), %%edx\n" + " movl %%eax, 32(%3)\n" + " movl %%edx, 36(%3)\n" + "12: movl 40(%4), %%eax\n" + "71: movl 44(%4), %%edx\n" + " movl %%eax, 40(%3)\n" + " movl %%edx, 44(%3)\n" + "13: movl 48(%4), %%eax\n" + "81: movl 52(%4), %%edx\n" + " movl %%eax, 48(%3)\n" + " movl %%edx, 52(%3)\n" + "14: movl 56(%4), %%eax\n" + "91: movl 60(%4), %%edx\n" + " movl %%eax, 56(%3)\n" + " movl %%edx, 60(%3)\n" + " addl $-64, %0\n" + " addl $64, %4\n" + " addl $64, %3\n" + " cmpl $63, %0\n" + " ja 0b\n" + "5: movl %0, %%eax\n" + " shrl $2, %0\n" + " andl $3, %%eax\n" + " cld\n" + "6: rep; movsl\n" + " movl %%eax,%0\n" + "7: rep; movsb\n" + "8:\n" + ".section .fixup,\"ax\"\n" + "9: lea 0(%%eax,%0,4),%0\n" + "16: pushl %0\n" + " pushl %%eax\n" + " xorl %%eax,%%eax\n" + " rep; stosb\n" + " popl %%eax\n" + " popl %0\n" + " jmp 8b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 0b,16b\n" + " .long 1b,16b\n" + " .long 2b,16b\n" + " .long 21b,16b\n" + " .long 3b,16b\n" + " .long 31b,16b\n" + " .long 4b,16b\n" + " .long 41b,16b\n" + " .long 10b,16b\n" + " .long 51b,16b\n" + " .long 11b,16b\n" + " .long 61b,16b\n" + " .long 12b,16b\n" + " .long 71b,16b\n" + " .long 13b,16b\n" + " .long 81b,16b\n" + " .long 14b,16b\n" + " .long 91b,16b\n" + " .long 6b,9b\n" + " .long 7b,16b\n" + ".previous" + : "=&c"(size), "=&D" (d0), "=&S" (d1) + : "1"(to), "2"(from), "0"(size) + : "eax", "edx", "memory"); + return size; +} + +/* + * Non Temporal Hint version of __copy_user_zeroing_intel. It is cache aware. + * hyoshiok@miraclelinux.com + */ + +static unsigned long __copy_user_zeroing_intel_nocache(void *to, + const void __user *from, unsigned long size) +{ + int d0, d1; + + __asm__ __volatile__( + " .align 2,0x90\n" + "0: movl 32(%4), %%eax\n" + " cmpl $67, %0\n" + " jbe 2f\n" + "1: movl 64(%4), %%eax\n" + " .align 2,0x90\n" + "2: movl 0(%4), %%eax\n" + "21: movl 4(%4), %%edx\n" + " movnti %%eax, 0(%3)\n" + " movnti %%edx, 4(%3)\n" + "3: movl 8(%4), %%eax\n" + "31: movl 12(%4),%%edx\n" + " movnti %%eax, 8(%3)\n" + " movnti %%edx, 12(%3)\n" + "4: movl 16(%4), %%eax\n" + "41: movl 20(%4), %%edx\n" + " movnti %%eax, 16(%3)\n" + " movnti %%edx, 20(%3)\n" + "10: movl 24(%4), %%eax\n" + "51: movl 28(%4), %%edx\n" + " movnti %%eax, 24(%3)\n" + " movnti %%edx, 28(%3)\n" + "11: movl 32(%4), %%eax\n" + "61: movl 36(%4), %%edx\n" + " movnti %%eax, 32(%3)\n" + " movnti %%edx, 36(%3)\n" + "12: movl 40(%4), %%eax\n" + "71: movl 44(%4), %%edx\n" + " movnti %%eax, 40(%3)\n" + " movnti %%edx, 44(%3)\n" + "13: movl 48(%4), %%eax\n" + "81: movl 52(%4), %%edx\n" + " movnti %%eax, 48(%3)\n" + " movnti %%edx, 52(%3)\n" + "14: movl 56(%4), %%eax\n" + "91: movl 60(%4), %%edx\n" + " movnti %%eax, 56(%3)\n" + " movnti %%edx, 60(%3)\n" + " addl $-64, %0\n" + " addl $64, %4\n" + " addl $64, %3\n" + " cmpl $63, %0\n" + " ja 0b\n" + " sfence \n" + "5: movl %0, %%eax\n" + " shrl $2, %0\n" + " andl $3, %%eax\n" + " cld\n" + "6: rep; movsl\n" + " movl %%eax,%0\n" + "7: rep; movsb\n" + "8:\n" + ".section .fixup,\"ax\"\n" + "9: lea 0(%%eax,%0,4),%0\n" + "16: pushl %0\n" + " pushl %%eax\n" + " xorl %%eax,%%eax\n" + " rep; stosb\n" + " popl %%eax\n" + " popl %0\n" + " jmp 8b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 0b,16b\n" + " .long 1b,16b\n" + " .long 2b,16b\n" + " .long 21b,16b\n" + " .long 3b,16b\n" + " .long 31b,16b\n" + " .long 4b,16b\n" + " .long 41b,16b\n" + " .long 10b,16b\n" + " .long 51b,16b\n" + " .long 11b,16b\n" + " .long 61b,16b\n" + " .long 12b,16b\n" + " .long 71b,16b\n" + " .long 13b,16b\n" + " .long 81b,16b\n" + " .long 14b,16b\n" + " .long 91b,16b\n" + " .long 6b,9b\n" + " .long 7b,16b\n" + ".previous" + : "=&c"(size), "=&D" (d0), "=&S" (d1) + : "1"(to), "2"(from), "0"(size) + : "eax", "edx", "memory"); + return size; +} + +static unsigned long __copy_user_intel_nocache(void *to, + const void __user *from, unsigned long size) +{ + int d0, d1; + + __asm__ __volatile__( + " .align 2,0x90\n" + "0: movl 32(%4), %%eax\n" + " cmpl $67, %0\n" + " jbe 2f\n" + "1: movl 64(%4), %%eax\n" + " .align 2,0x90\n" + "2: movl 0(%4), %%eax\n" + "21: movl 4(%4), %%edx\n" + " movnti %%eax, 0(%3)\n" + " movnti %%edx, 4(%3)\n" + "3: movl 8(%4), %%eax\n" + "31: movl 12(%4),%%edx\n" + " movnti %%eax, 8(%3)\n" + " movnti %%edx, 12(%3)\n" + "4: movl 16(%4), %%eax\n" + "41: movl 20(%4), %%edx\n" + " movnti %%eax, 16(%3)\n" + " movnti %%edx, 20(%3)\n" + "10: movl 24(%4), %%eax\n" + "51: movl 28(%4), %%edx\n" + " movnti %%eax, 24(%3)\n" + " movnti %%edx, 28(%3)\n" + "11: movl 32(%4), %%eax\n" + "61: movl 36(%4), %%edx\n" + " movnti %%eax, 32(%3)\n" + " movnti %%edx, 36(%3)\n" + "12: movl 40(%4), %%eax\n" + "71: movl 44(%4), %%edx\n" + " movnti %%eax, 40(%3)\n" + " movnti %%edx, 44(%3)\n" + "13: movl 48(%4), %%eax\n" + "81: movl 52(%4), %%edx\n" + " movnti %%eax, 48(%3)\n" + " movnti %%edx, 52(%3)\n" + "14: movl 56(%4), %%eax\n" + "91: movl 60(%4), %%edx\n" + " movnti %%eax, 56(%3)\n" + " movnti %%edx, 60(%3)\n" + " addl $-64, %0\n" + " addl $64, %4\n" + " addl $64, %3\n" + " cmpl $63, %0\n" + " ja 0b\n" + " sfence \n" + "5: movl %0, %%eax\n" + " shrl $2, %0\n" + " andl $3, %%eax\n" + " cld\n" + "6: rep; movsl\n" + " movl %%eax,%0\n" + "7: rep; movsb\n" + "8:\n" + ".section .fixup,\"ax\"\n" + "9: lea 0(%%eax,%0,4),%0\n" + "16: jmp 8b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 0b,16b\n" + " .long 1b,16b\n" + " .long 2b,16b\n" + " .long 21b,16b\n" + " .long 3b,16b\n" + " .long 31b,16b\n" + " .long 4b,16b\n" + " .long 41b,16b\n" + " .long 10b,16b\n" + " .long 51b,16b\n" + " .long 11b,16b\n" + " .long 61b,16b\n" + " .long 12b,16b\n" + " .long 71b,16b\n" + " .long 13b,16b\n" + " .long 81b,16b\n" + " .long 14b,16b\n" + " .long 91b,16b\n" + " .long 6b,9b\n" + " .long 7b,16b\n" + ".previous" + : "=&c"(size), "=&D" (d0), "=&S" (d1) + : "1"(to), "2"(from), "0"(size) + : "eax", "edx", "memory"); + return size; +} + +#else + +/* + * Leave these declared but undefined. They should not be any references to + * them + */ +unsigned long __copy_user_zeroing_intel(void *to, const void __user *from, + unsigned long size); +unsigned long __copy_user_intel(void __user *to, const void *from, + unsigned long size); +unsigned long __copy_user_zeroing_intel_nocache(void *to, + const void __user *from, unsigned long size); +#endif /* CONFIG_X86_INTEL_USERCOPY */ + +/* Generic arbitrary sized copy. */ +#define __copy_user(to,from,size) \ +do { \ + int __d0, __d1, __d2; \ + __asm__ __volatile__( \ + " cmp $7,%0\n" \ + " jbe 1f\n" \ + " movl %1,%0\n" \ + " negl %0\n" \ + " andl $7,%0\n" \ + " subl %0,%3\n" \ + "4: rep; movsb\n" \ + " movl %3,%0\n" \ + " shrl $2,%0\n" \ + " andl $3,%3\n" \ + " .align 2,0x90\n" \ + "0: rep; movsl\n" \ + " movl %3,%0\n" \ + "1: rep; movsb\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "5: addl %3,%0\n" \ + " jmp 2b\n" \ + "3: lea 0(%3,%0,4),%0\n" \ + " jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 4b,5b\n" \ + " .long 0b,3b\n" \ + " .long 1b,2b\n" \ + ".previous" \ + : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \ + : "3"(size), "0"(size), "1"(to), "2"(from) \ + : "memory"); \ +} while (0) + +#define __copy_user_zeroing(to,from,size) \ +do { \ + int __d0, __d1, __d2; \ + __asm__ __volatile__( \ + " cmp $7,%0\n" \ + " jbe 1f\n" \ + " movl %1,%0\n" \ + " negl %0\n" \ + " andl $7,%0\n" \ + " subl %0,%3\n" \ + "4: rep; movsb\n" \ + " movl %3,%0\n" \ + " shrl $2,%0\n" \ + " andl $3,%3\n" \ + " .align 2,0x90\n" \ + "0: rep; movsl\n" \ + " movl %3,%0\n" \ + "1: rep; movsb\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "5: addl %3,%0\n" \ + " jmp 6f\n" \ + "3: lea 0(%3,%0,4),%0\n" \ + "6: pushl %0\n" \ + " pushl %%eax\n" \ + " xorl %%eax,%%eax\n" \ + " rep; stosb\n" \ + " popl %%eax\n" \ + " popl %0\n" \ + " jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 4b,5b\n" \ + " .long 0b,3b\n" \ + " .long 1b,6b\n" \ + ".previous" \ + : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \ + : "3"(size), "0"(size), "1"(to), "2"(from) \ + : "memory"); \ +} while (0) + +unsigned long __copy_to_user_ll(void __user *to, const void *from, + unsigned long n) +{ +#ifndef CONFIG_X86_WP_WORKS_OK + if (unlikely(boot_cpu_data.wp_works_ok == 0) && + ((unsigned long )to) < TASK_SIZE) { + /* + * When we are in an atomic section (see + * mm/filemap.c:file_read_actor), return the full + * length to take the slow path. + */ + if (in_atomic()) + return n; + + /* + * CPU does not honor the WP bit when writing + * from supervisory mode, and due to preemption or SMP, + * the page tables can change at any time. + * Do it manually. Manfred <manfred@colorfullife.com> + */ + while (n) { + unsigned long offset = ((unsigned long)to)%PAGE_SIZE; + unsigned long len = PAGE_SIZE - offset; + int retval; + struct page *pg; + void *maddr; + + if (len > n) + len = n; + +survive: + down_read(¤t->mm->mmap_sem); + retval = get_user_pages(current, current->mm, + (unsigned long )to, 1, 1, 0, &pg, NULL); + + if (retval == -ENOMEM && is_init(current)) { + up_read(¤t->mm->mmap_sem); + congestion_wait(WRITE, HZ/50); + goto survive; + } + + if (retval != 1) { + up_read(¤t->mm->mmap_sem); + break; + } + + maddr = kmap_atomic(pg, KM_USER0); + memcpy(maddr + offset, from, len); + kunmap_atomic(maddr, KM_USER0); + set_page_dirty_lock(pg); + put_page(pg); + up_read(¤t->mm->mmap_sem); + + from += len; + to += len; + n -= len; + } + return n; + } +#endif + if (movsl_is_ok(to, from, n)) + __copy_user(to, from, n); + else + n = __copy_user_intel(to, from, n); + return n; +} +EXPORT_SYMBOL(__copy_to_user_ll); + +unsigned long __copy_from_user_ll(void *to, const void __user *from, + unsigned long n) +{ + if (movsl_is_ok(to, from, n)) + __copy_user_zeroing(to, from, n); + else + n = __copy_user_zeroing_intel(to, from, n); + return n; +} +EXPORT_SYMBOL(__copy_from_user_ll); + +unsigned long __copy_from_user_ll_nozero(void *to, const void __user *from, + unsigned long n) +{ + if (movsl_is_ok(to, from, n)) + __copy_user(to, from, n); + else + n = __copy_user_intel((void __user *)to, + (const void *)from, n); + return n; +} +EXPORT_SYMBOL(__copy_from_user_ll_nozero); + +unsigned long __copy_from_user_ll_nocache(void *to, const void __user *from, + unsigned long n) +{ +#ifdef CONFIG_X86_INTEL_USERCOPY + if ( n > 64 && cpu_has_xmm2) + n = __copy_user_zeroing_intel_nocache(to, from, n); + else + __copy_user_zeroing(to, from, n); +#else + __copy_user_zeroing(to, from, n); +#endif + return n; +} + +unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from, + unsigned long n) +{ +#ifdef CONFIG_X86_INTEL_USERCOPY + if ( n > 64 && cpu_has_xmm2) + n = __copy_user_intel_nocache(to, from, n); + else + __copy_user(to, from, n); +#else + __copy_user(to, from, n); +#endif + return n; +} + +/** + * copy_to_user: - Copy a block of data into user space. + * @to: Destination address, in user space. + * @from: Source address, in kernel space. + * @n: Number of bytes to copy. + * + * Context: User context only. This function may sleep. + * + * Copy data from kernel space to user space. + * + * Returns number of bytes that could not be copied. + * On success, this will be zero. + */ +unsigned long +copy_to_user(void __user *to, const void *from, unsigned long n) +{ + if (access_ok(VERIFY_WRITE, to, n)) + n = __copy_to_user(to, from, n); + return n; +} +EXPORT_SYMBOL(copy_to_user); + +/** + * copy_from_user: - Copy a block of data from user space. + * @to: Destination address, in kernel space. + * @from: Source address, in user space. + * @n: Number of bytes to copy. + * + * Context: User context only. This function may sleep. + * + * Copy data from user space to kernel space. + * + * Returns number of bytes that could not be copied. + * On success, this will be zero. + * + * If some data could not be copied, this function will pad the copied + * data to the requested size using zero bytes. + */ +unsigned long +copy_from_user(void *to, const void __user *from, unsigned long n) +{ + if (access_ok(VERIFY_READ, from, n)) + n = __copy_from_user(to, from, n); + else + memset(to, 0, n); + return n; +} +EXPORT_SYMBOL(copy_from_user); |