diff options
author | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 15:20:36 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 15:20:36 -0700 |
commit | 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch) | |
tree | 0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/sh64/lib/memcpy.c |
Linux-2.6.12-rc2 (tag: v2.6.12-rc2)
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.
Let it rip!
Diffstat (limited to 'arch/sh64/lib/memcpy.c')
-rw-r--r-- | arch/sh64/lib/memcpy.c | 82 |
1 files changed, 82 insertions, 0 deletions
/*
 * arch/sh64/lib/memcpy.c
 *
 * Copyright (C) 2002 Mark Debbage (Mark.Debbage@superh.com)
 *
 * May be copied or modified under the terms of the GNU General Public
 * License.  See linux/COPYING for more information.
 *
 * In the kernel tree this file includes <linux/config.h>,
 * <linux/types.h> (for size_t) and <asm/string.h> ahead of the
 * definition below.
 */

/*
 * This is a simplistic optimization of memcpy to increase the
 * granularity of access beyond one byte using aligned loads and
 * stores.  This is not an optimal implementation for SH-5 (especially
 * with regard to prefetching and the cache), and a better version
 * should be provided later ...
 */

/**
 * memcpy - copy a block of memory
 * @dest:  where to copy to
 * @src:   where to copy from
 * @count: number of bytes to copy
 *
 * Copies of fewer than 32 bytes are done one byte at a time.  Longer
 * copies first byte-copy until @dest is 8-byte aligned, then move data
 * in the widest unit (8, 4 or 2 bytes) for which both cursors are
 * aligned, and finish any remainder one byte at a time.
 *
 * The copy always runs forward; overlapping regions are not handled.
 *
 * Returns @dest.
 */
void *memcpy(void *dest, const void *src, size_t count)
{
	char *d = dest;
	const char *s = src;

	if (count >= 32) {
		/* Bytes needed to bring d up to an 8-byte boundary (0..7). */
		size_t head = (8 - ((unsigned long) d & 0x7)) & 0x7;

		/* count >= 32 > head, so this subtraction cannot wrap. */
		count -= head;
		while (head--) {
			*d++ = *s++;
		}

		/* d is 8-byte aligned here; s is only if it shares d's offset. */
		if ((((unsigned long) d | (unsigned long) s) & 0x7) == 0) {
			while (count >= 32) {
				unsigned long long t1, t2, t3, t4;

				/* Issue all four loads before the stores. */
				t1 = *(const unsigned long long *) (s);
				t2 = *(const unsigned long long *) (s + 8);
				t3 = *(const unsigned long long *) (s + 16);
				t4 = *(const unsigned long long *) (s + 24);
				*(unsigned long long *) (d) = t1;
				*(unsigned long long *) (d + 8) = t2;
				*(unsigned long long *) (d + 16) = t3;
				*(unsigned long long *) (d + 24) = t4;
				d += 32;
				s += 32;
				count -= 32;
			}
			while (count >= 8) {
				*(unsigned long long *) d =
					*(const unsigned long long *) s;
				d += 8;
				s += 8;
				count -= 8;
			}
		}

		if ((((unsigned long) d | (unsigned long) s) & 0x3) == 0) {
			/*
			 * Use a 4-byte type to match the 4-byte stride:
			 * 'unsigned long' is wider than that on LP64 and
			 * would run past the end of both buffers.
			 */
			while (count >= 4) {
				*(unsigned int *) d = *(const unsigned int *) s;
				d += 4;
				s += 4;
				count -= 4;
			}
		}

		if ((((unsigned long) d | (unsigned long) s) & 0x1) == 0) {
			while (count >= 2) {
				*(unsigned short *) d =
					*(const unsigned short *) s;
				d += 2;
				s += 2;
				count -= 2;
			}
		}
	}

	/* Short copies and whatever the wide loops left over. */
	while (count--) {
		*d++ = *s++;
	}

	/* memcpy returns the start of the destination, not the cursor. */
	return dest;
}