diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-05-02 14:53:12 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-05-02 14:53:12 -0700 |
commit | 797994f81a8b2bdca2eecffa415c1e7a89a4f961 (patch) | |
tree | 1383dc469c26ad37fdf960f682d9a48c782935c5 /arch/x86/crypto/glue_helper-asm-avx.S | |
parent | c8d8566952fda026966784a62f324c8352f77430 (diff) | |
parent | 3862de1f6c442d53bd828d39f86d07d933a70605 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto update from Herbert Xu:
- XTS mode optimisation for twofish/cast6/camellia/aes on x86
- AVX2/x86_64 implementation for blowfish/twofish/serpent/camellia
- SSSE3/AVX/AVX2 optimisations for sha256/sha512
- Added driver for SAHARA2 crypto accelerator
- Fix for GMAC when used in non-IPsec scenarios
- Added generic CMAC implementation (including IPsec glue)
- IP update for crypto/atmel
- Support for more than one device in hwrng/timeriomem
- Added Broadcom BCM2835 RNG driver
- Misc fixes
* git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (59 commits)
crypto: caam - fix job ring cleanup code
crypto: camellia - add AVX2/AES-NI/x86_64 assembler implementation of camellia cipher
crypto: serpent - add AVX2/x86_64 assembler implementation of serpent cipher
crypto: twofish - add AVX2/x86_64 assembler implementation of twofish cipher
crypto: blowfish - add AVX2/x86_64 implementation of blowfish cipher
crypto: tcrypt - add async cipher speed tests for blowfish
crypto: testmgr - extend camellia test-vectors for camellia-aesni/avx2
crypto: aesni_intel - fix Kconfig problem with CRYPTO_GLUE_HELPER_X86
crypto: aesni_intel - add more optimized XTS mode for x86-64
crypto: x86/camellia-aesni-avx - add more optimized XTS code
crypto: cast6-avx: use new optimized XTS code
crypto: x86/twofish-avx - use optimized XTS code
crypto: x86 - add more optimized XTS-mode for serpent-avx
xfrm: add rfc4494 AES-CMAC-96 support
crypto: add CMAC support to CryptoAPI
crypto: testmgr - add empty test vectors for null ciphers
crypto: testmgr - add AES GMAC test vectors
crypto: gcm - fix rfc4543 to handle async crypto correctly
crypto: gcm - make GMAC work when dst and src are different
hwrng: timeriomem - added devicetree hooks
...
Diffstat (limited to 'arch/x86/crypto/glue_helper-asm-avx.S')
-rw-r--r-- | arch/x86/crypto/glue_helper-asm-avx.S | 61 |
1 files changed, 60 insertions, 1 deletions
diff --git a/arch/x86/crypto/glue_helper-asm-avx.S b/arch/x86/crypto/glue_helper-asm-avx.S
index f7b6ea2ddfd..02ee2308fb3 100644
--- a/arch/x86/crypto/glue_helper-asm-avx.S
+++ b/arch/x86/crypto/glue_helper-asm-avx.S
@@ -1,7 +1,7 @@
 /*
  * Shared glue code for 128bit block ciphers, AVX assembler macros
  *
- * Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -89,3 +89,62 @@
 	vpxor (6*16)(src), x6, x6; \
 	vpxor (7*16)(src), x7, x7; \
 	store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7);
+
+#define gf128mul_x_ble(iv, mask, tmp) \
+	vpsrad $31, iv, tmp; \
+	vpaddq iv, iv, iv; \
+	vpshufd $0x13, tmp, tmp; \
+	vpand mask, tmp, tmp; \
+	vpxor tmp, iv, iv;
+
+#define load_xts_8way(iv, src, dst, x0, x1, x2, x3, x4, x5, x6, x7, tiv, t0, \
+		      t1, xts_gf128mul_and_shl1_mask) \
+	vmovdqa xts_gf128mul_and_shl1_mask, t0; \
+	\
+	/* load IV */ \
+	vmovdqu (iv), tiv; \
+	vpxor (0*16)(src), tiv, x0; \
+	vmovdqu tiv, (0*16)(dst); \
+	\
+	/* construct and store IVs, also xor with source */ \
+	gf128mul_x_ble(tiv, t0, t1); \
+	vpxor (1*16)(src), tiv, x1; \
+	vmovdqu tiv, (1*16)(dst); \
+	\
+	gf128mul_x_ble(tiv, t0, t1); \
+	vpxor (2*16)(src), tiv, x2; \
+	vmovdqu tiv, (2*16)(dst); \
+	\
+	gf128mul_x_ble(tiv, t0, t1); \
+	vpxor (3*16)(src), tiv, x3; \
+	vmovdqu tiv, (3*16)(dst); \
+	\
+	gf128mul_x_ble(tiv, t0, t1); \
+	vpxor (4*16)(src), tiv, x4; \
+	vmovdqu tiv, (4*16)(dst); \
+	\
+	gf128mul_x_ble(tiv, t0, t1); \
+	vpxor (5*16)(src), tiv, x5; \
+	vmovdqu tiv, (5*16)(dst); \
+	\
+	gf128mul_x_ble(tiv, t0, t1); \
+	vpxor (6*16)(src), tiv, x6; \
+	vmovdqu tiv, (6*16)(dst); \
+	\
+	gf128mul_x_ble(tiv, t0, t1); \
+	vpxor (7*16)(src), tiv, x7; \
+	vmovdqu tiv, (7*16)(dst); \
+	\
+	gf128mul_x_ble(tiv, t0, t1); \
+	vmovdqu tiv, (iv);
+
+#define store_xts_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7) \
+	vpxor (0*16)(dst), x0, x0; \
+	vpxor (1*16)(dst), x1, x1; \
+	vpxor (2*16)(dst), x2, x2; \
+	vpxor (3*16)(dst), x3, x3; \
+	vpxor (4*16)(dst), x4, x4; \
+	vpxor (5*16)(dst), x5, x5; \
+	vpxor (6*16)(dst), x6, x6; \
+	vpxor (7*16)(dst), x7, x7; \
+	store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7);