summaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2008-01-25 08:38:25 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2008-01-25 08:38:25 -0800
commiteba0e319c12fb098d66316a8eafbaaa9174a07c3 (patch)
treeb2703117db9e36bb3510654efd55361f61c54742 /arch/x86
parentdf8dc74e8a383eaf2d9b44b80a71ec6f0e52b42e (diff)
parent15e7b4452b72ae890f2fcb027b4c4fa63a1c9a7a (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
* git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (125 commits) [CRYPTO] twofish: Merge common glue code [CRYPTO] hifn_795x: Fixup container_of() usage [CRYPTO] cast6: inline bloat-- [CRYPTO] api: Set default CRYPTO_MINALIGN to unsigned long long [CRYPTO] tcrypt: Make xcbc available as a standalone test [CRYPTO] xcbc: Remove bogus hash/cipher test [CRYPTO] xcbc: Fix algorithm leak when block size check fails [CRYPTO] tcrypt: Zero axbuf in the right function [CRYPTO] padlock: Only reset the key once for each CBC and ECB operation [CRYPTO] api: Include sched.h for cond_resched in scatterwalk.h [CRYPTO] salsa20-asm: Remove unnecessary dependency on CRYPTO_SALSA20 [CRYPTO] tcrypt: Add select of AEAD [CRYPTO] salsa20: Add x86-64 assembly version [CRYPTO] salsa20_i586: Salsa20 stream cipher algorithm (i586 version) [CRYPTO] gcm: Introduce rfc4106 [CRYPTO] api: Show async type [CRYPTO] chainiv: Avoid lock spinning where possible [CRYPTO] seqiv: Add select AEAD in Kconfig [CRYPTO] scatterwalk: Handle zero nbytes in scatterwalk_map_and_copy [CRYPTO] null: Allow setkey on digest_null ...
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/crypto/Makefile12
-rw-r--r--arch/x86/crypto/aes-i586-asm_32.S89
-rw-r--r--arch/x86/crypto/aes-x86_64-asm_64.S68
-rw-r--r--arch/x86/crypto/aes_32.c515
-rw-r--r--arch/x86/crypto/aes_64.c336
-rw-r--r--arch/x86/crypto/aes_glue.c57
-rw-r--r--arch/x86/crypto/salsa20-i586-asm_32.S1114
-rw-r--r--arch/x86/crypto/salsa20-x86_64-asm_64.S920
-rw-r--r--arch/x86/crypto/salsa20_glue.c129
-rw-r--r--arch/x86/crypto/twofish_64.c97
-rw-r--r--arch/x86/crypto/twofish_glue.c (renamed from arch/x86/crypto/twofish_32.c)8
11 files changed, 2309 insertions, 1036 deletions
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 46bb609e244..3874c2de540 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -4,12 +4,16 @@
obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o
obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
+obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
+obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o
-aes-i586-y := aes-i586-asm_32.o aes_32.o
-twofish-i586-y := twofish-i586-asm_32.o twofish_32.o
+aes-i586-y := aes-i586-asm_32.o aes_glue.o
+twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o
+salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o
-aes-x86_64-y := aes-x86_64-asm_64.o aes_64.o
-twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_64.o
+aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
+twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
+salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o
diff --git a/arch/x86/crypto/aes-i586-asm_32.S b/arch/x86/crypto/aes-i586-asm_32.S
index f942f0c8f63..1093bede3e0 100644
--- a/arch/x86/crypto/aes-i586-asm_32.S
+++ b/arch/x86/crypto/aes-i586-asm_32.S
@@ -46,9 +46,9 @@
#define in_blk 16
/* offsets in crypto_tfm structure */
-#define ekey (crypto_tfm_ctx_offset + 0)
-#define nrnd (crypto_tfm_ctx_offset + 256)
-#define dkey (crypto_tfm_ctx_offset + 260)
+#define klen (crypto_tfm_ctx_offset + 0)
+#define ekey (crypto_tfm_ctx_offset + 4)
+#define dkey (crypto_tfm_ctx_offset + 244)
// register mapping for encrypt and decrypt subroutines
@@ -221,8 +221,8 @@
.global aes_enc_blk
-.extern ft_tab
-.extern fl_tab
+.extern crypto_ft_tab
+.extern crypto_fl_tab
.align 4
@@ -236,7 +236,7 @@ aes_enc_blk:
1: push %ebx
mov in_blk+4(%esp),%r2
push %esi
- mov nrnd(%ebp),%r3 // number of rounds
+ mov klen(%ebp),%r3 // key size
push %edi
#if ekey != 0
lea ekey(%ebp),%ebp // key pointer
@@ -255,26 +255,26 @@ aes_enc_blk:
sub $8,%esp // space for register saves on stack
add $16,%ebp // increment to next round key
- cmp $12,%r3
+ cmp $24,%r3
jb 4f // 10 rounds for 128-bit key
lea 32(%ebp),%ebp
je 3f // 12 rounds for 192-bit key
lea 32(%ebp),%ebp
-2: fwd_rnd1( -64(%ebp) ,ft_tab) // 14 rounds for 256-bit key
- fwd_rnd2( -48(%ebp) ,ft_tab)
-3: fwd_rnd1( -32(%ebp) ,ft_tab) // 12 rounds for 192-bit key
- fwd_rnd2( -16(%ebp) ,ft_tab)
-4: fwd_rnd1( (%ebp) ,ft_tab) // 10 rounds for 128-bit key
- fwd_rnd2( +16(%ebp) ,ft_tab)
- fwd_rnd1( +32(%ebp) ,ft_tab)
- fwd_rnd2( +48(%ebp) ,ft_tab)
- fwd_rnd1( +64(%ebp) ,ft_tab)
- fwd_rnd2( +80(%ebp) ,ft_tab)
- fwd_rnd1( +96(%ebp) ,ft_tab)
- fwd_rnd2(+112(%ebp) ,ft_tab)
- fwd_rnd1(+128(%ebp) ,ft_tab)
- fwd_rnd2(+144(%ebp) ,fl_tab) // last round uses a different table
+2: fwd_rnd1( -64(%ebp), crypto_ft_tab) // 14 rounds for 256-bit key
+ fwd_rnd2( -48(%ebp), crypto_ft_tab)
+3: fwd_rnd1( -32(%ebp), crypto_ft_tab) // 12 rounds for 192-bit key
+ fwd_rnd2( -16(%ebp), crypto_ft_tab)
+4: fwd_rnd1( (%ebp), crypto_ft_tab) // 10 rounds for 128-bit key
+ fwd_rnd2( +16(%ebp), crypto_ft_tab)
+ fwd_rnd1( +32(%ebp), crypto_ft_tab)
+ fwd_rnd2( +48(%ebp), crypto_ft_tab)
+ fwd_rnd1( +64(%ebp), crypto_ft_tab)
+ fwd_rnd2( +80(%ebp), crypto_ft_tab)
+ fwd_rnd1( +96(%ebp), crypto_ft_tab)
+ fwd_rnd2(+112(%ebp), crypto_ft_tab)
+ fwd_rnd1(+128(%ebp), crypto_ft_tab)
+ fwd_rnd2(+144(%ebp), crypto_fl_tab) // last round uses a different table
// move final values to the output array. CAUTION: the
// order of these assigns rely on the register mappings
@@ -297,8 +297,8 @@ aes_enc_blk:
.global aes_dec_blk
-.extern it_tab
-.extern il_tab
+.extern crypto_it_tab
+.extern crypto_il_tab
.align 4
@@ -312,14 +312,11 @@ aes_dec_blk:
1: push %ebx
mov in_blk+4(%esp),%r2
push %esi
- mov nrnd(%ebp),%r3 // number of rounds
+ mov klen(%ebp),%r3 // key size
push %edi
#if dkey != 0
lea dkey(%ebp),%ebp // key pointer
#endif
- mov %r3,%r0
- shl $4,%r0
- add %r0,%ebp
// input four columns and xor in first round key
@@ -333,27 +330,27 @@ aes_dec_blk:
xor 12(%ebp),%r5
sub $8,%esp // space for register saves on stack
- sub $16,%ebp // increment to next round key
- cmp $12,%r3
+ add $16,%ebp // increment to next round key
+ cmp $24,%r3
jb 4f // 10 rounds for 128-bit key
- lea -32(%ebp),%ebp
+ lea 32(%ebp),%ebp
je 3f // 12 rounds for 192-bit key
- lea -32(%ebp),%ebp
-
-2: inv_rnd1( +64(%ebp), it_tab) // 14 rounds for 256-bit key
- inv_rnd2( +48(%ebp), it_tab)
-3: inv_rnd1( +32(%ebp), it_tab) // 12 rounds for 192-bit key
- inv_rnd2( +16(%ebp), it_tab)
-4: inv_rnd1( (%ebp), it_tab) // 10 rounds for 128-bit key
- inv_rnd2( -16(%ebp), it_tab)
- inv_rnd1( -32(%ebp), it_tab)
- inv_rnd2( -48(%ebp), it_tab)
- inv_rnd1( -64(%ebp), it_tab)
- inv_rnd2( -80(%ebp), it_tab)
- inv_rnd1( -96(%ebp), it_tab)
- inv_rnd2(-112(%ebp), it_tab)
- inv_rnd1(-128(%ebp), it_tab)
- inv_rnd2(-144(%ebp), il_tab) // last round uses a different table
+ lea 32(%ebp),%ebp
+
+2: inv_rnd1( -64(%ebp), crypto_it_tab) // 14 rounds for 256-bit key
+ inv_rnd2( -48(%ebp), crypto_it_tab)
+3: inv_rnd1( -32(%ebp), crypto_it_tab) // 12 rounds for 192-bit key
+ inv_rnd2( -16(%ebp), crypto_it_tab)
+4: inv_rnd1( (%ebp), crypto_it_tab) // 10 rounds for 128-bit key
+ inv_rnd2( +16(%ebp), crypto_it_tab)
+ inv_rnd1( +32(%ebp), crypto_it_tab)
+ inv_rnd2( +48(%ebp), crypto_it_tab)
+ inv_rnd1( +64(%ebp), crypto_it_tab)
+ inv_rnd2( +80(%ebp), crypto_it_tab)
+ inv_rnd1( +96(%ebp), crypto_it_tab)
+ inv_rnd2(+112(%ebp), crypto_it_tab)
+ inv_rnd1(+128(%ebp), crypto_it_tab)
+ inv_rnd2(+144(%ebp), crypto_il_tab) // last round uses a different table
// move final values to the output array. CAUTION: the
// order of these assigns rely on the register mappings
diff --git a/arch/x86/crypto/aes-x86_64-asm_64.S b/arch/x86/crypto/aes-x86_64-asm_64.S
index 26b40de4d0b..a120f526c3d 100644
--- a/arch/x86/crypto/aes-x86_64-asm_64.S
+++ b/arch/x86/crypto/aes-x86_64-asm_64.S
@@ -8,10 +8,10 @@
* including this sentence is retained in full.
*/
-.extern aes_ft_tab
-.extern aes_it_tab
-.extern aes_fl_tab
-.extern aes_il_tab
+.extern crypto_ft_tab
+.extern crypto_it_tab
+.extern crypto_fl_tab
+.extern crypto_il_tab
.text
@@ -56,13 +56,13 @@
.align 8; \
FUNC: movq r1,r2; \
movq r3,r4; \
- leaq BASE+KEY+52(r8),r9; \
+ leaq BASE+KEY+48+4(r8),r9; \
movq r10,r11; \
movl (r7),r5 ## E; \
movl 4(r7),r1 ## E; \
movl 8(r7),r6 ## E; \
movl 12(r7),r7 ## E; \
- movl BASE(r8),r10 ## E; \
+ movl BASE+0(r8),r10 ## E; \
xorl -48(r9),r5 ## E; \
xorl -44(r9),r1 ## E; \
xorl -40(r9),r6 ## E; \
@@ -154,37 +154,37 @@ FUNC: movq r1,r2; \
/* void aes_enc_blk(stuct crypto_tfm *tfm, u8 *out, const u8 *in) */
entry(aes_enc_blk,0,enc128,enc192)
- encrypt_round(aes_ft_tab,-96)
- encrypt_round(aes_ft_tab,-80)
-enc192: encrypt_round(aes_ft_tab,-64)
- encrypt_round(aes_ft_tab,-48)
-enc128: encrypt_round(aes_ft_tab,-32)
- encrypt_round(aes_ft_tab,-16)
- encrypt_round(aes_ft_tab, 0)
- encrypt_round(aes_ft_tab, 16)
- encrypt_round(aes_ft_tab, 32)
- encrypt_round(aes_ft_tab, 48)
- encrypt_round(aes_ft_tab, 64)
- encrypt_round(aes_ft_tab, 80)
- encrypt_round(aes_ft_tab, 96)
- encrypt_final(aes_fl_tab,112)
+ encrypt_round(crypto_ft_tab,-96)
+ encrypt_round(crypto_ft_tab,-80)
+enc192: encrypt_round(crypto_ft_tab,-64)
+ encrypt_round(crypto_ft_tab,-48)
+enc128: encrypt_round(crypto_ft_tab,-32)
+ encrypt_round(crypto_ft_tab,-16)
+ encrypt_round(crypto_ft_tab, 0)
+ encrypt_round(crypto_ft_tab, 16)
+ encrypt_round(crypto_ft_tab, 32)
+ encrypt_round(crypto_ft_tab, 48)
+ encrypt_round(crypto_ft_tab, 64)
+ encrypt_round(crypto_ft_tab, 80)
+ encrypt_round(crypto_ft_tab, 96)
+ encrypt_final(crypto_fl_tab,112)
return
/* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in) */
entry(aes_dec_blk,240,dec128,dec192)
- decrypt_round(aes_it_tab,-96)
- decrypt_round(aes_it_tab,-80)
-dec192: decrypt_round(aes_it_tab,-64)
- decrypt_round(aes_it_tab,-48)
-dec128: decrypt_round(aes_it_tab,-32)
- decrypt_round(aes_it_tab,-16)
- decrypt_round(aes_it_tab, 0)
- decrypt_round(aes_it_tab, 16)
- decrypt_round(aes_it_tab, 32)
- decrypt_round(aes_it_tab, 48)
- decrypt_round(aes_it_tab, 64)
- decrypt_round(aes_it_tab, 80)
- decrypt_round(aes_it_tab, 96)
- decrypt_final(aes_il_tab,112)
+ decrypt_round(crypto_it_tab,-96)
+ decrypt_round(crypto_it_tab,-80)
+dec192: decrypt_round(crypto_it_tab,-64)
+ decrypt_round(crypto_it_tab,-48)
+dec128: decrypt_round(crypto_it_tab,-32)
+ decrypt_round(crypto_it_tab,-16)
+ decrypt_round(crypto_it_tab, 0)
+ decrypt_round(crypto_it_tab, 16)
+ decrypt_round(crypto_it_tab, 32)
+ decrypt_round(crypto_it_tab, 48)
+ decrypt_round(crypto_it_tab, 64)
+ decrypt_round(crypto_it_tab, 80)
+ decrypt_round(crypto_it_tab, 96)
+ decrypt_final(crypto_il_tab,112)
return
diff --git a/arch/x86/crypto/aes_32.c b/arch/x86/crypto/aes_32.c
deleted file mode 100644
index 49aad9397f1..00000000000
--- a/arch/x86/crypto/aes_32.c
+++ /dev/null
@@ -1,515 +0,0 @@
-/*
- *
- * Glue Code for optimized 586 assembler version of AES
- *
- * Copyright (c) 2002, Dr Brian Gladman <>, Worcester, UK.
- * All rights reserved.
- *
- * LICENSE TERMS
- *
- * The free distribution and use of this software in both source and binary
- * form is allowed (with or without changes) provided that:
- *
- * 1. distributions of this source code include the above copyright
- * notice, this list of conditions and the following disclaimer;
- *
- * 2. distributions in binary form include the above copyright
- * notice, this list of conditions and the following disclaimer
- * in the documentation and/or other associated materials;
- *
- * 3. the copyright holder's name is not used to endorse products
- * built using this software without specific written permission.
- *
- * ALTERNATIVELY, provided that this notice is retained in full, this product
- * may be distributed under the terms of the GNU General Public License (GPL),
- * in which case the provisions of the GPL apply INSTEAD OF those given above.
- *
- * DISCLAIMER
- *
- * This software is provided 'as is' with no explicit or implied warranties
- * in respect of its properties, including, but not limited to, correctness
- * and/or fitness for purpose.
- *
- * Copyright (c) 2003, Adam J. Richter <adam@yggdrasil.com> (conversion to
- * 2.5 API).
- * Copyright (c) 2003, 2004 Fruhwirth Clemens <clemens@endorphin.org>
- * Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
- *
- */
-
-#include <asm/byteorder.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/crypto.h>
-#include <linux/linkage.h>
-
-asmlinkage void aes_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
-asmlinkage void aes_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
-
-#define AES_MIN_KEY_SIZE 16
-#define AES_MAX_KEY_SIZE 32
-#define AES_BLOCK_SIZE 16
-#define AES_KS_LENGTH 4 * AES_BLOCK_SIZE
-#define RC_LENGTH 29
-
-struct aes_ctx {
- u32 ekey[AES_KS_LENGTH];
- u32 rounds;
- u32 dkey[AES_KS_LENGTH];
-};
-
-#define WPOLY 0x011b
-#define bytes2word(b0, b1, b2, b3) \
- (((u32)(b3) << 24) | ((u32)(b2) << 16) | ((u32)(b1) << 8) | (b0))
-
-/* define the finite field multiplies required for Rijndael */
-#define f2(x) ((x) ? pow[log[x] + 0x19] : 0)
-#define f3(x) ((x) ? pow[log[x] + 0x01] : 0)
-#define f9(x) ((x) ? pow[log[x] + 0xc7] : 0)
-#define fb(x) ((x) ? pow[log[x] + 0x68] : 0)
-#define fd(x) ((x) ? pow[log[x] + 0xee] : 0)
-#define fe(x) ((x) ? pow[log[x] + 0xdf] : 0)
-#define fi(x) ((x) ? pow[255 - log[x]]: 0)
-
-static inline u32 upr(u32 x, int n)
-{
- return (x << 8 * n) | (x >> (32 - 8 * n));
-}
-
-static inline u8 bval(u32 x, int n)
-{
- return x >> 8 * n;
-}
-
-/* The forward and inverse affine transformations used in the S-box */
-#define fwd_affine(x) \
- (w = (u32)x, w ^= (w<<1)^(w<<2)^(w<<3)^(w<<4), 0x63^(u8)(w^(w>>8)))
-
-#define inv_affine(x) \
- (w = (u32)x, w = (w<<1)^(w<<3)^(w<<6), 0x05^(u8)(w^(w>>8)))
-
-static u32 rcon_tab[RC_LENGTH];
-
-u32 ft_tab[4][256];
-u32 fl_tab[4][256];
-static u32 im_tab[4][256];
-u32 il_tab[4][256];
-u32 it_tab[4][256];
-
-static void gen_tabs(void)
-{
- u32 i, w;
- u8 pow[512], log[256];
-
- /*
- * log and power tables for GF(2^8) finite field with
- * WPOLY as modular polynomial - the simplest primitive
- * root is 0x03, used here to generate the tables.
- */
- i = 0; w = 1;
-
- do {
- pow[i] = (u8)w;
- pow[i + 255] = (u8)w;
- log[w] = (u8)i++;
- w ^= (w << 1) ^ (w & 0x80 ? WPOLY : 0);
- } while (w != 1);
-
- for(i = 0, w = 1; i < RC_LENGTH; ++i) {
- rcon_tab[i] = bytes2word(w, 0, 0, 0);
- w = f2(w);
- }
-
- for(i = 0; i < 256; ++i) {
- u8 b;
-
- b = fwd_affine(fi((u8)i));
- w = bytes2word(f2(b), b, b, f3(b));
-
- /* tables for a normal encryption round */
- ft_tab[0][i] = w;
- ft_tab[1][i] = upr(w, 1);
- ft_tab[2][i] = upr(w, 2);
- ft_tab[3][i] = upr(w, 3);
- w = bytes2word(b, 0, 0, 0);
-
- /*
- * tables for last encryption round
- * (may also be used in the key schedule)
- */
- fl_tab[0][i] = w;
- fl_tab[1][i] = upr(w, 1);
- fl_tab[2][i] = upr(w, 2);
- fl_tab[3][i] = upr(w, 3);
-
- b = fi(inv_affine((u8)i));
- w = bytes2word(fe(b), f9(b), fd(b), fb(b));
-
- /* tables for the inverse mix column operation */
- im_tab[0][b] = w;
- im_tab[1][b] = upr(w, 1);
- im_tab[2][b] = upr(w, 2);
- im_tab[3][b] = upr(w, 3);
-
- /* tables for a normal decryption round */
- it_tab[0][i] = w;
- it_tab[1][i] = upr(w,1);
- it_tab[2][i] = upr(w,2);
- it_tab[3][i] = upr(w,3);
-
- w = bytes2word(b, 0, 0, 0);
-
- /* tables for last decryption round */
- il_tab[0][i] = w;
- il_tab[1][i] = upr(w,1);
- il_tab[2][i] = upr(w,2);
- il_tab[3][i] = upr(w,3);
- }
-}
-
-#define four_tables(x,tab,vf,rf,c) \
-( tab[0][bval(vf(x,0,c),rf(0,c))] ^ \
- tab[1][bval(vf(x,1,c),rf(1,c))] ^ \
- tab[2][bval(vf(x,2,c),rf(2,c))] ^ \
- tab[3][bval(vf(x,3,c),rf(3,c))] \
-)
-
-#define vf1(x,r,c) (x)
-#define rf1(r,c) (r)
-#define rf2(r,c) ((r-c)&3)
-
-#define inv_mcol(x) four_tables(x,im_tab,vf1,rf1,0)
-#define ls_box(x,c) four_tables(x,fl_tab,vf1,rf2,c)
-
-#define ff(x) inv_mcol(x)
-
-#define ke4(k,i) \
-{ \
- k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ rcon_tab[i]; \
- k[4*(i)+5] = ss[1] ^= ss[0]; \
- k[4*(i)+6] = ss[2] ^= ss[1]; \
- k[4*(i)+7] = ss[3] ^= ss[2]; \
-}
-
-#define kel4(k,i) \
-{ \
- k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ rcon_tab[i]; \
- k[4*(i)+5] = ss[1] ^= ss[0]; \
- k[4*(i)+6] = ss[2] ^= ss[1]; k[4*(i)+7] = ss[3] ^= ss[2]; \
-}
-
-#define ke6(k,i) \
-{ \
- k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; \
- k[6*(i)+ 7] = ss[1] ^= ss[0]; \
- k[6*(i)+ 8] = ss[2] ^= ss[1]; \
- k[6*(i)+ 9] = ss[3] ^= ss[2]; \
- k[6*(i)+10] = ss[4] ^= ss[3]; \
- k[6*(i)+11] = ss[5] ^= ss[4]; \
-}
-
-#define kel6(k,i) \
-{ \
- k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; \
- k[6*(i)+ 7] = ss[1] ^= ss[0]; \
- k[6*(i)+ 8] = ss[2] ^= ss[1]; \
- k[6*(i)+ 9] = ss[3] ^= ss[2]; \
-}
-
-#define ke8(k,i) \
-{ \
- k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; \
- k[8*(i)+ 9] = ss[1] ^= ss[0]; \
- k[8*(i)+10] = ss[2] ^= ss[1]; \
- k[8*(i)+11] = ss[3] ^= ss[2]; \
- k[8*(i)+12] = ss[4] ^= ls_box(ss[3],0); \
- k[8*(i)+13] = ss[5] ^= ss[4]; \
- k[8*(i)+14] = ss[6] ^= ss[5]; \
- k[8*(i)+15] = ss[7] ^= ss[6]; \
-}
-
-#define kel8(k,i) \
-{ \
- k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; \
- k[8*(i)+ 9] = ss[1] ^= ss[0]; \
- k[8*(i)+10] = ss[2] ^= ss[1]; \
- k[8*(i)+11] = ss[3] ^= ss[2]; \
-}
-
-#define kdf4(k,i) \
-{ \
- ss[0] = ss[0] ^ ss[2] ^ ss[1] ^ ss[3]; \
- ss[1] = ss[1] ^ ss[3]; \
- ss[2] = ss[2] ^ ss[3]; \
- ss[3] = ss[3]; \
- ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i]; \
- ss[i % 4] ^= ss[4]; \
- ss[4] ^= k[4*(i)]; \
- k[4*(i)+4] = ff(ss[4]); \
- ss[4] ^= k[4*(i)+1]; \
- k[4*(i)+5] = ff(ss[4]); \
- ss[4] ^= k[4*(i)+2]; \
- k[4*(i)+6] = ff(ss[4]); \
- ss[4] ^= k[4*(i)+3]; \
- k[4*(i)+7] = ff(ss[4]); \
-}
-
-#define kd4(k,i) \
-{ \
- ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i]; \
- ss[i % 4] ^= ss[4]; \
- ss[4] = ff(ss[4]); \
- k[4*(i)+4] = ss[4] ^= k[4*(i)]; \
- k[4*(i)+5] = ss[4] ^= k[4*(i)+1]; \
- k[4*(i)+6] = ss[4] ^= k[4*(i)+2]; \
- k[4*(i)+7] = ss[4] ^= k[4*(i)+3]; \
-}
-
-#define kdl4(k,i) \
-{ \
- ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i]; \
- ss[i % 4] ^= ss[4]; \
- k[4*(i)+4] = (ss[0] ^= ss[1]) ^ ss[2] ^ ss[3]; \
- k[4*(i)+5] = ss[1] ^ ss[3]; \
- k[4*(i)+6] = ss[0]; \
- k[4*(i)+7] = ss[1]; \
-}
-
-#define kdf6(k,i) \
-{ \
- ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; \
- k[6*(i)+ 6] = ff(ss[0]); \
- ss[1] ^= ss[0]; \
- k[6*(i)+ 7] = ff(ss[1]); \
- ss[2] ^= ss[1]; \
- k[6*(i)+ 8] = ff(ss[2]); \
- ss[3] ^= ss[2]; \
- k[6*(i)+ 9] = ff(ss[3]); \
- ss[4] ^= ss[3]; \
- k[6*(i)+10] = ff(ss[4]); \
- ss[5] ^= ss[4]; \
- k[6*(i)+11] = ff(ss[5]); \
-}
-
-#define kd6(k,i) \
-{ \
- ss[6] = ls_box(ss[5],3) ^ rcon_tab[i]; \
- ss[0] ^= ss[6]; ss[6] = ff(ss[6]); \
- k[6*(i)+ 6] = ss[6] ^= k[6*(i)]; \
- ss[1] ^= ss[0]; \
- k[6*(i)+ 7] = ss[6] ^= k[6*(i)+ 1]; \
- ss[2] ^= ss[1]; \
- k[6*(i)+ 8] = ss[6] ^= k[6*(i)+ 2]; \
- ss[3] ^= ss[2]; \
- k[6*(i)+ 9] = ss[6] ^= k[6*(i)+ 3]; \
- ss[4] ^= ss[3]; \
- k[6*(i)+10] = ss[6] ^= k[6*(i)+ 4]; \
- ss[5] ^= ss[4]; \
- k[6*(i)+11] = ss[6] ^= k[6*(i)+ 5]; \
-}
-
-#define kdl6(k,i) \
-{ \
- ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; \
- k[6*(i)+ 6] = ss[0]; \
- ss[1] ^= ss[0]; \
- k[6*(i)+ 7] = ss[1]; \
- ss[2] ^= ss[1]; \
- k[6*(i)+ 8] = ss[2]; \
- ss[3] ^= ss[2]; \
- k[6*(i)+ 9] = ss[3]; \
-}
-
-#define kdf8(k,i) \
-{ \
- ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; \
- k[8*(i)+ 8] = ff(ss[0]); \
- ss[1] ^= ss[0]; \
- k[8*(i)+ 9] = ff(ss[1]); \
- ss[2] ^= ss[1]; \
- k[8*(i)+10] = ff(ss[2]); \
- ss[3] ^= ss[2]; \
- k[8*(i)+11] = ff(ss[3]); \
- ss[4] ^= ls_box(ss[3],0); \
- k[8*(i)+12] = ff(ss[4]); \
- ss[5] ^= ss[4]; \
- k[8*(i)+13] = ff(ss[5]); \
- ss[6] ^= ss[5]; \
- k[8*(i)+14] = ff(ss[6]); \
- ss[7] ^= ss[6]; \
- k[8*(i)+15] = ff(ss[7]); \
-}
-
-#define kd8(k,i) \
-{ \
- u32 __g = ls_box(ss[7],3) ^ rcon_tab[i]; \
- ss[0] ^= __g; \
- __g = ff(__g); \
- k[8*(i)+ 8] = __g ^= k[8*(i)]; \
- ss[1] ^= ss[0]; \
- k[8*(i)+ 9] = __g ^= k[8*(i)+ 1]; \
- ss[2] ^= ss[1]; \
- k[8*(i)+10] = __g ^= k[8*(i)+ 2]; \
- ss[3] ^= ss[2]; \
- k[8*(i)+11] = __g ^= k[8*(i)+ 3]; \
- __g = ls_box(ss[3],0); \
- ss[4] ^= __g; \
- __g = ff(__g); \
- k[8*(i)+12] = __g ^= k[8*(i)+ 4]; \
- ss[5] ^= ss[4]; \
- k[8*(i)+13] = __g ^= k[8*(i)+ 5]; \
- ss[6] ^= ss[5]; \
- k[8*(i)+14] = __g ^= k[8*(i)+ 6]; \
- ss[7] ^= ss[6]; \
- k[8*(i)+15] = __g ^= k[8*(i)+ 7]; \
-}
-
-#define kdl8(k,i) \
-{ \
- ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; \
- k[8*(i)+ 8] = ss[0]; \
- ss[1] ^= ss[0]; \
- k[8*(i)+ 9] = ss[1]; \
- ss[2] ^= ss[1]; \
- k[8*(i)+10] = ss[2]; \
- ss[3] ^= ss[2]; \
- k[8*(i)+11] = ss[3]; \
-}
-
-static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
- unsigned int key_len)
-{
- int i;
- u32 ss[8];
- struct aes_ctx *ctx = crypto_tfm_ctx(tfm);
- const __le32 *key = (const __le32 *)in_key;
- u32 *flags = &tfm->crt_flags;
-
- /* encryption schedule */
-
- ctx->ekey[0] = ss[0] = le32_to_cpu(key[0]);
- ctx->ekey[1] = ss[1] = le32_to_cpu(key[1]);
- ctx->ekey[2] = ss[2] = le32_to_cpu(key[2]);
- ctx->ekey[3] = ss[3] = le32_to_cpu(key[3]);
-
- switch(key_len) {
- case 16:
- for (i = 0; i < 9; i++)
- ke4(ctx->ekey, i);
- kel4(ctx->ekey, 9);
- ctx->rounds = 10;
- break;
-
- case 24:
- ctx->ekey[4] = ss[4] = le32_to_cpu(key[4]);
- ctx->ekey[5] = ss[5] = le32_to_cpu(key[5]);
- for (i = 0; i < 7; i++)
- ke6(ctx->ekey, i);
- kel6(ctx->ekey, 7);
- ctx->rounds = 12;
- break;
-
- case 32:
- ctx->ekey[4] = ss[4] = le32_to_cpu(key[4]);
- ctx->ekey[5] = ss[5] = le32_to_cpu(key[5]);
- ctx->ekey[6] = ss[6] = le32_to_cpu(key[6]);
- ctx->ekey[7] = ss[7] = le32_to_cpu(key[7]);
- for (i = 0; i < 6; i++)
- ke8(ctx->ekey, i);
- kel8(ctx->ekey, 6);
- ctx->rounds = 14;
- break;
-
- default:
- *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
- return -EINVAL;
- }
-
- /* decryption schedule */
-
- ctx->dkey[0] = ss[0] = le32_to_cpu(key[0]);
- ctx->dkey[1] = ss[1] = le32_to_cpu(key[1]);
- ctx->dkey[2] = ss[2] = le32_to_cpu(key[2]);
- ctx->dkey[3] = ss[3] = le32_to_cpu(key[3]);
-
- switch (key_len) {
- case 16:
- kdf4(ctx->dkey, 0);
- for (i = 1; i < 9; i++)
- kd4(ctx->dkey, i);
- kdl4(ctx->dkey, 9);
- break;
-
- case 24:
- ctx->dkey[4] = ff(ss[4] = le32_to_cpu(key[4]));
- ctx->dkey[5] = ff(ss[5] = le32_to_cpu(key[5]));
- kdf6(ctx->dkey, 0);
- for (i = 1; i < 7; i++)
- kd6(ctx->dkey, i);
- kdl6(ctx->dkey, 7);
- break;
-
- case 32:
- ctx->dkey[4] = ff(ss[4] = le32_to_cpu(key[4]));
- ctx->dkey[5] = ff(ss[5] = le32_to_cpu(key[5]));
- ctx->dkey[6] = ff(ss[6] = le32_to_cpu(key[6]));
- ctx->dkey[7] = ff(ss[7] = le32_to_cpu(key[7]));
- kdf8(ctx->dkey, 0);
- for (i = 1; i < 6; i++)
- kd8(ctx->dkey, i);
- kdl8(ctx->dkey, 6);
- break;
- }
- return 0;
-}
-
-static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
- aes_enc_blk(tfm, dst, src);
-}
-
-static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
- aes_dec_blk(tfm, dst, src);
-}
-
-static struct crypto_alg aes_alg = {
- .cra_name = "aes",
- .cra_driver_name = "aes-i586",
- .cra_priority = 200,
- .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct aes_ctx),
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(aes_alg.cra_list),
- .cra_u = {
- .cipher = {
- .cia_min_keysize = AES_MIN_KEY_SIZE,
- .cia_max_keysize = AES_MAX_KEY_SIZE,
- .cia_setkey = aes_set_key,
- .cia_encrypt = aes_encrypt,
- .cia_decrypt = aes_decrypt
- }
- }
-};
-
-static int __init aes_init(void)
-{
- gen_tabs();
- return crypto_register_alg(&aes_alg);
-}
-
-static void __exit aes_fini(void)
-{
- crypto_unregister_alg(&aes_alg);
-}
-
-module_init(aes_init);
-module_exit(aes_fini);
-
-MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, i586 asm optimized");
-MODULE_LICENSE("Dual BSD/GPL");
-MODULE_AUTHOR("Fruhwirth Clemens, James Morris, Brian Gladman, Adam Richter");
-MODULE_ALIAS("aes");
diff --git a/arch/x86/crypto/aes_64.c b/arch/x86/crypto/aes_64.c
deleted file mode 100644
index 5cdb13ea5cc..00000000000
--- a/arch/x86/crypto/aes_64.c
+++ /dev/null
@@ -1,336 +0,0 @@
-/*
- * Cryptographic API.
- *
- * AES Cipher Algorithm.
- *
- * Based on Brian Gladman's code.
- *
- * Linux developers:
- * Alexander Kjeldaas <astor@fast.no>
- * Herbert Valerio Riedel <hvr@hvrlab.org>
- * Kyle McMartin <kyle@debian.org>
- * Adam J. Richter <adam@yggdrasil.com> (conversion to 2.5 API).
- * Andreas Steinmetz <ast@domdv.de> (adapted to x86_64 assembler)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * ---------------------------------------------------------------------------
- * Copyright (c) 2002, Dr Brian Gladman <brg@gladman.me.uk>, Worcester, UK.
- * All rights reserved.
- *
- * LICENSE TERMS
- *
- * The free distribution and use of this software in both source and binary
- * form is allowed (with or without changes) provided that:
- *
- * 1. distributions of this source code include the above copyright
- * notice, this list of conditions and the following disclaimer;
- *
- * 2. distributions in binary form include the above copyright
- * notice, this list of conditions and the following disclaimer
- * in the documentation and/or other associated materials;
- *
- * 3. the copyright holder's name is not used to endorse products
- * built using this software without specific written permission.
- *
- * ALTERNATIVELY, provided that this notice is retained in full, this product
- * may be distributed under the terms of the GNU General Public License (GPL),
- * in which case the provisions of the GPL apply INSTEAD OF those given above.
- *
- * DISCLAIMER
- *
- * This software is provided 'as is' with no explicit or implied warranties
- * in respect of its properties, including, but not limited to, correctness
- * and/or fitness for purpose.
- * ---------------------------------------------------------------------------
- */
-
-/* Some changes from the Gladman version:
- s/RIJNDAEL(e_key)/E_KEY/g
- s/RIJNDAEL(d_key)/D_KEY/g
-*/
-
-#include <asm/byteorder.h>
-#include <linux/bitops.h>
-#include <linux/crypto.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/types.h>
-
-#define AES_MIN_KEY_SIZE 16
-#define AES_MAX_KEY_SIZE 32
-
-#define AES_BLOCK_SIZE 16
-
-/*
- * #define byte(x, nr) ((unsigned char)((x) >> (nr*8)))
- */
-static inline u8 byte(const u32 x, const unsigned n)
-{
- return x >> (n << 3);
-}
-
-struct aes_ctx
-{
- u32 key_length;
- u32 buf[120];
-};
-
-#define E_KEY (&ctx->buf[0])
-#define D_KEY (&ctx->buf[60])
-
-static u8 pow_tab[256] __initdata;
-static u8 log_tab[256] __initdata;
-static u8 sbx_tab[256] __initdata;
-static u8 isb_tab[256] __initdata;
-static u32 rco_tab[10];
-u32 aes_ft_tab[4][256];
-u32 aes_it_tab[4][256];
-
-u32 aes_fl_tab[4][256];
-u32 aes_il_tab[4][256];
-
-static inline u8 f_mult(u8 a, u8 b)
-{
- u8 aa = log_tab[a], cc = aa + log_tab[b];
-
- return pow_tab[cc + (cc < aa ? 1 : 0)];
-}
-
-#define ff_mult(a, b) (a && b ? f_mult(a, b) : 0)
-
-#define ls_box(x) \
- (aes_fl_tab[0][byte(x, 0)] ^ \
- aes_fl_tab[1][byte(x, 1)] ^ \
- aes_fl_tab[2][byte(x, 2)] ^ \
- aes_fl_tab[3][byte(x, 3)])
-
-static void __init gen_tabs(void)
-{
- u32 i, t;
- u8 p, q;
-
- /* log and power tables for GF(2**8) finite field with
- 0x011b as modular polynomial - the simplest primitive
- root is 0x03, used here to generate the tables */
-
- for (i = 0, p = 1; i < 256; ++i) {
- pow_tab[i] = (u8)p;
- log_tab[p] = (u8)i;
-
- p ^= (p << 1) ^ (p & 0x80 ? 0x01b : 0);
- }
-
- log_tab[1] = 0;
-
- for (i = 0, p = 1; i < 10; ++i) {
- rco_tab[i] = p;
-
- p = (p << 1) ^ (p & 0x80 ? 0x01b : 0);
- }
-
- for (i = 0; i < 256; ++i) {
- p = (i ? pow_tab[255 - log_tab[i]] : 0);
- q = ((p >> 7) | (p << 1)) ^ ((p >> 6) | (p << 2));
- p ^= 0x63 ^ q ^ ((q >> 6) | (q << 2));
- sbx_tab[i] = p;
- isb_tab[p] = (u8)i;
- }
-
- for (i = 0; i < 256; ++i) {
- p = sbx_tab[i];
-
- t = p;
- aes_fl_tab[0][i] = t;
- aes_fl_tab[1][i] = rol32(t, 8);
- aes_fl_tab[2][i] = rol32(t, 16);
- aes_fl_tab[3][i] = rol32(t, 24);
-
- t = ((u32)ff_mult(2, p)) |
- ((u32)p << 8) |
- ((u32)p << 16) | ((u32)ff_mult(3, p) << 24);
-
- aes_ft_tab[0][i] = t;
- aes_ft_tab[1][i] = rol32(t, 8);
- aes_ft_tab[2][i] = rol32(t, 16);
- aes_ft_tab[3][i] = rol32(t, 24);
-
- p = isb_tab[i];
-
- t = p;
- aes_il_tab[0][i] = t;
- aes_il_tab[1][i] = rol32(t, 8);
- aes_il_tab[2][i] = rol32(t, 16);
- aes_il_tab[3][i] = rol32(t, 24);
-
- t = ((u32)ff_mult(14, p)) |
- ((u32)ff_mult(9, p) << 8) |
- ((u32)ff_mult(13, p) << 16) |
- ((u32)ff_mult(11, p) << 24);
-
- aes_it_tab[0][i] = t;
- aes_it_tab[1][i] = rol32(t, 8);
- aes_it_tab[2][i] = rol32(t, 16);
- aes_it_tab[3][i] = rol32(t, 24);
- }
-}
-
-#define star_x(x) (((x) & 0x7f7f7f7f) << 1) ^ ((((x) & 0x80808080) >> 7) * 0x1b)
-
-#define imix_col(y, x) \
- u = star_x(x); \
- v = star_x(u); \
- w = star_x(v); \
- t = w ^ (x); \
- (y) = u ^ v ^ w; \
- (y) ^= ror32(u ^ t, 8) ^ \
- ror32(v ^ t, 16) ^ \
- ror32(t, 24)
-
-/* initialise the key schedule from the user supplied key */
-
-#define loop4(i) \
-{ \
- t = ror32(t, 8); t = ls_box(t) ^ rco_tab[i]; \
- t ^= E_KEY[4 * i]; E_KEY[4 * i + 4] = t; \
- t ^= E_KEY[4 * i + 1]; E_KEY[4 * i + 5] = t; \
- t ^= E_KEY[4 * i + 2]; E_KEY[4 * i + 6] = t; \
- t ^= E_KEY[4 * i + 3]; E_KEY[4 * i + 7] = t; \
-}
-
-#define loop6(i) \
-{ \
- t = ror32(t, 8); t = ls_box(t) ^ rco_tab[i]; \
- t ^= E_KEY[6 * i]; E_KEY[6 * i + 6] = t; \
- t ^= E_KEY[6 * i + 1]; E_KEY[6 * i + 7] = t; \
- t ^= E_KEY[6 * i + 2]; E_KEY[6 * i + 8] = t; \
- t ^= E_KEY[6 * i + 3]; E_KEY[6 * i + 9] = t; \
- t ^= E_KEY[6 * i + 4]; E_KEY[6 * i + 10] = t; \
- t ^= E_KEY[6 * i + 5]; E_KEY[6 * i + 11] = t; \
-}
-
-#define loop8(i) \
-{ \
- t = ror32(t, 8); ; t = ls_box(t) ^ rco_tab[i]; \
- t ^= E_KEY[8 * i]; E_KEY[8 * i + 8] = t; \
- t ^= E_KEY[8 * i + 1]; E_KEY[8 * i + 9] = t; \
- t ^= E_KEY[8 * i + 2]; E_KEY[8 * i + 10] = t; \
- t ^= E_KEY[8 * i + 3]; E_KEY[8 * i + 11] = t; \
- t = E_KEY[8 * i + 4] ^ ls_box(t); \
- E_KEY[8 * i + 12] = t; \
- t ^= E_KEY[8 * i + 5]; E_KEY[8 * i + 13] = t; \
- t ^= E_KEY[8 * i + 6]; E_KEY[8 * i + 14] = t; \
- t ^= E_KEY[8 * i + 7]; E_KEY[8 * i + 15] = t; \
-}
-
-static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
- unsigned int key_len)
-{
- struct aes_ctx *ctx = crypto_tfm_ctx(tfm);
- const __le32 *key = (const __le32 *)in_key;
- u32 *flags = &tfm->crt_flags;
- u32 i, j, t, u, v, w;
-
- if (key_len % 8) {
- *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
- return -EINVAL;
- }
-
- ctx->key_length = key_len;
-
- D_KEY[key_len + 24] = E_KEY[0] = le32_to_cpu(key[0]);
- D_KEY[key_len + 25] = E_KEY[1] = le32_to_cpu(key[1]);
- D_KEY[key_len + 26] = E_KEY[2] = le32_to_cpu(key[2]);
- D_KEY[key_len + 27] = E_KEY[3] = le32_to_cpu(key[3]);
-
- switch (key_len) {
- case 16:
- t = E_KEY[3];
- for (i = 0; i < 10; ++i)
- loop4(i);
- break;
-
- case 24:
- E_KEY[4] = le32_to_cpu(key[4]);
- t = E_KEY[5] = le32_to_cpu(key[5]);
- for (i = 0; i < 8; ++i)
- loop6 (i);
- break;
-
- case 32:
- E_KEY[4] = le32_to_cpu(key[4]);
- E_KEY[5] = le32_to_cpu(key[5]);
- E_KEY[6] = le32_to_cpu(key[6]);
- t = E_KEY[7] = le32_to_cpu(key[7]);
- for (i = 0; i < 7; ++i)
- loop8(i);
- break;
- }
-
- D_KEY[0] = E_KEY[key_len + 24];
- D_KEY[1] = E_KEY[key_len + 25];
- D_KEY[2] = E_KEY[key_len + 26];
- D_KEY[3] = E_KEY[key_len + 27];
-
- for (i = 4; i < key_len + 24; ++i) {
- j = key_len + 24 - (i & ~3) + (i & 3);
- imix_col(D_KEY[j], E_KEY[i]);
- }
-
- return 0;
-}
-
-asmlinkage void aes_enc_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in);
-asmlinkage void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in);
-
-static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
- aes_enc_blk(tfm, dst, src);
-}
-
-static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
- aes_dec_blk(tfm, dst, src);
-}
-
-static struct crypto_alg aes_alg = {
- .cra_name = "aes",
- .cra_driver_name = "aes-x86_64",
- .cra_priority = 200,
- .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct aes_ctx),
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(aes_alg.cra_list),
- .cra_u = {
- .cipher = {
- .cia_min_keysize = AES_MIN_KEY_SIZE,
- .cia_max_keysize = AES_MAX_KEY_SIZE,
- .cia_setkey = aes_set_key,
- .cia_encrypt = aes_encrypt,
- .cia_decrypt = aes_decrypt
- }
- }
-};
-
-static int __init aes_init(void)
-{
- gen_tabs();
- return crypto_register_alg(&aes_alg);
-}
-
-static void __exit aes_fini(void)
-{
- crypto_unregister_alg(&aes_alg);
-}
-
-module_init(aes_init);
-module_exit(aes_fini);
-
-MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("aes");
diff --git a/arch/x86/crypto/aes_glue.c b/arch/x86/crypto/aes_glue.c
new file mode 100644
index 00000000000..71f45782711
--- /dev/null
+++ b/arch/x86/crypto/aes_glue.c
@@ -0,0 +1,57 @@
+/*
+ * Glue Code for the asm optimized version of the AES Cipher Algorithm
+ *
+ */
+
+#include <crypto/aes.h>
+
+asmlinkage void aes_enc_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in);
+asmlinkage void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in);
+
+static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+ aes_enc_blk(tfm, dst, src);
+}
+
+static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+ aes_dec_blk(tfm, dst, src);
+}
+
+static struct crypto_alg aes_alg = {
+ .cra_name = "aes",
+ .cra_driver_name = "aes-asm",
+ .cra_priority = 200,
+ .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct crypto_aes_ctx),
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(aes_alg.cra_list),
+ .cra_u = {
+ .cipher = {
+ .cia_min_keysize = AES_MIN_KEY_SIZE,
+ .cia_max_keysize = AES_MAX_KEY_SIZE,
+ .cia_setkey = crypto_aes_set_key,
+ .cia_encrypt = aes_encrypt,
+ .cia_decrypt = aes_decrypt
+ }
+ }
+};
+
+static int __init aes_init(void)
+{
+ return crypto_register_alg(&aes_alg);
+}
+
+static void __exit aes_fini(void)
+{
+ crypto_unregister_alg(&aes_alg);
+}
+
+module_init(aes_init);
+module_exit(aes_fini);
+
+MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, asm optimized");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("aes");
+MODULE_ALIAS("aes-asm");
diff --git a/arch/x86/crypto/salsa20-i586-asm_32.S b/arch/x86/crypto/salsa20-i586-asm_32.S
new file mode 100644
index 00000000000..72eb306680b
--- /dev/null
+++ b/arch/x86/crypto/salsa20-i586-asm_32.S
@@ -0,0 +1,1114 @@
+# salsa20_pm.s version 20051229
+# D. J. Bernstein
+# Public domain.
+
+# enter ECRYPT_encrypt_bytes
+.text
+.p2align 5
+.globl ECRYPT_encrypt_bytes
+ECRYPT_encrypt_bytes:
+ mov %esp,%eax
+ and $31,%eax
+ add $256,%eax
+ sub %eax,%esp
+ # eax_stack = eax
+ movl %eax,80(%esp)
+ # ebx_stack = ebx
+ movl %ebx,84(%esp)
+ # esi_stack = esi
+ movl %esi,88(%esp)
+ # edi_stack = edi
+ movl %edi,92(%esp)
+ # ebp_stack = ebp
+ movl %ebp,96(%esp)
+ # x = arg1
+ movl 4(%esp,%eax),%edx
+ # m = arg2
+ movl 8(%esp,%eax),%esi
+ # out = arg3
+ movl 12(%esp,%eax),%edi
+ # bytes = arg4
+ movl 16(%esp,%eax),%ebx
+ # bytes -= 0
+ sub $0,%ebx
+ # goto done if unsigned<=
+ jbe ._done
+._start:
+ # in0 = *(uint32 *) (x + 0)
+ movl 0(%edx),%eax
+ # in1 = *(uint32 *) (x + 4)
+ movl 4(%edx),%ecx
+ # in2 = *(uint32 *) (x + 8)
+ movl 8(%edx),%ebp
+ # j0 = in0
+ movl %eax,164(%esp)
+ # in3 = *(uint32 *) (x + 12)
+ movl 12(%edx),%eax
+ # j1 = in1
+ movl %ecx,168(%esp)
+ # in4 = *(uint32 *) (x + 16)
+ movl 16(%edx),%ecx
+ # j2 = in2
+ movl %ebp,172(%esp)
+ # in5 = *(uint32 *) (x + 20)
+ movl 20(%edx),%ebp
+ # j3 = in3
+ movl %eax,176(%esp)
+ # in6 = *(uint32 *) (x + 24)
+ movl 24(%edx),%eax
+ # j4 = in4
+ movl %ecx,180(%esp)
+ # in7 = *(uint32 *) (x + 28)
+ movl 28(%edx),%ecx
+ # j5 = in5
+ movl %ebp,184(%esp)
+ # in8 = *(uint32 *) (x + 32)
+ movl 32(%edx),%ebp
+ # j6 = in6
+ movl %eax,188(%esp)
+ # in9 = *(uint32 *) (x + 36)
+ movl 36(%edx),%eax
+ # j7 = in7
+ movl %ecx,192(%esp)
+ # in10 = *(uint32 *) (x + 40)
+ movl 40(%edx),%ecx
+ # j8 = in8
+ movl %ebp,196(%esp)
+ # in11 = *(uint32 *) (x + 44)
+ movl 44(%edx),%ebp
+ # j9 = in9
+ movl %eax,200(%esp)
+ # in12 = *(uint32 *) (x + 48)
+ movl 48(%edx),%eax
+ # j10 = in10
+ movl %ecx,204(%esp)
+ # in13 = *(uint32 *) (x + 52)
+ movl 52(%edx),%ecx
+ # j11 = in11
+ movl %ebp,208(%esp)
+ # in14 = *(uint32 *) (x + 56)
+ movl 56(%edx),%ebp
+ # j12 = in12
+ movl %eax,212(%esp)
+ # in15 = *(uint32 *) (x + 60)
+ movl 60(%edx),%eax
+ # j13 = in13
+ movl %ecx,216(%esp)
+ # j14 = in14
+ movl %ebp,220(%esp)
+ # j15 = in15
+ movl %eax,224(%esp)
+ # x_backup = x
+ movl %edx,64(%esp)
+._bytesatleast1:
+ # bytes - 64
+ cmp $64,%ebx
+ # goto nocopy if unsigned>=
+ jae ._nocopy
+ # ctarget = out
+ movl %edi,228(%esp)
+ # out = &tmp
+ leal 0(%esp),%edi
+ # i = bytes
+ mov %ebx,%ecx
+ # while (i) { *out++ = *m++; --i }
+ rep movsb
+ # out = &tmp
+ leal 0(%esp),%edi
+ # m = &tmp
+ leal 0(%esp),%esi
+._nocopy:
+ # out_backup = out
+ movl %edi,72(%esp)
+ # m_backup = m
+ movl %esi,68(%esp)
+ # bytes_backup = bytes
+ movl %ebx,76(%esp)
+ # in0 = j0
+ movl 164(%esp),%eax
+ # in1 = j1
+ movl 168(%esp),%ecx
+ # in2 = j2
+ movl 172(%esp),%edx
+ # in3 = j3
+ movl 176(%esp),%ebx
+ # x0 = in0
+ movl %eax,100(%esp)
+ # x1 = in1
+ movl %ecx,104(%esp)
+ # x2 = in2
+ movl %edx,108(%esp)
+ # x3 = in3
+ movl %ebx,112(%esp)
+ # in4 = j4
+ movl 180(%esp),%eax
+ # in5 = j5
+ movl 184(%esp),%ecx
+ # in6 = j6
+ movl 188(%esp),%edx
+ # in7 = j7
+ movl 192(%esp),%ebx
+ # x4 = in4
+ movl %eax,116(%esp)
+ # x5 = in5
+ movl %ecx,120(%esp)
+ # x6 = in6
+ movl %edx,124(%esp)
+ # x7 = in7
+ movl %ebx,128(%esp)
+ # in8 = j8
+ movl 196(%esp),%eax
+ # in9 = j9
+ movl 200(%esp),%ecx
+ # in10 = j10
+ movl 204(%esp),%edx
+ # in11 = j11
+ movl 208(%esp),%ebx
+ # x8 = in8
+ movl %eax,132(%esp)
+ # x9 = in9
+ movl %ecx,136(%esp)
+ # x10 = in10
+ movl %edx,140(%esp)
+ # x11 = in11
+ movl %ebx,144(%esp)
+ # in12 = j12
+ movl 212(%esp),%eax
+ # in13 = j13
+ movl 216(%esp),%ecx
+ # in14 = j14
+ movl 220(%esp),%edx
+ # in15 = j15
+ movl 224(%esp),%ebx
+ # x12 = in12
+ movl %eax,148(%esp)
+ # x13 = in13
+ movl %ecx,152(%esp)
+ # x14 = in14
+ movl %edx,156(%esp)
+ # x15 = in15
+ movl %ebx,160(%esp)
+ # i = 20
+ mov $20,%ebp
+ # p = x0
+ movl 100(%esp),%eax
+ # s = x5
+ movl 120(%esp),%ecx
+ # t = x10
+ movl 140(%esp),%edx
+ # w = x15
+ movl 160(%esp),%ebx
+._mainloop:
+ # x0 = p
+ movl %eax,100(%esp)
+ # x10 = t
+ movl %edx,140(%esp)
+ # p += x12
+ addl 148(%esp),%eax
+ # x5 = s
+ movl %ecx,120(%esp)
+ # t += x6
+ addl 124(%esp),%edx
+ # x15 = w
+ movl %ebx,160(%esp)
+ # r = x1
+ movl 104(%esp),%esi
+ # r += s
+ add %ecx,%esi
+ # v = x11
+ movl 144(%esp),%edi
+ # v += w
+ add %ebx,%edi
+ # p <<<= 7
+ rol $7,%eax
+ # p ^= x4
+ xorl 116(%esp),%eax
+ # t <<<= 7
+ rol $7,%edx
+ # t ^= x14
+ xorl 156(%esp),%edx
+ # r <<<= 7
+ rol $7,%esi
+ # r ^= x9
+ xorl 136(%esp),%esi
+ # v <<<= 7
+ rol $7,%edi
+ # v ^= x3
+ xorl 112(%esp),%edi
+ # x4 = p
+ movl %eax,116(%esp)
+ # x14 = t
+ movl %edx,156(%esp)
+ # p += x0
+ addl 100(%esp),%eax
+ # x9 = r
+ movl %esi,136(%esp)
+ # t += x10
+ addl 140(%esp),%edx
+ # x3 = v
+ movl %edi,112(%esp)
+ # p <<<= 9
+ rol $9,%eax
+ # p ^= x8
+ xorl 132(%esp),%eax
+ # t <<<= 9
+ rol $9,%edx
+ # t ^= x2
+ xorl 108(%esp),%edx
+ # s += r
+ add %esi,%ecx
+ # s <<<= 9
+ rol $9,%ecx
+ # s ^= x13
+ xorl 152(%esp),%ecx
+ # w += v
+ add %edi,%ebx
+ # w <<<= 9
+ rol $9,%ebx
+ # w ^= x7
+ xorl 128(%esp),%ebx
+ # x8 = p
+ movl %eax,132(%esp)
+ # x2 = t
+ movl %edx,108(%esp)
+ # p += x4
+ addl 116(%esp),%eax
+ # x13 = s
+ movl %ecx,152(%esp)
+ # t += x14
+ addl 156(%esp),%edx
+ # x7 = w
+ movl %ebx,128(%esp)
+ # p <<<= 13
+ rol $13,%eax
+ # p ^= x12
+ xorl 148(%esp),%eax
+ # t <<<= 13
+ rol $13,%edx
+ # t ^= x6
+ xorl 124(%esp),%edx
+ # r += s
+ add %ecx,%esi
+ # r <<<= 13
+ rol $13,%esi
+ # r ^= x1
+ xorl 104(%esp),%esi
+ # v += w
+ add %ebx,%edi
+ # v <<<= 13
+ rol $13,%edi
+ # v ^= x11
+ xorl 144(%esp),%edi
+ # x12 = p
+ movl %eax,148(%esp)
+ # x6 = t
+ movl %edx,124(%esp)
+ # p += x8
+ addl 132(%esp),%eax
+ # x1 = r
+ movl %esi,104(%esp)
+ # t += x2
+ addl 108(%esp),%edx
+ # x11 = v
+ movl %edi,144(%esp)
+ # p <<<= 18
+ rol $18,%eax
+ # p ^= x0
+ xorl 100(%esp),%eax
+ # t <<<= 18
+ rol $18,%edx
+ # t ^= x10
+ xorl 140(%esp),%edx
+ # s += r
+ add %esi,%ecx
+ # s <<<= 18
+ rol $18,%ecx
+ # s ^= x5
+ xorl 120(%esp),%ecx
+ # w += v
+ add %edi,%ebx
+ # w <<<= 18
+ rol $18,%ebx
+ # w ^= x15
+ xorl 160(%esp),%ebx
+ # x0 = p
+ movl %eax,100(%esp)
+ # x10 = t
+ movl %edx,140(%esp)
+ # p += x3
+ addl 112(%esp),%eax
+ # p <<<= 7
+ rol $7,%eax
+ # x5 = s
+ movl %ecx,120(%esp)
+ # t += x9
+ addl 136(%esp),%edx
+ # x15 = w
+ movl %ebx,160(%esp)
+ # r = x4
+ movl 116(%esp),%esi
+ # r += s
+ add %ecx,%esi
+ # v = x14
+ movl 156(%esp),%edi
+ # v += w
+ add %ebx,%edi
+ # p ^= x1
+ xorl 104(%esp),%eax
+ # t <<<= 7
+ rol $7,%edx
+ # t ^= x11
+ xorl 144(%esp),%edx
+ # r <<<= 7
+ rol $7,%esi
+ # r ^= x6
+ xorl 124(%esp),%esi
+ # v <<<= 7
+ rol $7,%edi
+ # v ^= x12
+ xorl 148(%esp),%edi
+ # x1 = p
+ movl %eax,104(%esp)
+ # x11 = t
+ movl %edx,144(%esp)
+ # p += x0
+ addl 100(%esp),%eax
+ # x6 = r
+ movl %esi,124(%esp)
+ # t += x10
+ addl 140(%esp),%edx
+ # x12 = v
+ movl %edi,148(%esp)
+ # p <<<= 9
+ rol $9,%eax
+ # p ^= x2
+ xorl 108(%esp),%eax
+ # t <<<= 9
+ rol $9,%edx
+ # t ^= x8
+ xorl 132(%esp),%edx
+ # s += r
+ add %esi,%ecx
+ # s <<<= 9
+ rol $9,%ecx
+ # s ^= x7
+ xorl 128(%esp),%ecx
+ # w += v
+ add %edi,%ebx
+ # w <<<= 9
+ rol $9,%ebx
+ # w ^= x13
+ xorl 152(%esp),%ebx
+ # x2 = p
+ movl %eax,108(%esp)
+ # x8 = t
+ movl %edx,132(%esp)
+ # p += x1
+ addl 104(%esp),%eax
+ # x7 = s
+ movl %ecx,128(%esp)
+ # t += x11
+ addl 144(%esp),%edx
+ # x13 = w
+ movl %ebx,152(%esp)
+ # p <<<= 13
+ rol $13,%eax
+ # p ^= x3
+ xorl 112(%esp),%eax
+ # t <<<= 13
+ rol $13,%edx
+ # t ^= x9
+ xorl 136(%esp),%edx
+ # r += s
+ add %ecx,%esi
+ # r <<<= 13
+ rol $13,%esi
+ # r ^= x4
+ xorl 116(%esp),%esi
+ # v += w
+ add %ebx,%edi
+ # v <<<= 13
+ rol $13,%edi
+ # v ^= x14
+ xorl 156(%esp),%edi
+ # x3 = p
+ movl %eax,112(%esp)
+ # x9 = t
+ movl %edx,136(%esp)
+ # p += x2
+ addl 108(%esp),%eax
+ # x4 = r
+ movl %esi,116(%esp)
+ # t += x8
+ addl 132(%esp),%edx
+ # x14 = v
+ movl %edi,156(%esp)
+ # p <<<= 18
+ rol $18,%eax
+ # p ^= x0
+ xorl 100(%esp),%eax
+ # t <<<= 18
+ rol $18,%edx
+ # t ^= x10
+ xorl 140(%esp),%edx
+ # s += r
+ add %esi,%ecx
+ # s <<<= 18
+ rol $18,%ecx
+ # s ^= x5
+ xorl 120(%esp),%ecx
+ # w += v
+ add %edi,%ebx
+ # w <<<= 18
+ rol $18,%ebx
+ # w ^= x15
+ xorl 160(%esp),%ebx
+ # x0 = p
+ movl %eax,100(%esp)
+ # x10 = t
+ movl %edx,140(%esp)
+ # p += x12
+ addl 148(%esp),%eax
+ # x5 = s
+ movl %ecx,120(%esp)
+ # t += x6
+ addl 124(%esp),%edx
+ # x15 = w
+ movl %ebx,160(%esp)
+ # r = x1
+ movl 104(%esp),%esi
+ # r += s
+ add %ecx,%esi
+ # v = x11
+ movl 144(%esp),%edi
+ # v += w
+ add %ebx,%edi
+ # p <<<= 7
+ rol $7,%eax
+ # p ^= x4
+ xorl 116(%esp),%eax
+ # t <<<= 7
+ rol $7,%edx
+ # t ^= x14
+ xorl 156(%esp),%edx
+ # r <<<= 7
+ rol $7,%esi
+ # r ^= x9
+ xorl 136(%esp),%esi
+ # v <<<= 7
+ rol $7,%edi
+ # v ^= x3
+ xorl 112(%esp),%edi
+ # x4 = p
+ movl %eax,116(%esp)
+ # x14 = t
+ movl %edx,156(%esp)
+ # p += x0
+ addl 100(%esp),%eax
+ # x9 = r
+ movl %esi,136(%esp)
+ # t += x10
+ addl 140(%esp),%edx
+ # x3 = v
+ movl %edi,112(%esp)
+ # p <<<= 9
+ rol $9,%eax
+ # p ^= x8
+ xorl 132(%esp),%eax
+ # t <<<= 9
+ rol $9,%edx
+ # t ^= x2
+ xorl 108(%esp),%edx
+ # s += r
+ add %esi,%ecx
+ # s <<<= 9
+ rol $9,%ecx
+ # s ^= x13
+ xorl 152(%esp),%ecx
+ # w += v
+ add %edi,%ebx
+ # w <<<= 9
+ rol $9,%ebx
+ # w ^= x7
+ xorl 128(%esp),%ebx
+ # x8 = p
+ movl %eax,132(%esp)
+ # x2 = t
+ movl %edx,108(%esp)
+ # p += x4
+ addl 116(%esp),%eax
+ # x13 = s
+ movl %ecx,152(%esp)
+ # t += x14
+ addl 156(%esp),%edx
+ # x7 = w
+ movl %ebx,128(%esp)
+ # p <<<= 13
+ rol $13,%eax
+ # p ^= x12
+ xorl 148(%esp),%eax
+ # t <<<= 13
+ rol $13,%edx
+ # t ^= x6
+ xorl 124(%esp),%edx
+ # r += s
+ add %ecx,%esi
+ # r <<<= 13
+ rol $13,%esi
+ # r ^= x1
+ xorl 104(%esp),%esi
+ # v += w
+ add %ebx,%edi
+ # v <<<= 13
+ rol $13,%edi
+ # v ^= x11
+ xorl 144(%esp),%edi
+ # x12 = p
+ movl %eax,148(%esp)
+ # x6 = t
+ movl %edx,124(%esp)
+ # p += x8
+ addl 132(%esp),%eax
+ # x1 = r
+ movl %esi,104(%esp)
+ # t += x2
+ addl 108(%esp),%edx
+ # x11 = v
+ movl %edi,144(%esp)
+ # p <<<= 18
+ rol $18,%eax
+ # p ^= x0
+ xorl 100(%esp),%eax
+ # t <<<= 18
+ rol $18,%edx
+ # t ^= x10
+ xorl 140(%esp),%edx
+ # s += r
+ add %esi,%ecx
+ # s <<<= 18
+ rol $18,%ecx
+ # s ^= x5
+ xorl 120(%esp),%ecx
+ # w += v
+ add %edi,%ebx
+ # w <<<= 18
+ rol $18,%ebx
+ # w ^= x15
+ xorl 160(%esp),%ebx
+ # x0 = p
+ movl %eax,100(%esp)
+ # x10 = t
+ movl %edx,140(%esp)
+ # p += x3
+ addl 112(%esp),%eax
+ # p <<<= 7
+ rol $7,%eax
+ # x5 = s
+ movl %ecx,120(%esp)
+ # t += x9
+ addl 136(%esp),%edx
+ # x15 = w
+ movl %ebx,160(%esp)
+ # r = x4
+ movl 116(%esp),%esi
+ # r += s
+ add %ecx,%esi
+ # v = x14
+ movl 156(%esp),%edi
+ # v += w
+ add %ebx,%edi
+ # p ^= x1
+ xorl 104(%esp),%eax
+ # t <<<= 7
+ rol $7,%edx
+ # t ^= x11
+ xorl 144(%esp),%edx
+ # r <<<= 7
+ rol $7,%esi
+ # r ^= x6
+ xorl 124(%esp),%esi
+ # v <<<= 7
+ rol $7,%edi
+ # v ^= x12
+ xorl 148(%esp),%edi
+ # x1 = p
+ movl %eax,104(%esp)
+ # x11 = t
+ movl %edx,144(%esp)
+ # p += x0
+ addl 100(%esp),%eax
+ # x6 = r
+ movl %esi,124(%esp)
+ # t += x10
+ addl 140(%esp),%edx
+ # x12 = v
+ movl %edi,148(%esp)
+ # p <<<= 9
+ rol $9,%eax
+ # p ^= x2
+ xorl 108(%esp),%eax
+ # t <<<= 9
+ rol $9,%edx
+ # t ^= x8
+ xorl 132(%esp),%edx
+ # s += r
+ add %esi,%ecx
+ # s <<<= 9
+ rol $9,%ecx
+ # s ^= x7
+ xorl 128(%esp),%ecx
+ # w += v
+ add %edi,%ebx
+ # w <<<= 9
+ rol $9,%ebx
+ # w ^= x13
+ xorl 152(%esp),%ebx
+ # x2 = p
+ movl %eax,108(%esp)
+ # x8 = t
+ movl %edx,132(%esp)
+ # p += x1
+ addl 104(%esp),%eax
+ # x7 = s
+ movl %ecx,128(%esp)
+ # t += x11
+ addl 144(%esp),%edx
+ # x13 = w
+ movl %ebx,152(%esp)
+ # p <<<= 13
+ rol $13,%eax
+ # p ^= x3
+ xorl 112(%esp),%eax
+ # t <<<= 13
+ rol $13,%edx
+ # t ^= x9
+ xorl 136(%esp),%edx
+ # r += s
+ add %ecx,%esi
+ # r <<<= 13
+ rol $13,%esi
+ # r ^= x4
+ xorl 116(%esp),%esi
+ # v += w
+ add %ebx,%edi
+ # v <<<= 13
+ rol $13,%edi
+ # v ^= x14
+ xorl 156(%esp),%edi
+ # x3 = p
+ movl %eax,112(%esp)
+ # x9 = t
+ movl %edx,136(%esp)
+ # p += x2
+ addl 108(%esp),%eax
+ # x4 = r
+ movl %esi,116(%esp)
+ # t += x8
+ addl 132(%esp),%edx
+ # x14 = v
+ movl %edi,156(%esp)
+ # p <<<= 18
+ rol $18,%eax
+ # p ^= x0
+ xorl 100(%esp),%eax
+ # t <<<= 18
+ rol $18,%edx
+ # t ^= x10
+ xorl 140(%esp),%edx
+ # s += r
+ add %esi,%ecx
+ # s <<<= 18
+ rol $18,%ecx
+ # s ^= x5
+ xorl 120(%esp),%ecx
+ # w += v
+ add %edi,%ebx
+ # w <<<= 18
+ rol $18,%ebx
+ # w ^= x15
+ xorl 160(%esp),%ebx
+ # i -= 4
+ sub $4,%ebp
+ # goto mainloop if unsigned >
+ ja ._mainloop
+ # x0 = p
+ movl %eax,100(%esp)
+ # x5 = s
+ movl %ecx,120(%esp)
+ # x10 = t
+ movl %edx,140(%esp)
+ # x15 = w
+ movl %ebx,160(%esp)
+ # out = out_backup
+ movl 72(%esp),%edi
+ # m = m_backup
+ movl 68(%esp),%esi
+ # in0 = x0
+ movl 100(%esp),%eax
+ # in1 = x1
+ movl 104(%esp),%ecx
+ # in0 += j0
+ addl 164(%esp),%eax
+ # in1 += j1
+ addl 168(%esp),%ecx
+ # in0 ^= *(uint32 *) (m + 0)
+ xorl 0(%esi),%eax
+ # in1 ^= *(uint32 *) (m + 4)
+ xorl 4(%esi),%ecx
+ # *(uint32 *) (out + 0) = in0
+ movl %eax,0(%edi)
+ # *(uint32 *) (out + 4) = in1
+ movl %ecx,4(%edi)
+ # in2 = x2
+ movl 108(%esp),%eax
+ # in3 = x3
+ movl 112(%esp),%ecx
+ # in2 += j2
+ addl 172(%esp),%eax
+ # in3 += j3
+ addl 176(%esp),%ecx
+ # in2 ^= *(uint32 *) (m + 8)
+ xorl 8(%esi),%eax
+ # in3 ^= *(uint32 *) (m + 12)
+ xorl 12(%esi),%ecx
+ # *(uint32 *) (out + 8) = in2
+ movl %eax,8(%edi)
+ # *(uint32 *) (out + 12) = in3
+ movl %ecx,12(%edi)
+ # in4 = x4
+ movl 116(%esp),%eax
+ # in5 = x5
+ movl 120(%esp),%ecx
+ # in4 += j4
+ addl 180(%esp),%eax
+ # in5 += j5
+ addl 184(%esp),%ecx
+ # in4 ^= *(uint32 *) (m + 16)
+ xorl 16(%esi),%eax
+ # in5 ^= *(uint32 *) (m + 20)
+ xorl 20(%esi),%ecx
+ # *(uint32 *) (out + 16) = in4
+ movl %eax,16(%edi)
+ # *(uint32 *) (out + 20) = in5
+ movl %ecx,20(%edi)
+ # in6 = x6
+ movl 124(%esp),%eax
+ # in7 = x7
+ movl 128(%esp),%ecx
+ # in6 += j6
+ addl 188(%esp),%eax
+ # in7 += j7
+ addl 192(%esp),%ecx
+ # in6 ^= *(uint32 *) (m + 24)
+ xorl 24(%esi),%eax
+ # in7 ^= *(uint32 *) (m + 28)
+ xorl 28(%esi),%ecx
+ # *(uint32 *) (out + 24) = in6
+ movl %eax,24(%edi)
+ # *(uint32 *) (out + 28) = in7
+ movl %ecx,28(%edi)
+ # in8 = x8
+ movl 132(%esp),%eax
+ # in9 = x9
+ movl 136(%esp),%ecx
+ # in8 += j8
+ addl 196(%esp),%eax
+ # in9 += j9
+ addl 200(%esp),%ecx
+ # in8 ^= *(uint32 *) (m + 32)
+ xorl 32(%esi),%eax
+ # in9 ^= *(uint32 *) (m + 36)
+ xorl 36(%esi),%ecx
+ # *(uint32 *) (out + 32) = in8
+ movl %eax,32(%edi)
+ # *(uint32 *) (out + 36) = in9
+ movl %ecx,36(%edi)
+ # in10 = x10
+ movl 140(%esp),%eax
+ # in11 = x11
+ movl 144(%esp),%ecx
+ # in10 += j10
+ addl 204(%esp),%eax
+ # in11 += j11
+ addl 208(%esp),%ecx
+ # in10 ^= *(uint32 *) (m + 40)
+ xorl 40(%esi),%eax
+ # in11 ^= *(uint32 *) (m + 44)
+ xorl 44(%esi),%ecx
+ # *(uint32 *) (out + 40) = in10
+ movl %eax,40(%edi)
+ # *(uint32 *) (out + 44) = in11
+ movl %ecx,44(%edi)
+ # in12 = x12
+ movl 148(%esp),%eax
+ # in13 = x13
+ movl 152(%esp),%ecx
+ # in12 += j12
+ addl 212(%esp),%eax
+ # in13 += j13
+ addl 216(%esp),%ecx
+ # in12 ^= *(uint32 *) (m + 48)
+ xorl 48(%esi),%eax
+ # in13 ^= *(uint32 *) (m + 52)
+ xorl 52(%esi),%ecx
+ # *(uint32 *) (out + 48) = in12
+ movl %eax,48(%edi)
+ # *(uint32 *) (out + 52) = in13
+ movl %ecx,52(%edi)
+ # in14 = x14
+ movl 156(%esp),%eax
+ # in15 = x15
+ movl 160(%esp),%ecx
+ # in14 += j14
+ addl 220(%esp),%eax
+ # in15 += j15
+ addl 224(%esp),%ecx
+ # in14 ^= *(uint32 *) (m + 56)
+ xorl 56(%esi),%eax
+ # in15 ^= *(uint32 *) (m + 60)
+ xorl 60(%esi),%ecx
+ # *(uint32 *) (out + 56) = in14
+ movl %eax,56(%edi)
+ # *(uint32 *) (out + 60) = in15
+ movl %ecx,60(%edi)
+ # bytes = bytes_backup
+ movl 76(%esp),%ebx
+ # in8 = j8
+ movl 196(%esp),%eax
+ # in9 = j9
+ movl 200(%esp),%ecx
+ # in8 += 1
+ add $1,%eax
+ # in9 += 0 + carry
+ adc $0,%ecx
+ # j8 = in8
+ movl %eax,196(%esp)
+ # j9 = in9
+ movl %ecx,200(%esp)
+ # bytes - 64
+ cmp $64,%ebx
+ # goto bytesatleast65 if unsigned>
+ ja ._bytesatleast65
+ # goto bytesatleast64 if unsigned>=
+ jae ._bytesatleast64
+ # m = out
+ mov %edi,%esi
+ # out = ctarget
+ movl 228(%esp),%edi
+ # i = bytes
+ mov %ebx,%ecx
+ # while (i) { *out++ = *m++; --i }
+ rep movsb
+._bytesatleast64:
+ # x = x_backup
+ movl 64(%esp),%eax
+ # in8 = j8
+ movl 196(%esp),%ecx
+ # in9 = j9
+ movl 200(%esp),%edx
+ # *(uint32 *) (x + 32) = in8
+ movl %ecx,32(%eax)
+ # *(uint32 *) (x + 36) = in9
+ movl %edx,36(%eax)
+._done:
+ # eax = eax_stack
+ movl 80(%esp),%eax
+ # ebx = ebx_stack
+ movl 84(%esp),%ebx
+ # esi = esi_stack
+ movl 88(%esp),%esi
+ # edi = edi_stack
+ movl 92(%esp),%edi
+ # ebp = ebp_stack
+ movl 96(%esp),%ebp
+ # leave
+ add %eax,%esp
+ ret
+._bytesatleast65:
+ # bytes -= 64
+ sub $64,%ebx
+ # out += 64
+ add $64,%edi
+ # m += 64
+ add $64,%esi
+ # goto bytesatleast1
+ jmp ._bytesatleast1
+# enter ECRYPT_keysetup
+.text
+.p2align 5
+.globl ECRYPT_keysetup
+ECRYPT_keysetup:
+ mov %esp,%eax
+ and $31,%eax
+ add $256,%eax
+ sub %eax,%esp
+ # eax_stack = eax
+ movl %eax,64(%esp)
+ # ebx_stack = ebx
+ movl %ebx,68(%esp)
+ # esi_stack = esi
+ movl %esi,72(%esp)
+ # edi_stack = edi
+ movl %edi,76(%esp)
+ # ebp_stack = ebp
+ movl %ebp,80(%esp)
+ # k = arg2
+ movl 8(%esp,%eax),%ecx
+ # kbits = arg3
+ movl 12(%esp,%eax),%edx
+ # x = arg1
+ movl 4(%esp,%eax),%eax
+ # in1 = *(uint32 *) (k + 0)
+ movl 0(%ecx),%ebx
+ # in2 = *(uint32 *) (k + 4)
+ movl 4(%ecx),%esi
+ # in3 = *(uint32 *) (k + 8)
+ movl 8(%ecx),%edi
+ # in4 = *(uint32 *) (k + 12)
+ movl 12(%ecx),%ebp
+ # *(uint32 *) (x + 4) = in1
+ movl %ebx,4(%eax)
+ # *(uint32 *) (x + 8) = in2
+ movl %esi,8(%eax)
+ # *(uint32 *) (x + 12) = in3
+ movl %edi,12(%eax)
+ # *(uint32 *) (x + 16) = in4
+ movl %ebp,16(%eax)
+ # kbits - 256
+ cmp $256,%edx
+ # goto kbits128 if unsigned<
+ jb ._kbits128
+._kbits256:
+ # in11 = *(uint32 *) (k + 16)
+ movl 16(%ecx),%edx
+ # in12 = *(uint32 *) (k + 20)
+ movl 20(%ecx),%ebx
+ # in13 = *(uint32 *) (k + 24)
+ movl 24(%ecx),%esi
+ # in14 = *(uint32 *) (k + 28)
+ movl 28(%ecx),%ecx
+ # *(uint32 *) (x + 44) = in11
+ movl %edx,44(%eax)
+ # *(uint32 *) (x + 48) = in12
+ movl %ebx,48(%eax)
+ # *(uint32 *) (x + 52) = in13
+ movl %esi,52(%eax)
+ # *(uint32 *) (x + 56) = in14
+ movl %ecx,56(%eax)
+ # in0 = 1634760805
+ mov $1634760805,%ecx
+ # in5 = 857760878
+ mov $857760878,%edx
+ # in10 = 2036477234
+ mov $2036477234,%ebx
+ # in15 = 1797285236
+ mov $1797285236,%esi
+ # *(uint32 *) (x + 0) = in0
+ movl %ecx,0(%eax)
+ # *(uint32 *) (x + 20) = in5
+ movl %edx,20(%eax)
+ # *(uint32 *) (x + 40) = in10
+ movl %ebx,40(%eax)
+ # *(uint32 *) (x + 60) = in15
+ movl %esi,60(%eax)
+ # goto keysetupdone
+ jmp ._keysetupdone
+._kbits128:
+ # in11 = *(uint32 *) (k + 0)
+ movl 0(%ecx),%edx
+ # in12 = *(uint32 *) (k + 4)
+ movl 4(%ecx),%ebx
+ # in13 = *(uint32 *) (k + 8)
+ movl 8(%ecx),%esi
+ # in14 = *(uint32 *) (k + 12)
+ movl 12(%ecx),%ecx
+ # *(uint32 *) (x + 44) = in11
+ movl %edx,44(%eax)
+ # *(uint32 *) (x + 48) = in12
+ movl %ebx,48(%eax)
+ # *(uint32 *) (x + 52) = in13
+ movl %esi,52(%eax)
+ # *(uint32 *) (x + 56) = in14
+ movl %ecx,56(%eax)
+ # in0 = 1634760805
+ mov $1634760805,%ecx
+ # in5 = 824206446
+ mov $824206446,%edx
+ # in10 = 2036477238
+ mov $2036477238,%ebx
+ # in15 = 1797285236
+ mov $1797285236,%esi
+ # *(uint32 *) (x + 0) = in0
+ movl %ecx,0(%eax)
+ # *(uint32 *) (x + 20) = in5
+ movl %edx,20(%eax)
+ # *(uint32 *) (x + 40) = in10
+ movl %ebx,40(%eax)
+ # *(uint32 *) (x + 60) = in15
+ movl %esi,60(%eax)
+._keysetupdone:
+ # eax = eax_stack
+ movl 64(%esp),%eax
+ # ebx = ebx_stack
+ movl 68(%esp),%ebx
+ # esi = esi_stack
+ movl 72(%esp),%esi
+ # edi = edi_stack
+ movl 76(%esp),%edi
+ # ebp = ebp_stack
+ movl 80(%esp),%ebp
+ # leave
+ add %eax,%esp
+ ret
+# enter ECRYPT_ivsetup
+.text
+.p2align 5
+.globl ECRYPT_ivsetup
+ECRYPT_ivsetup:
+ mov %esp,%eax
+ and $31,%eax
+ add $256,%eax
+ sub %eax,%esp
+ # eax_stack = eax
+ movl %eax,64(%esp)
+ # ebx_stack = ebx
+ movl %ebx,68(%esp)
+ # esi_stack = esi
+ movl %esi,72(%esp)
+ # edi_stack = edi
+ movl %edi,76(%esp)
+ # ebp_stack = ebp
+ movl %ebp,80(%esp)
+ # iv = arg2
+ movl 8(%esp,%eax),%ecx
+ # x = arg1
+ movl 4(%esp,%eax),%eax
+ # in6 = *(uint32 *) (iv + 0)
+ movl 0(%ecx),%edx
+ # in7 = *(uint32 *) (iv + 4)
+ movl 4(%ecx),%ecx
+ # in8 = 0
+ mov $0,%ebx
+ # in9 = 0
+ mov $0,%esi
+ # *(uint32 *) (x + 24) = in6
+ movl %edx,24(%eax)
+ # *(uint32 *) (x + 28) = in7
+ movl %ecx,28(%eax)
+ # *(uint32 *) (x + 32) = in8
+ movl %ebx,32(%eax)
+ # *(uint32 *) (x + 36) = in9
+ movl %esi,36(%eax)
+ # eax = eax_stack
+ movl 64(%esp),%eax
+ # ebx = ebx_stack
+ movl 68(%esp),%ebx
+ # esi = esi_stack
+ movl 72(%esp),%esi
+ # edi = edi_stack
+ movl 76(%esp),%edi
+ # ebp = ebp_stack
+ movl 80(%esp),%ebp
+ # leave
+ add %eax,%esp
+ ret
diff --git a/arch/x86/crypto/salsa20-x86_64-asm_64.S b/arch/x86/crypto/salsa20-x86_64-asm_64.S
new file mode 100644
index 00000000000..6214a9b0970
--- /dev/null
+++ b/arch/x86/crypto/salsa20-x86_64-asm_64.S
@@ -0,0 +1,920 @@
+# enter ECRYPT_encrypt_bytes
+.text
+.p2align 5
+.globl ECRYPT_encrypt_bytes
+ECRYPT_encrypt_bytes:
+ mov %rsp,%r11
+ and $31,%r11
+ add $256,%r11
+ sub %r11,%rsp
+ # x = arg1
+ mov %rdi,%r8
+ # m = arg2
+ mov %rsi,%rsi
+ # out = arg3
+ mov %rdx,%rdi
+ # bytes = arg4
+ mov %rcx,%rdx
+ # unsigned>? bytes - 0
+ cmp $0,%rdx
+ # comment:fp stack unchanged by jump
+ # goto done if !unsigned>
+ jbe ._done
+ # comment:fp stack unchanged by fallthrough
+# start:
+._start:
+ # r11_stack = r11
+ movq %r11,0(%rsp)
+ # r12_stack = r12
+ movq %r12,8(%rsp)
+ # r13_stack = r13
+ movq %r13,16(%rsp)
+ # r14_stack = r14
+ movq %r14,24(%rsp)
+ # r15_stack = r15
+ movq %r15,32(%rsp)
+ # rbx_stack = rbx
+ movq %rbx,40(%rsp)
+ # rbp_stack = rbp
+ movq %rbp,48(%rsp)
+ # in0 = *(uint64 *) (x + 0)
+ movq 0(%r8),%rcx
+ # in2 = *(uint64 *) (x + 8)
+ movq 8(%r8),%r9
+ # in4 = *(uint64 *) (x + 16)
+ movq 16(%r8),%rax
+ # in6 = *(uint64 *) (x + 24)
+ movq 24(%r8),%r10
+ # in8 = *(uint64 *) (x + 32)
+ movq 32(%r8),%r11
+ # in10 = *(uint64 *) (x + 40)
+ movq 40(%r8),%r12
+ # in12 = *(uint64 *) (x + 48)
+ movq 48(%r8),%r13
+ # in14 = *(uint64 *) (x + 56)
+ movq 56(%r8),%r14
+ # j0 = in0
+ movq %rcx,56(%rsp)
+ # j2 = in2
+ movq %r9,64(%rsp)
+ # j4 = in4
+ movq %rax,72(%rsp)
+ # j6 = in6
+ movq %r10,80(%rsp)
+ # j8 = in8
+ movq %r11,88(%rsp)
+ # j10 = in10
+ movq %r12,96(%rsp)
+ # j12 = in12
+ movq %r13,104(%rsp)
+ # j14 = in14
+ movq %r14,112(%rsp)
+ # x_backup = x
+ movq %r8,120(%rsp)
+# bytesatleast1:
+._bytesatleast1:
+ # unsigned<? bytes - 64
+ cmp $64,%rdx
+ # comment:fp stack unchanged by jump
+ # goto nocopy if !unsigned<
+ jae ._nocopy
+ # ctarget = out
+ movq %rdi,128(%rsp)
+ # out = &tmp
+ leaq 192(%rsp),%rdi
+ # i = bytes
+ mov %rdx,%rcx
+ # while (i) { *out++ = *m++; --i }
+ rep movsb
+ # out = &tmp
+ leaq 192(%rsp),%rdi
+ # m = &tmp
+ leaq 192(%rsp),%rsi
+ # comment:fp stack unchanged by fallthrough
+# nocopy:
+._nocopy:
+ # out_backup = out
+ movq %rdi,136(%rsp)
+ # m_backup = m
+ movq %rsi,144(%rsp)
+ # bytes_backup = bytes
+ movq %rdx,152(%rsp)
+ # x1 = j0
+ movq 56(%rsp),%rdi
+ # x0 = x1
+ mov %rdi,%rdx
+ # (uint64) x1 >>= 32
+ shr $32,%rdi
+ # x3 = j2
+ movq 64(%rsp),%rsi
+ # x2 = x3
+ mov %rsi,%rcx
+ # (uint64) x3 >>= 32
+ shr $32,%rsi
+ # x5 = j4
+ movq 72(%rsp),%r8
+ # x4 = x5
+ mov %r8,%r9
+ # (uint64) x5 >>= 32
+ shr $32,%r8
+ # x5_stack = x5
+ movq %r8,160(%rsp)
+ # x7 = j6
+ movq 80(%rsp),%r8
+ # x6 = x7
+ mov %r8,%rax
+ # (uint64) x7 >>= 32
+ shr $32,%r8
+ # x9 = j8
+ movq 88(%rsp),%r10
+ # x8 = x9
+ mov %r10,%r11
+ # (uint64) x9 >>= 32
+ shr $32,%r10
+ # x11 = j10
+ movq 96(%rsp),%r12
+ # x10 = x11
+ mov %r12,%r13
+ # x10_stack = x10
+ movq %r13,168(%rsp)
+ # (uint64) x11 >>= 32
+ shr $32,%r12
+ # x13 = j12
+ movq 104(%rsp),%r13
+ # x12 = x13
+ mov %r13,%r14
+ # (uint64) x13 >>= 32
+ shr $32,%r13
+ # x15 = j14
+ movq 112(%rsp),%r15
+ # x14 = x15
+ mov %r15,%rbx
+ # (uint64) x15 >>= 32
+ shr $32,%r15
+ # x15_stack = x15
+ movq %r15,176(%rsp)
+ # i = 20
+ mov $20,%r15
+# mainloop:
+._mainloop:
+ # i_backup = i
+ movq %r15,184(%rsp)
+ # x5 = x5_stack
+ movq 160(%rsp),%r15
+ # a = x12 + x0
+ lea (%r14,%rdx),%rbp
+ # (uint32) a <<<= 7
+ rol $7,%ebp
+ # x4 ^= a
+ xor %rbp,%r9
+ # b = x1 + x5
+ lea (%rdi,%r15),%rbp
+ # (uint32) b <<<= 7
+ rol $7,%ebp
+ # x9 ^= b
+ xor %rbp,%r10
+ # a = x0 + x4
+ lea (%rdx,%r9),%rbp
+ # (uint32) a <<<= 9
+ rol $9,%ebp
+ # x8 ^= a
+ xor %rbp,%r11
+ # b = x5 + x9
+ lea (%r15,%r10),%rbp
+ # (uint32) b <<<= 9
+ rol $9,%ebp
+ # x13 ^= b
+ xor %rbp,%r13
+ # a = x4 + x8
+ lea (%r9,%r11),%rbp
+ # (uint32) a <<<= 13
+ rol $13,%ebp
+ # x12 ^= a
+ xor %rbp,%r14
+ # b = x9 + x13
+ lea (%r10,%r13),%rbp
+ # (uint32) b <<<= 13
+ rol $13,%ebp
+ # x1 ^= b
+ xor %rbp,%rdi
+ # a = x8 + x12
+ lea (%r11,%r14),%rbp
+ # (uint32) a <<<= 18
+ rol $18,%ebp
+ # x0 ^= a
+ xor %rbp,%rdx
+ # b = x13 + x1
+ lea (%r13,%rdi),%rbp
+ # (uint32) b <<<= 18
+ rol $18,%ebp
+ # x5 ^= b
+ xor %rbp,%r15
+ # x10 = x10_stack
+ movq 168(%rsp),%rbp
+ # x5_stack = x5
+ movq %r15,160(%rsp)
+ # c = x6 + x10
+ lea (%rax,%rbp),%r15
+ # (uint32) c <<<= 7
+ rol $7,%r15d
+ # x14 ^= c
+ xor %r15,%rbx
+ # c = x10 + x14
+ lea (%rbp,%rbx),%r15
+ # (uint32) c <<<= 9
+ rol $9,%r15d
+ # x2 ^= c
+ xor %r15,%rcx
+ # c = x14 + x2
+ lea (%rbx,%rcx),%r15
+ # (uint32) c <<<= 13
+ rol $13,%r15d
+ # x6 ^= c
+ xor %r15,%rax
+ # c = x2 + x6
+ lea (%rcx,%rax),%r15
+ # (uint32) c <<<= 18
+ rol $18,%r15d
+ # x10 ^= c
+ xor %r15,%rbp
+ # x15 = x15_stack
+ movq 176(%rsp),%r15
+ # x10_stack = x10
+ movq %rbp,168(%rsp)
+ # d = x11 + x15
+ lea (%r12,%r15),%rbp
+ # (uint32) d <<<= 7
+ rol $7,%ebp
+ # x3 ^= d
+ xor %rbp,%rsi
+ # d = x15 + x3
+ lea (%r15,%rsi),%rbp
+ # (uint32) d <<<= 9
+ rol $9,%ebp
+ # x7 ^= d
+ xor %rbp,%r8
+ # d = x3 + x7
+ lea (%rsi,%r8),%rbp
+ # (uint32) d <<<= 13
+ rol $13,%ebp
+ # x11 ^= d
+ xor %rbp,%r12
+ # d = x7 + x11
+ lea (%r8,%r12),%rbp
+ # (uint32) d <<<= 18
+ rol $18,%ebp
+ # x15 ^= d
+ xor %rbp,%r15
+ # x15_stack = x15
+ movq %r15,176(%rsp)
+ # x5 = x5_stack
+ movq 160(%rsp),%r15
+ # a = x3 + x0
+ lea (%rsi,%rdx),%rbp
+ # (uint32) a <<<= 7
+ rol $7,%ebp
+ # x1 ^= a
+ xor %rbp,%rdi
+ # b = x4 + x5
+ lea (%r9,%r15),%rbp
+ # (uint32) b <<<= 7
+ rol $7,%ebp
+ # x6 ^= b
+ xor %rbp,%rax
+ # a = x0 + x1
+ lea (%rdx,%rdi),%rbp
+ # (uint32) a <<<= 9
+ rol $9,%ebp
+ # x2 ^= a
+ xor %rbp,%rcx
+ # b = x5 + x6
+ lea (%r15,%rax),%rbp
+ # (uint32) b <<<= 9
+ rol $9,%ebp
+ # x7 ^= b
+ xor %rbp,%r8
+ # a = x1 + x2
+ lea (%rdi,%rcx),%rbp
+ # (uint32) a <<<= 13
+ rol $13,%ebp
+ # x3 ^= a
+ xor %rbp,%rsi
+ # b = x6 + x7
+ lea (%rax,%r8),%rbp
+ # (uint32) b <<<= 13
+ rol $13,%ebp
+ # x4 ^= b
+ xor %rbp,%r9
+ # a = x2 + x3
+ lea (%rcx,%rsi),%rbp
+ # (uint32) a <<<= 18
+ rol $18,%ebp
+ # x0 ^= a
+ xor %rbp,%rdx
+ # b = x7 + x4
+ lea (%r8,%r9),%rbp
+ # (uint32) b <<<= 18
+ rol $18,%ebp
+ # x5 ^= b
+ xor %rbp,%r15
+ # x10 = x10_stack
+ movq 168(%rsp),%rbp
+ # x5_stack = x5
+ movq %r15,160(%rsp)
+ # c = x9 + x10
+ lea (%r10,%rbp),%r15
+ # (uint32) c <<<= 7
+ rol $7,%r15d
+ # x11 ^= c
+ xor %r15,%r12
+ # c = x10 + x11
+ lea (%rbp,%r12),%r15
+ # (uint32) c <<<= 9
+ rol $9,%r15d
+ # x8 ^= c
+ xor %r15,%r11
+ # c = x11 + x8
+ lea (%r12,%r11),%r15
+ # (uint32) c <<<= 13
+ rol $13,%r15d
+ # x9 ^= c
+ xor %r15,%r10
+ # c = x8 + x9
+ lea (%r11,%r10),%r15
+ # (uint32) c <<<= 18
+ rol $18,%r15d
+ # x10 ^= c
+ xor %r15,%rbp
+ # x15 = x15_stack
+ movq 176(%rsp),%r15
+ # x10_stack = x10
+ movq %rbp,168(%rsp)
+ # d = x14 + x15
+ lea (%rbx,%r15),%rbp
+ # (uint32) d <<<= 7
+ rol $7,%ebp
+ # x12 ^= d
+ xor %rbp,%r14
+ # d = x15 + x12
+ lea (%r15,%r14),%rbp
+ # (uint32) d <<<= 9
+ rol $9,%ebp
+ # x13 ^= d
+ xor %rbp,%r13
+ # d = x12 + x13
+ lea (%r14,%r13),%rbp
+ # (uint32) d <<<= 13
+ rol $13,%ebp
+ # x14 ^= d
+ xor %rbp,%rbx
+ # d = x13 + x14
+ lea (%r13,%rbx),%rbp
+ # (uint32) d <<<= 18
+ rol $18,%ebp
+ # x15 ^= d
+ xor %rbp,%r15
+ # x15_stack = x15
+ movq %r15,176(%rsp)
+ # x5 = x5_stack
+ movq 160(%rsp),%r15
+ # a = x12 + x0
+ lea (%r14,%rdx),%rbp
+ # (uint32) a <<<= 7
+ rol $7,%ebp
+ # x4 ^= a
+ xor %rbp,%r9
+ # b = x1 + x5
+ lea (%rdi,%r15),%rbp
+ # (uint32) b <<<= 7
+ rol $7,%ebp
+ # x9 ^= b
+ xor %rbp,%r10
+ # a = x0 + x4
+ lea (%rdx,%r9),%rbp
+ # (uint32) a <<<= 9
+ rol $9,%ebp
+ # x8 ^= a
+ xor %rbp,%r11
+ # b = x5 + x9
+ lea (%r15,%r10),%rbp
+ # (uint32) b <<<= 9
+ rol $9,%ebp
+ # x13 ^= b
+ xor %rbp,%r13
+ # a = x4 + x8
+ lea (%r9,%r11),%rbp
+ # (uint32) a <<<= 13
+ rol $13,%ebp
+ # x12 ^= a
+ xor %rbp,%r14
+ # b = x9 + x13
+ lea (%r10,%r13),%rbp
+ # (uint32) b <<<= 13
+ rol $13,%ebp
+ # x1 ^= b
+ xor %rbp,%rdi
+ # a = x8 + x12
+ lea (%r11,%r14),%rbp
+ # (uint32) a <<<= 18
+ rol $18,%ebp
+ # x0 ^= a
+ xor %rbp,%rdx
+ # b = x13 + x1
+ lea (%r13,%rdi),%rbp
+ # (uint32) b <<<= 18
+ rol $18,%ebp
+ # x5 ^= b
+ xor %rbp,%r15
+ # x10 = x10_stack
+ movq 168(%rsp),%rbp
+ # x5_stack = x5
+ movq %r15,160(%rsp)
+ # c = x6 + x10
+ lea (%rax,%rbp),%r15
+ # (uint32) c <<<= 7
+ rol $7,%r15d
+ # x14 ^= c
+ xor %r15,%rbx
+ # c = x10 + x14
+ lea (%rbp,%rbx),%r15
+ # (uint32) c <<<= 9
+ rol $9,%r15d
+ # x2 ^= c
+ xor %r15,%rcx
+ # c = x14 + x2
+ lea (%rbx,%rcx),%r15
+ # (uint32) c <<<= 13
+ rol $13,%r15d
+ # x6 ^= c
+ xor %r15,%rax
+ # c = x2 + x6
+ lea (%rcx,%rax),%r15
+ # (uint32) c <<<= 18
+ rol $18,%r15d
+ # x10 ^= c
+ xor %r15,%rbp
+ # x15 = x15_stack
+ movq 176(%rsp),%r15
+ # x10_stack = x10
+ movq %rbp,168(%rsp)
+ # d = x11 + x15
+ lea (%r12,%r15),%rbp
+ # (uint32) d <<<= 7
+ rol $7,%ebp
+ # x3 ^= d
+ xor %rbp,%rsi
+ # d = x15 + x3
+ lea (%r15,%rsi),%rbp
+ # (uint32) d <<<= 9
+ rol $9,%ebp
+ # x7 ^= d
+ xor %rbp,%r8
+ # d = x3 + x7
+ lea (%rsi,%r8),%rbp
+ # (uint32) d <<<= 13
+ rol $13,%ebp
+ # x11 ^= d
+ xor %rbp,%r12
+ # d = x7 + x11
+ lea (%r8,%r12),%rbp
+ # (uint32) d <<<= 18
+ rol $18,%ebp
+ # x15 ^= d
+ xor %rbp,%r15
+ # x15_stack = x15
+ movq %r15,176(%rsp)
+ # x5 = x5_stack
+ movq 160(%rsp),%r15
+ # a = x3 + x0
+ lea (%rsi,%rdx),%rbp
+ # (uint32) a <<<= 7
+ rol $7,%ebp
+ # x1 ^= a
+ xor %rbp,%rdi
+ # b = x4 + x5
+ lea (%r9,%r15),%rbp
+ # (uint32) b <<<= 7
+ rol $7,%ebp
+ # x6 ^= b
+ xor %rbp,%rax
+ # a = x0 + x1
+ lea (%rdx,%rdi),%rbp
+ # (uint32) a <<<= 9
+ rol $9,%ebp
+ # x2 ^= a
+ xor %rbp,%rcx
+ # b = x5 + x6
+ lea (%r15,%rax),%rbp
+ # (uint32) b <<<= 9
+ rol $9,%ebp
+ # x7 ^= b
+ xor %rbp,%r8
+ # a = x1 + x2
+ lea (%rdi,%rcx),%rbp
+ # (uint32) a <<<= 13
+ rol $13,%ebp
+ # x3 ^= a
+ xor %rbp,%rsi
+ # b = x6 + x7
+ lea (%rax,%r8),%rbp
+ # (uint32) b <<<= 13
+ rol $13,%ebp
+ # x4 ^= b
+ xor %rbp,%r9
+ # a = x2 + x3
+ lea (%rcx,%rsi),%rbp
+ # (uint32) a <<<= 18
+ rol $18,%ebp
+ # x0 ^= a
+ xor %rbp,%rdx
+ # b = x7 + x4
+ lea (%r8,%r9),%rbp
+ # (uint32) b <<<= 18
+ rol $18,%ebp
+ # x5 ^= b
+ xor %rbp,%r15
+ # x10 = x10_stack
+ movq 168(%rsp),%rbp
+ # x5_stack = x5
+ movq %r15,160(%rsp)
+ # c = x9 + x10
+ lea (%r10,%rbp),%r15
+ # (uint32) c <<<= 7
+ rol $7,%r15d
+ # x11 ^= c
+ xor %r15,%r12
+ # c = x10 + x11
+ lea (%rbp,%r12),%r15
+ # (uint32) c <<<= 9
+ rol $9,%r15d
+ # x8 ^= c
+ xor %r15,%r11
+ # c = x11 + x8
+ lea (%r12,%r11),%r15
+ # (uint32) c <<<= 13
+ rol $13,%r15d
+ # x9 ^= c
+ xor %r15,%r10
+ # c = x8 + x9
+ lea (%r11,%r10),%r15
+ # (uint32) c <<<= 18
+ rol $18,%r15d
+ # x10 ^= c
+ xor %r15,%rbp
+ # x15 = x15_stack
+ movq 176(%rsp),%r15
+ # x10_stack = x10
+ movq %rbp,168(%rsp)
+ # d = x14 + x15
+ lea (%rbx,%r15),%rbp
+ # (uint32) d <<<= 7
+ rol $7,%ebp
+ # x12 ^= d
+ xor %rbp,%r14
+ # d = x15 + x12
+ lea (%r15,%r14),%rbp
+ # (uint32) d <<<= 9
+ rol $9,%ebp
+ # x13 ^= d
+ xor %rbp,%r13
+ # d = x12 + x13
+ lea (%r14,%r13),%rbp
+ # (uint32) d <<<= 13
+ rol $13,%ebp
+ # x14 ^= d
+ xor %rbp,%rbx
+ # d = x13 + x14
+ lea (%r13,%rbx),%rbp
+ # (uint32) d <<<= 18
+ rol $18,%ebp
+ # x15 ^= d
+ xor %rbp,%r15
+ # x15_stack = x15
+ movq %r15,176(%rsp)
+ # i = i_backup
+ movq 184(%rsp),%r15
+ # unsigned>? i -= 4
+ sub $4,%r15
+ # comment:fp stack unchanged by jump
+ # goto mainloop if unsigned>
+ ja ._mainloop
+ # (uint32) x2 += j2
+ addl 64(%rsp),%ecx
+ # x3 <<= 32
+ shl $32,%rsi
+ # x3 += j2
+ addq 64(%rsp),%rsi
+ # (uint64) x3 >>= 32
+ shr $32,%rsi
+ # x3 <<= 32
+ shl $32,%rsi
+ # x2 += x3
+ add %rsi,%rcx
+ # (uint32) x6 += j6
+ addl 80(%rsp),%eax
+ # x7 <<= 32
+ shl $32,%r8
+ # x7 += j6
+ addq 80(%rsp),%r8
+ # (uint64) x7 >>= 32
+ shr $32,%r8
+ # x7 <<= 32
+ shl $32,%r8
+ # x6 += x7
+ add %r8,%rax
+ # (uint32) x8 += j8
+ addl 88(%rsp),%r11d
+ # x9 <<= 32
+ shl $32,%r10
+ # x9 += j8
+ addq 88(%rsp),%r10
+ # (uint64) x9 >>= 32
+ shr $32,%r10
+ # x9 <<= 32
+ shl $32,%r10
+ # x8 += x9
+ add %r10,%r11
+ # (uint32) x12 += j12
+ addl 104(%rsp),%r14d
+ # x13 <<= 32
+ shl $32,%r13
+ # x13 += j12
+ addq 104(%rsp),%r13
+ # (uint64) x13 >>= 32
+ shr $32,%r13
+ # x13 <<= 32
+ shl $32,%r13
+ # x12 += x13
+ add %r13,%r14
+ # (uint32) x0 += j0
+ addl 56(%rsp),%edx
+ # x1 <<= 32
+ shl $32,%rdi
+ # x1 += j0
+ addq 56(%rsp),%rdi
+ # (uint64) x1 >>= 32
+ shr $32,%rdi
+ # x1 <<= 32
+ shl $32,%rdi
+ # x0 += x1
+ add %rdi,%rdx
+ # x5 = x5_stack
+ movq 160(%rsp),%rdi
+ # (uint32) x4 += j4
+ addl 72(%rsp),%r9d
+ # x5 <<= 32
+ shl $32,%rdi
+ # x5 += j4
+ addq 72(%rsp),%rdi
+ # (uint64) x5 >>= 32
+ shr $32,%rdi
+ # x5 <<= 32
+ shl $32,%rdi
+ # x4 += x5
+ add %rdi,%r9
+ # x10 = x10_stack
+ movq 168(%rsp),%r8
+ # (uint32) x10 += j10
+ addl 96(%rsp),%r8d
+ # x11 <<= 32
+ shl $32,%r12
+ # x11 += j10
+ addq 96(%rsp),%r12
+ # (uint64) x11 >>= 32
+ shr $32,%r12
+ # x11 <<= 32
+ shl $32,%r12
+ # x10 += x11
+ add %r12,%r8
+ # x15 = x15_stack
+ movq 176(%rsp),%rdi
+ # (uint32) x14 += j14
+ addl 112(%rsp),%ebx
+ # x15 <<= 32
+ shl $32,%rdi
+ # x15 += j14
+ addq 112(%rsp),%rdi
+ # (uint64) x15 >>= 32
+ shr $32,%rdi
+ # x15 <<= 32
+ shl $32,%rdi
+ # x14 += x15
+ add %rdi,%rbx
+ # out = out_backup
+ movq 136(%rsp),%rdi
+ # m = m_backup
+ movq 144(%rsp),%rsi
+ # x0 ^= *(uint64 *) (m + 0)
+ xorq 0(%rsi),%rdx
+ # *(uint64 *) (out + 0) = x0
+ movq %rdx,0(%rdi)
+ # x2 ^= *(uint64 *) (m + 8)
+ xorq 8(%rsi),%rcx
+ # *(uint64 *) (out + 8) = x2
+ movq %rcx,8(%rdi)
+ # x4 ^= *(uint64 *) (m + 16)
+ xorq 16(%rsi),%r9
+ # *(uint64 *) (out + 16) = x4
+ movq %r9,16(%rdi)
+ # x6 ^= *(uint64 *) (m + 24)
+ xorq 24(%rsi),%rax
+ # *(uint64 *) (out + 24) = x6
+ movq %rax,24(%rdi)
+ # x8 ^= *(uint64 *) (m + 32)
+ xorq 32(%rsi),%r11
+ # *(uint64 *) (out + 32) = x8
+ movq %r11,32(%rdi)
+ # x10 ^= *(uint64 *) (m + 40)
+ xorq 40(%rsi),%r8
+ # *(uint64 *) (out + 40) = x10
+ movq %r8,40(%rdi)
+ # x12 ^= *(uint64 *) (m + 48)
+ xorq 48(%rsi),%r14
+ # *(uint64 *) (out + 48) = x12
+ movq %r14,48(%rdi)
+ # x14 ^= *(uint64 *) (m + 56)
+ xorq 56(%rsi),%rbx
+ # *(uint64 *) (out + 56) = x14
+ movq %rbx,56(%rdi)
+ # bytes = bytes_backup
+ movq 152(%rsp),%rdx
+ # in8 = j8
+ movq 88(%rsp),%rcx
+ # in8 += 1
+ add $1,%rcx
+ # j8 = in8
+ movq %rcx,88(%rsp)
+ # unsigned>? unsigned<? bytes - 64
+ cmp $64,%rdx
+ # comment:fp stack unchanged by jump
+ # goto bytesatleast65 if unsigned>
+ ja ._bytesatleast65
+ # comment:fp stack unchanged by jump
+ # goto bytesatleast64 if !unsigned<
+ jae ._bytesatleast64
+ # m = out
+ mov %rdi,%rsi
+ # out = ctarget
+ movq 128(%rsp),%rdi
+ # i = bytes
+ mov %rdx,%rcx
+ # while (i) { *out++ = *m++; --i }
+ rep movsb
+ # comment:fp stack unchanged by fallthrough
+# bytesatleast64:
+._bytesatleast64:
+ # x = x_backup
+ movq 120(%rsp),%rdi
+ # in8 = j8
+ movq 88(%rsp),%rsi
+ # *(uint64 *) (x + 32) = in8
+ movq %rsi,32(%rdi)
+ # r11 = r11_stack
+ movq 0(%rsp),%r11
+ # r12 = r12_stack
+ movq 8(%rsp),%r12
+ # r13 = r13_stack
+ movq 16(%rsp),%r13
+ # r14 = r14_stack
+ movq 24(%rsp),%r14
+ # r15 = r15_stack
+ movq 32(%rsp),%r15
+ # rbx = rbx_stack
+ movq 40(%rsp),%rbx
+ # rbp = rbp_stack
+ movq 48(%rsp),%rbp
+ # comment:fp stack unchanged by fallthrough
+# done:
+._done:
+ # leave
+ add %r11,%rsp
+ mov %rdi,%rax
+ mov %rsi,%rdx
+ ret
+# bytesatleast65:
+._bytesatleast65:
+ # bytes -= 64
+ sub $64,%rdx
+ # out += 64
+ add $64,%rdi
+ # m += 64
+ add $64,%rsi
+ # comment:fp stack unchanged by jump
+ # goto bytesatleast1
+ jmp ._bytesatleast1
+# enter ECRYPT_keysetup
+.text
+.p2align 5
+.globl ECRYPT_keysetup
+ECRYPT_keysetup:
+ mov %rsp,%r11
+ and $31,%r11
+ add $256,%r11
+ sub %r11,%rsp
+ # k = arg2
+ mov %rsi,%rsi
+ # kbits = arg3
+ mov %rdx,%rdx
+ # x = arg1
+ mov %rdi,%rdi
+ # in0 = *(uint64 *) (k + 0)
+ movq 0(%rsi),%r8
+ # in2 = *(uint64 *) (k + 8)
+ movq 8(%rsi),%r9
+ # *(uint64 *) (x + 4) = in0
+ movq %r8,4(%rdi)
+ # *(uint64 *) (x + 12) = in2
+ movq %r9,12(%rdi)
+ # unsigned<? kbits - 256
+ cmp $256,%rdx
+ # comment:fp stack unchanged by jump
+ # goto kbits128 if unsigned<
+ jb ._kbits128
+# kbits256:
+._kbits256:
+ # in10 = *(uint64 *) (k + 16)
+ movq 16(%rsi),%rdx
+ # in12 = *(uint64 *) (k + 24)
+ movq 24(%rsi),%rsi
+ # *(uint64 *) (x + 44) = in10
+ movq %rdx,44(%rdi)
+ # *(uint64 *) (x + 52) = in12
+ movq %rsi,52(%rdi)
+ # in0 = 1634760805
+ mov $1634760805,%rsi
+ # in4 = 857760878
+ mov $857760878,%rdx
+ # in10 = 2036477234
+ mov $2036477234,%rcx
+ # in14 = 1797285236
+ mov $1797285236,%r8
+ # *(uint32 *) (x + 0) = in0
+ movl %esi,0(%rdi)
+ # *(uint32 *) (x + 20) = in4
+ movl %edx,20(%rdi)
+ # *(uint32 *) (x + 40) = in10
+ movl %ecx,40(%rdi)
+ # *(uint32 *) (x + 60) = in14
+ movl %r8d,60(%rdi)
+ # comment:fp stack unchanged by jump
+ # goto keysetupdone
+ jmp ._keysetupdone
+# kbits128:
+._kbits128:
+ # in10 = *(uint64 *) (k + 0)
+ movq 0(%rsi),%rdx
+ # in12 = *(uint64 *) (k + 8)
+ movq 8(%rsi),%rsi
+ # *(uint64 *) (x + 44) = in10
+ movq %rdx,44(%rdi)
+ # *(uint64 *) (x + 52) = in12
+ movq %rsi,52(%rdi)
+ # in0 = 1634760805
+ mov $1634760805,%rsi
+ # in4 = 824206446
+ mov $824206446,%rdx
+ # in10 = 2036477238
+ mov $2036477238,%rcx
+ # in14 = 1797285236
+ mov $1797285236,%r8
+ # *(uint32 *) (x + 0) = in0
+ movl %esi,0(%rdi)
+ # *(uint32 *) (x + 20) = in4
+ movl %edx,20(%rdi)
+ # *(uint32 *) (x + 40) = in10
+ movl %ecx,40(%rdi)
+ # *(uint32 *) (x + 60) = in14
+ movl %r8d,60(%rdi)
+# keysetupdone:
+._keysetupdone:
+ # leave
+ add %r11,%rsp
+ mov %rdi,%rax
+ mov %rsi,%rdx
+ ret
+# enter ECRYPT_ivsetup
+.text
+.p2align 5
+.globl ECRYPT_ivsetup
+ECRYPT_ivsetup:
+ mov %rsp,%r11
+ and $31,%r11
+ add $256,%r11
+ sub %r11,%rsp
+ # iv = arg2
+ mov %rsi,%rsi
+ # x = arg1
+ mov %rdi,%rdi
+ # in6 = *(uint64 *) (iv + 0)
+ movq 0(%rsi),%rsi
+ # in8 = 0
+ mov $0,%r8
+ # *(uint64 *) (x + 24) = in6
+ movq %rsi,24(%rdi)
+ # *(uint64 *) (x + 32) = in8
+ movq %r8,32(%rdi)
+ # leave
+ add %r11,%rsp
+ mov %rdi,%rax
+ mov %rsi,%rdx
+ ret
diff --git a/arch/x86/crypto/salsa20_glue.c b/arch/x86/crypto/salsa20_glue.c
new file mode 100644
index 00000000000..bccb76d8098
--- /dev/null
+++ b/arch/x86/crypto/salsa20_glue.c
@@ -0,0 +1,129 @@
+/*
+ * Glue code for optimized assembly version of Salsa20.
+ *
+ * Copyright (c) 2007 Tan Swee Heng <thesweeheng@gmail.com>
+ *
+ * The assembly codes are public domain assembly codes written by Daniel. J.
+ * Bernstein <djb@cr.yp.to>. The codes are modified to include indentation
+ * and to remove extraneous comments and functions that are not needed.
+ * - i586 version, renamed as salsa20-i586-asm_32.S
+ * available from <http://cr.yp.to/snuffle/salsa20/x86-pm/salsa20.s>
+ * - x86-64 version, renamed as salsa20-x86_64-asm_64.S
+ * available from <http://cr.yp.to/snuffle/salsa20/amd64-3/salsa20.s>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <crypto/algapi.h>
+#include <linux/module.h>
+#include <linux/crypto.h>
+
+#define SALSA20_IV_SIZE 8U
+#define SALSA20_MIN_KEY_SIZE 16U
+#define SALSA20_MAX_KEY_SIZE 32U
+
+// use the ECRYPT_* function names
+#define salsa20_keysetup ECRYPT_keysetup
+#define salsa20_ivsetup ECRYPT_ivsetup
+#define salsa20_encrypt_bytes ECRYPT_encrypt_bytes
+
+struct salsa20_ctx
+{
+ u32 input[16];
+};
+
+asmlinkage void salsa20_keysetup(struct salsa20_ctx *ctx, const u8 *k,
+ u32 keysize, u32 ivsize);
+asmlinkage void salsa20_ivsetup(struct salsa20_ctx *ctx, const u8 *iv);
+asmlinkage void salsa20_encrypt_bytes(struct salsa20_ctx *ctx,
+ const u8 *src, u8 *dst, u32 bytes);
+
+static int setkey(struct crypto_tfm *tfm, const u8 *key,
+ unsigned int keysize)
+{
+ struct salsa20_ctx *ctx = crypto_tfm_ctx(tfm);
+ salsa20_keysetup(ctx, key, keysize*8, SALSA20_IV_SIZE*8);
+ return 0;
+}
+
+static int encrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct blkcipher_walk walk;
+ struct crypto_blkcipher *tfm = desc->tfm;
+ struct salsa20_ctx *ctx = crypto_blkcipher_ctx(tfm);
+ int err;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt_block(desc, &walk, 64);
+
+ salsa20_ivsetup(ctx, walk.iv);
+
+ if (likely(walk.nbytes == nbytes))
+ {
+ salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
+ walk.dst.virt.addr, nbytes);
+ return blkcipher_walk_done(desc, &walk, 0);
+ }
+
+ while (walk.nbytes >= 64) {
+ salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
+ walk.dst.virt.addr,
+ walk.nbytes - (walk.nbytes % 64));
+ err = blkcipher_walk_done(desc, &walk, walk.nbytes % 64);
+ }
+
+ if (walk.nbytes) {
+ salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
+ walk.dst.virt.addr, walk.nbytes);
+ err = blkcipher_walk_done(desc, &walk, 0);
+ }
+
+ return err;
+}
+
+static struct crypto_alg alg = {
+ .cra_name = "salsa20",
+ .cra_driver_name = "salsa20-asm",
+ .cra_priority = 200,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct salsa20_ctx),
+ .cra_alignmask = 3,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(alg.cra_list),
+ .cra_u = {
+ .blkcipher = {
+ .setkey = setkey,
+ .encrypt = encrypt,
+ .decrypt = encrypt,
+ .min_keysize = SALSA20_MIN_KEY_SIZE,
+ .max_keysize = SALSA20_MAX_KEY_SIZE,
+ .ivsize = SALSA20_IV_SIZE,
+ }
+ }
+};
+
+static int __init init(void)
+{
+ return crypto_register_alg(&alg);
+}
+
+static void __exit fini(void)
+{
+ crypto_unregister_alg(&alg);
+}
+
+module_init(init);
+module_exit(fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION ("Salsa20 stream cipher algorithm (optimized assembly version)");
+MODULE_ALIAS("salsa20");
+MODULE_ALIAS("salsa20-asm");
diff --git a/arch/x86/crypto/twofish_64.c b/arch/x86/crypto/twofish_64.c
deleted file mode 100644
index 182d91d5cfb..00000000000
--- a/arch/x86/crypto/twofish_64.c
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Glue Code for optimized x86_64 assembler version of TWOFISH
- *
- * Originally Twofish for GPG
- * By Matthew Skala <mskala@ansuz.sooke.bc.ca>, July 26, 1998
- * 256-bit key length added March 20, 1999
- * Some modifications to reduce the text size by Werner Koch, April, 1998
- * Ported to the kerneli patch by Marc Mutz <Marc@Mutz.com>
- * Ported to CryptoAPI by Colin Slater <hoho@tacomeat.net>
- *
- * The original author has disclaimed all copyright interest in this
- * code and thus put it in the public domain. The subsequent authors
- * have put this under the GNU General Public License.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
- * USA
- *
- * This code is a "clean room" implementation, written from the paper
- * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey,
- * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available
- * through http://www.counterpane.com/twofish.html
- *
- * For background information on multiplication in finite fields, used for
- * the matrix operations in the key schedule, see the book _Contemporary
- * Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the
- * Third Edition.
- */
-
-#include <crypto/twofish.h>
-#include <linux/crypto.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/types.h>
-
-asmlinkage void twofish_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
-asmlinkage void twofish_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
-
-static void twofish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
- twofish_enc_blk(tfm, dst, src);
-}
-
-static void twofish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
- twofish_dec_blk(tfm, dst, src);
-}
-
-static struct crypto_alg alg = {
- .cra_name = "twofish",
- .cra_driver_name = "twofish-x86_64",
- .cra_priority = 200,
- .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
- .cra_blocksize = TF_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct twofish_ctx),
- .cra_alignmask = 3,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(alg.cra_list),
- .cra_u = {
- .cipher = {
- .cia_min_keysize = TF_MIN_KEY_SIZE,
- .cia_max_keysize = TF_MAX_KEY_SIZE,
- .cia_setkey = twofish_setkey,
- .cia_encrypt = twofish_encrypt,
- .cia_decrypt = twofish_decrypt
- }
- }
-};
-
-static int __init init(void)
-{
- return crypto_register_alg(&alg);
-}
-
-static void __exit fini(void)
-{
- crypto_unregister_alg(&alg);
-}
-
-module_init(init);
-module_exit(fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION ("Twofish Cipher Algorithm, x86_64 asm optimized");
-MODULE_ALIAS("twofish");
diff --git a/arch/x86/crypto/twofish_32.c b/arch/x86/crypto/twofish_glue.c
index e3004dfe9c7..cefaf8b9aa1 100644
--- a/arch/x86/crypto/twofish_32.c
+++ b/arch/x86/crypto/twofish_glue.c
@@ -1,5 +1,5 @@
/*
- * Glue Code for optimized 586 assembler version of TWOFISH
+ * Glue Code for assembler optimized version of TWOFISH
*
* Originally Twofish for GPG
* By Matthew Skala <mskala@ansuz.sooke.bc.ca>, July 26, 1998
@@ -44,7 +44,6 @@
#include <linux/module.h>
#include <linux/types.h>
-
asmlinkage void twofish_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
asmlinkage void twofish_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
@@ -60,7 +59,7 @@ static void twofish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
static struct crypto_alg alg = {
.cra_name = "twofish",
- .cra_driver_name = "twofish-i586",
+ .cra_driver_name = "twofish-asm",
.cra_priority = 200,
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
.cra_blocksize = TF_BLOCK_SIZE,
@@ -93,5 +92,6 @@ module_init(init);
module_exit(fini);
MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION ("Twofish Cipher Algorithm, i586 asm optimized");
+MODULE_DESCRIPTION ("Twofish Cipher Algorithm, asm optimized");
MODULE_ALIAS("twofish");
+MODULE_ALIAS("twofish-asm");